Skip to content
17 changes: 12 additions & 5 deletions dandi/metadata/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def extract_cellLine(metadata: dict) -> str | None:

NCBITAXON_URI_TEMPLATE = "http://purl.obolibrary.org/obo/NCBITaxon_{}"

# common_names, prefix, uri, name
# common_names, prefix, uri, name ({current name} - {GenBank common name})
species_map = [
Comment thread
yarikoptic marked this conversation as resolved.
(
["mouse"],
Expand Down Expand Up @@ -386,25 +386,31 @@ def extract_cellLine(metadata: dict) -> str | None:
["c. elegans", "caenorhabditis elegans"],
"caenorhabditis",
NCBITAXON_URI_TEMPLATE.format("6239"),
"Caenorhabditis elegans",
"Caenorhabditis elegans - Roundworm",
),
(
["pig-tailed macaque", "pigtail monkey", "pigtail macaque"],
None,
NCBITAXON_URI_TEMPLATE.format("9545"),
"Macaca nemestrina",
"Macaca nemestrina - Pig-tailed macaque",
),
(
["bonnet macaque", "bonnet monkey", "radiata"],
None,
NCBITAXON_URI_TEMPLATE.format("9548"),
"Macaca radiata - Bonnet macaque",
),
(
["mongolian gerbil", "mongolian jird"],
None,
NCBITAXON_URI_TEMPLATE.format("10047"),
"Meriones unguiculatus",
"Meriones unguiculatus - Mongolian gerbil",
),
(
["common paper wasp"],
None,
NCBITAXON_URI_TEMPLATE.format("30207"),
"Polistes fuscatus",
"Polistes fuscatus - Common paper wasp",
),
]

Expand Down Expand Up @@ -486,6 +492,7 @@ def extract_species(metadata: dict) -> models.SpeciesType | None:
for common_names, prefix, uri, name in species_map:
if (
lower_value == name.lower()
or lower_value == name.partition(" - ")[0].lower()
Comment thread
yarikoptic marked this conversation as resolved.
Outdated
or any(key in lower_value for key in common_names)
or (prefix is not None and lower_value.startswith(prefix))
):
Expand Down
5 changes: 3 additions & 2 deletions dandi/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ def test_species_all_possible(species: str) -> None:
assert species_rec.model_dump(mode="json", exclude_none=True) == {
"identifier": "http://purl.obolibrary.org/obo/NCBITaxon_10047",
"schemaKey": "SpeciesType",
"name": "Meriones unguiculatus",
"name": "Meriones unguiculatus - Mongolian gerbil",
}


Expand All @@ -782,9 +782,10 @@ def test_extract_unknown_species():

def test_species_map():
    """Check the invariants that every ``species_map`` entry must satisfy.

    Each entry is unpacked as ``(common_names, prefix, uri, name)``.
    """
    for common_names, _, _, name in species_map:
        # all alternative names should be lower case: the lookup compares
        # them against a lower-cased input value
        for key in common_names:
            assert key.lower() == key, f"common name {key!r} is not lower case"
        # the lookup splits the name on " - " to also match its first part
        # alone, so the separator has to be present in every name
        assert " - " in name, f"species name {name!r} lacks ' - ' separator"


@pytest.mark.parametrize(
Expand Down