From f836a69e0f12130dee4894b924aeb3a2565ba32a Mon Sep 17 00:00:00 2001 From: Juanje Mendoza Date: Thu, 16 Apr 2026 15:43:53 +0200 Subject: [PATCH 1/2] move version from citation to their own category. Missed version when mapping license. --- README.md | 1 - docs/citationcff.md | 2 +- src/somef/process_files.py | 31 +++++++++++++++++++++++++++++- src/somef/test/test_JSON_export.py | 14 ++++++++++++-- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e79ec4bb..fca2fb9c 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,6 @@ We aim to recognize the following properties: - URL: URL of the publication - DOI: Digital object identifier of the publication - Date published - - Version: Software version (if applicable) - Journal: Journal name where the paper was published - Year: Year of publication - Pages: Page range in the journal diff --git a/docs/citationcff.md b/docs/citationcff.md index dd7b100c..4637be21 100644 --- a/docs/citationcff.md +++ b/docs/citationcff.md @@ -18,7 +18,7 @@ These fields are defined in the [CITATION.cff specification](https://citation-fi | license - value | license[i].result.value | license | | license - spdx_id | license[i].result.spdx_id | license | | license - name | license[i].result.name | license | - +| version - value | version[i].result.value | version | --- *(1)* diff --git a/src/somef/process_files.py b/src/somef/process_files.py index 06a4c566..ba58c008 100644 --- a/src/somef/process_files.py +++ b/src/somef/process_files.py @@ -576,12 +576,20 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul if yaml_content: license_value = yaml_content.get("license") + version_value = yaml_content.get("version") + logging.info(f"Extracted license value from CFF: {license_value}") if license_value: if isinstance(license_value, list): license_value = license_value[0] parse_license_cff(license_value, metadata_result, url) + logging.info(f"Extracted version value from CFF: 
{version_value}") + if version_value: + if isinstance(version_value, list): + version_value = version_value[0] + parse_version_cff(version_value, metadata_result, url) + root_result = parse_cff_root(yaml_content, metadata_result,url) root_result[constants.PROP_VALUE] = file_text # root_result[constants.PROP_TYPE] = constants.FILE_DUMP @@ -722,7 +730,7 @@ def parse_cff_root(yaml_content, metadata_result, url): result[constants.PROP_TITLE] = yaml_content.get("title") result["authors"] = parse_authors_citation(yaml_content.get("authors", [])) - result[constants.PROP_VERSION] = yaml_content.get("version") + # result[constants.PROP_VERSION] = yaml_content.get("version") result[constants.PROP_DOI] = yaml_content.get("doi") result[constants.PROP_URL] = yaml_content.get("url") result[constants.PROP_TYPE] = constants.SOFTWARE_APPLICATION @@ -789,4 +797,25 @@ def parse_license_cff(license_value, metadata_result, url): logging.error(f"Error parsing license from CFF: {str(e)}") +def parse_version_cff(version_value, metadata_result, url): + """ + Parses the version from a CFF file and adds it to the global version metadata. 
+ """ + try: + + version_result = { + constants.PROP_VALUE: str(version_value), + constants.PROP_TYPE: "String" + + } + + metadata_result.add_result( + constants.CAT_VERSION, + version_result, + 1, + constants.TECHNIQUE_FILE_EXPLORATION, + url + ) + except Exception as e: + logging.error(f"Error parsing version from CFF: {str(e)}") diff --git a/src/somef/test/test_JSON_export.py b/src/somef/test/test_JSON_export.py index eee0dafb..ab9a4916 100644 --- a/src/somef/test/test_JSON_export.py +++ b/src/somef/test/test_JSON_export.py @@ -630,7 +630,7 @@ def test_new_properties_citation_issue_935(self): assert software_entry is not None, "Software citation (root) not found" sw_result = software_entry["result"] assert sw_result["title"] == 'SOMEF: Software metadata extraction framework' - assert sw_result["version"] == "0.1.0" + # assert sw_result["version"] == "0.1.0" assert "doi" not in sw_result or sw_result.get("doi") is None # it is in preferred (referencePublication) but not in the root assert preferred_entry is not None, "Preferred citation (article) not found" @@ -638,7 +638,17 @@ def test_new_properties_citation_issue_935(self): assert pref_result["title"] == "A Framework for Creating Knowledge Graphs of Scientific Software Metadata" assert pref_result["doi"] == "10.1162/qss_a_00167" assert pref_result["journal"] == "Quantitative Science Studies" - assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication) + # assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication) + + versions = json_content.get(constants.CAT_VERSION, []) + cff_version_entry = next( + (v for v in versions if "CITATION.cff" in v.get("source", "")), + None + ) + + # 2. 
Validamos que la versión existe en su nueva ubicación + assert cff_version_entry is not None, "Version from CFF not found in global version field" + assert cff_version_entry["result"]["value"] == "0.1.0" os.remove(test_data_path + "test_new_properties_citation_issue_935.json") From 30bac491c37af3b9ed26c1d0953c1cebe09f3efe Mon Sep 17 00:00:00 2001 From: Juanje Mendoza Date: Fri, 17 Apr 2026 07:30:38 +0200 Subject: [PATCH 2/2] suggested changes in docs --- README.md | 2 +- docs/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7c1cf666..97a61437 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo - **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc. - **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity. -We aim to recognize the following properties: +We recognize the following properties: - Title: Title of the publication - Author: list of author names in the publication - URL: URL of the publication diff --git a/docs/index.md b/docs/index.md index 6fd0cebe..6ea318f5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,7 +34,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc. - **Citation**: Preferred citation(s) as the authors have stated in their readme file. 
SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity. -We aim to recognize the following properties: +We recognize the following properties: - Title: Title of the publication - Author: list of author names in the publication - URL: URL of the publication