Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo
- **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc.
- **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes BibTeX, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation).
For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity.
We aim to recognize the following properties:
We recognize the following properties:
- Title: Title of the publication
- Author: list of author names in the publication
- URL: URL of the publication
- DOI: Digital object identifier of the publication
- Date published
- Version: Software version (if applicable)
- Journal: Journal name where the paper was published
- Year: Year of publication
- Pages: Page range in the journal
Expand Down
2 changes: 1 addition & 1 deletion docs/citationcff.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ These fields are defined in the [CITATION.cff specification](https://citation-fi
| license - value | license[i].result.value | license |
| license - spdx_id | license[i].result.spdx_id | license |
| license - name | license[i].result.name | license |

| version - value | version[i].result.value | version |
---

*(1)*
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca
- **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc.
- **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes BibTeX, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation).
For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity.
We aim to recognize the following properties:
We recognize the following properties:
- Title: Title of the publication
- Author: list of author names in the publication
- URL: URL of the publication
Expand Down
31 changes: 30 additions & 1 deletion src/somef/process_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,12 +576,20 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul

if yaml_content:
license_value = yaml_content.get("license")
version_value = yaml_content.get("version")

logging.info(f"Extracted license value from CFF: {license_value}")
if license_value:
if isinstance(license_value, list):
license_value = license_value[0]
parse_license_cff(license_value, metadata_result, url)

logging.info(f"Extracted version value from CFF: {version_value}")
if version_value:
if isinstance(version_value, list):
version_value = version_value[0]
parse_version_cff(version_value, metadata_result, url)

root_result = parse_cff_root(yaml_content, metadata_result,url)
root_result[constants.PROP_VALUE] = file_text
# root_result[constants.PROP_TYPE] = constants.FILE_DUMP
Expand Down Expand Up @@ -722,7 +730,7 @@ def parse_cff_root(yaml_content, metadata_result, url):

result[constants.PROP_TITLE] = yaml_content.get("title")
result["authors"] = parse_authors_citation(yaml_content.get("authors", []))
result[constants.PROP_VERSION] = yaml_content.get("version")
# result[constants.PROP_VERSION] = yaml_content.get("version")
result[constants.PROP_DOI] = yaml_content.get("doi")
result[constants.PROP_URL] = yaml_content.get("url")
result[constants.PROP_TYPE] = constants.SOFTWARE_APPLICATION
Expand Down Expand Up @@ -789,4 +797,25 @@ def parse_license_cff(license_value, metadata_result, url):
logging.error(f"Error parsing license from CFF: {str(e)}")


def parse_version_cff(version_value, metadata_result, url):
    """
    Register the version read from a CFF file under the version category.

    The value is converted to a string and stored, together with the
    literal type "String", via ``metadata_result.add_result`` using
    confidence 1, the file-exploration technique and ``url`` as source.

    Any exception raised while building or adding the entry is logged
    and not propagated; the function returns None in every case.
    """
    try:
        # Stringify first so non-string YAML scalars (e.g. floats) are accepted.
        version_text = str(version_value)
        entry = {
            constants.PROP_VALUE: version_text,
            constants.PROP_TYPE: "String",
        }
        confidence = 1
        metadata_result.add_result(
            constants.CAT_VERSION,
            entry,
            confidence,
            constants.TECHNIQUE_FILE_EXPLORATION,
            url,
        )
    except Exception as e:
        # Best-effort: a malformed version must not abort CFF processing.
        logging.error(f"Error parsing version from CFF: {str(e)}")
14 changes: 12 additions & 2 deletions src/somef/test/test_JSON_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,15 +630,25 @@ def test_new_properties_citation_issue_935(self):
assert software_entry is not None, "Software citation (root) not found"
sw_result = software_entry["result"]
assert sw_result["title"] == 'SOMEF: Software metadata extraction framework'
assert sw_result["version"] == "0.1.0"
# assert sw_result["version"] == "0.1.0"
assert "doi" not in sw_result or sw_result.get("doi") is None # it is in preferred (referencePublication) but not in the root

assert preferred_entry is not None, "Preferred citation (article) not found"
pref_result = preferred_entry["result"]
assert pref_result["title"] == "A Framework for Creating Knowledge Graphs of Scientific Software Metadata"
assert pref_result["doi"] == "10.1162/qss_a_00167"
assert pref_result["journal"] == "Quantitative Science Studies"
assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication)
# assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication)

versions = json_content.get(constants.CAT_VERSION, [])
cff_version_entry = next(
(v for v in versions if "CITATION.cff" in v.get("source", "")),
None
)

# 2. Validate that the version exists in its new location
assert cff_version_entry is not None, "Version from CFF not found in global version field"
assert cff_version_entry["result"]["value"] == "0.1.0"

os.remove(test_data_path + "test_new_properties_citation_issue_935.json")

Expand Down
Loading