Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
0f46c34
Fix the nx_char type for numpy to and .
RubelMozumder Feb 18, 2025
dd8beb4
Still char instead of the int is being validated which is wrong.
RubelMozumder Feb 19, 2025
351f377
Remove auto conversion for datatype.
RubelMozumder Feb 20, 2025
00b78aa
extends tests.
RubelMozumder Feb 20, 2025
3ad40a0
resolve PR comments
RubelMozumder Feb 27, 2025
6cf6f6d
Remove unnecessary returned value.
RubelMozumder Feb 27, 2025
539bdea
fix np integer and float.
RubelMozumder Feb 27, 2025
cde7a6e
minor change.
RubelMozumder Feb 28, 2025
4ab4b53
check enums for documented fields, and don't return False if any issu…
rettigl Feb 28, 2025
ae10916
add enum checking for attributes
rettigl Feb 18, 2025
8f4291e
Adds parsing code for enumeration tree generation during validation
sherjeelshabih Feb 24, 2025
099b965
Fix typos in old test
sherjeelshabih Feb 24, 2025
4da5735
always check data types and enums, and check NXdata attributes separa…
rettigl Feb 25, 2025
d4f4c2f
fix typos
rettigl Feb 26, 2025
1925d5d
move enum checking into is_valid_data_field, and proper bool conversion
rettigl Feb 28, 2025
ecb4914
satisfy mypy
rettigl Feb 28, 2025
3a1703e
mark namefitted group as undocumented
lukaspie Mar 3, 2025
2fae4de
Merge pull request #565 from FAIRmat-NFDI/fix_check_undocumented
rettigl Mar 4, 2025
4a29251
add tests from branch fix_attribute_enum_check
rettigl Mar 4, 2025
31cd131
add review suggestion
rettigl Mar 5, 2025
ca1ef4f
Merge pull request #573 from FAIRmat-NFDI/add_more_tests
sherjeelshabih Mar 5, 2025
3aabe0b
Fixes the types and removes bytes from NX_char as that creates failures
sherjeelshabih Mar 5, 2025
bd478e8
Fixes for arrays in an array
sherjeelshabih Mar 5, 2025
b336ca5
fix mypy error
rettigl Mar 5, 2025
e9b025e
ruff
rettigl Mar 5, 2025
8c89eef
Applies suggested fix
sherjeelshabih Mar 11, 2025
1c5538d
Update src/pynxtools/dataconverter/helpers.py
sherjeelshabih Mar 11, 2025
365f061
Applies fixes from suggestions
sherjeelshabih Mar 11, 2025
bd250cc
Updates
sherjeelshabih Mar 11, 2025
84426b7
Ruff
sherjeelshabih Mar 11, 2025
5a63c3b
Update src/pynxtools/dataconverter/helpers.py
sherjeelshabih Mar 11, 2025
0d056b7
ruff
sherjeelshabih Mar 11, 2025
70a457c
remove empty string
rettigl Mar 11, 2025
b6b112b
Fixes
sherjeelshabih Mar 11, 2025
467194e
rename original into one of the aux copies
rettigl Mar 13, 2025
65a1774
clean copies
rettigl Mar 13, 2025
a93300f
revert name of original file
rettigl Mar 13, 2025
9023d8d
rename original targets
rettigl Mar 13, 2025
36b6517
rename aux targets
rettigl Mar 13, 2025
ce574a8
combine with renamed
rettigl Mar 13, 2025
d878bf0
restore original target filenames
rettigl Mar 13, 2025
5bba3d3
fix converted tests
rettigl Mar 11, 2025
6520ffa
add additional tests for base class elements
rettigl Mar 11, 2025
67cb8c1
fix validation issues and add further tests
rettigl Mar 12, 2025
25fe60e
add case and tests for undocumented units
rettigl Mar 12, 2025
e719ea8
Merge remote-tracking branch 'origin/fix_validation_and_add_more_test…
rettigl Mar 13, 2025
5a8b46a
Merge remote-tracking branch 'origin/master' into fix_validation_and_…
rettigl Mar 13, 2025
3a4ecd8
reset definitions
rettigl Mar 13, 2025
74bb0da
revert tests and reflog
rettigl Mar 13, 2025
88451d7
update definitions to ref
rettigl Mar 13, 2025
434a626
Merge remote-tracking branch 'origin/fix_validation_and_add_more_test…
rettigl Mar 13, 2025
3cc7a01
update and fix merge errors, add test
rettigl Mar 13, 2025
7de7d5a
revert nomad example test - open enums not working yet
rettigl Mar 13, 2025
3f256a4
don't check for units if no good namefit or a group
rettigl Mar 13, 2025
bdf8997
allow multiple error messages and test for additional error messages
rettigl Mar 14, 2025
eb5f8f6
add test for wrong namefitting
rettigl Mar 14, 2025
802c394
Merge remote-tracking branch 'origin/fix_validation_and_add_more_test…
rettigl Mar 14, 2025
25656bd
trigger workflow
rettigl Mar 14, 2025
8829f06
fix reader tests
rettigl Mar 14, 2025
501537d
catch failing namefitting
rettigl Mar 14, 2025
55397df
treat undocumented units as warning
rettigl Mar 14, 2025
6828e03
require concept_name for namefitting
lukaspie Mar 14, 2025
a493e1a
add test for namefitting with typo in concept_name
lukaspie Mar 14, 2025
3371825
remove test comment
lukaspie Mar 14, 2025
d317a7e
add proper handling of group attributes
rettigl Mar 14, 2025
a018c9b
add exception for @URL, and run tests on update branch
rettigl Mar 14, 2025
25e0013
prefer direct name match over concept match reliably
rettigl Mar 14, 2025
f24e9e0
streamline best_namefit_of function
lukaspie Mar 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/plugin_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ jobs:
branch: main
tests_to_run: tests/.
- plugin: pynxtools-igor
branch: main
branch: update-definitions
tests_to_run: tests/.
- plugin: pynxtools-mpes
branch: main
branch: update-definitions
tests_to_run: tests/.
- plugin: pynxtools-raman
branch: main
Expand Down
22 changes: 22 additions & 0 deletions src/pynxtools/data/NXtest.nxdl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,13 @@
</field>
</group>
<group type="NXdata" name="NXODD_name">
<field name="anamethatRENAMES" nameType="any" type="NX_INT" units="NX_UNITLESS"/>
<field name="float_value" type="NX_FLOAT" optional="true" units="NX_ENERGY">
<doc>A dummy entry for a float value.</doc>
</field>
<field name="number_value" type="NX_NUMBER" optional="true" units="NX_ENERGY">
<doc>A dummy entry for a number value.</doc>
</field>
<field name="bool_value" type="NX_BOOLEAN" required="true" units="NX_UNITLESS">
<doc>A dummy entry for a bool value.</doc>
</field>
Expand All @@ -53,7 +57,20 @@
<item value="3rd type" />
<item value="4th type" />
</enumeration>
<attribute name="array" type="NX_INT">
<enumeration>
<item value="[0, 1, 2]" />
<item value="[2, 3, 4]" />
</enumeration>
</attribute>
</field>
<attribute name="group_attribute">
</attribute>
<attribute name="signal">
<enumeration>
<item value="data"/>
</enumeration>
</attribute>
</group>
<group type="NXnote" name="required_group">
<doc>This is a required yet empty group.</doc>
Expand All @@ -72,5 +89,10 @@
<doc>This is a required group in an optional group.</doc>
</group>
</group>
<group type="NXuser" optional="true">
<field name="name">
<doc>A required NXuser entry.</doc>
</field>
</group>
</group>
</definition>
184 changes: 76 additions & 108 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from datetime import datetime, timezone
from enum import Enum
from functools import lru_cache
from typing import Any, Callable, List, Optional, Tuple, Union
from typing import Any, Callable, List, Optional, Tuple, Union, Sequence

import h5py
import lxml.etree as ET
Expand Down Expand Up @@ -80,12 +80,12 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
value = "<unknown>"

if log_type == ValidationProblem.UnitWithoutDocumentation:
logger.warning(
f"The unit, {path} = {value}, is being written but has no documentation"
logger.info(
f"The unit, {path} = {value}, is being written but has no documentation."
)
elif log_type == ValidationProblem.InvalidEnum:
logger.warning(
f"The value at {path} should be on of the following strings: {value}"
f"The value at {path} should be one of the following: {value}"
)
elif log_type == ValidationProblem.MissingRequiredGroup:
logger.warning(f"The required group, {path}, hasn't been supplied.")
Expand All @@ -96,7 +96,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidType:
logger.warning(
f"The value at {path} should be one of: {value}"
f"The value at {path} should be one of the following Python types: {value}"
f", as defined in the NXDL as {args[0] if args else '<unknown>'}."
)
elif log_type == ValidationProblem.InvalidDatetime:
Expand All @@ -114,15 +114,18 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
f"Expected a group at {path} but found a field or attribute."
)
elif log_type == ValidationProblem.MissingDocumentation:
logger.warning(f"Field {path} written without documentation.")
if "@" in path.rsplit("/")[-1]:
logger.warning(f"Attribute {path} written without documentation.")
else:
logger.warning(f"Field {path} written without documentation.")
elif log_type == ValidationProblem.MissingUnit:
logger.warning(
f"Field {path} requires a unit in the unit category {value}."
)
elif log_type == ValidationProblem.MissingRequiredAttribute:
logger.warning(f'Missing attribute: "{path}"')
elif log_type == ValidationProblem.UnitWithoutField:
logger.warning(f"Unit {path} in dataset without its field {value}")
logger.warning(f"Unit {path} in dataset without its field {value}.")
elif log_type == ValidationProblem.AttributeForNonExistingField:
logger.warning(
f"There were attributes set for the field {path}, "
Expand Down Expand Up @@ -158,9 +161,11 @@ def collect_and_log(
"NX_ANY",
):
return
if self.logging:
if self.logging and path + str(log_type) + str(value) not in self.data:
self._log(path, log_type, value, *args, **kwargs)
self.data.add(path)
# info messages should not fail validation
if log_type not in (ValidationProblem.UnitWithoutDocumentation,):
self.data.add(path + str(log_type) + str(value))

def has_validation_problems(self):
"""Returns True if there were any validation problems."""
Expand Down Expand Up @@ -215,7 +220,6 @@ def get_nxdl_name_for(xml_elem: ET._Element) -> Optional[str]:
The name of the element.
None if the xml element has no name or type attribute.
"""
""""""
if "name" in xml_elem.attrib:
return xml_elem.attrib["name"]
if "type" in xml_elem.attrib:
Expand Down Expand Up @@ -575,146 +579,103 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]:
return True, []


NUMPY_FLOAT_TYPES = (np.half, np.float16, np.single, np.double, np.longdouble)
NUMPY_INT_TYPES = (np.short, np.intc, np.int_)
NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint)
# np int for np version 1.26.0
np_int = (
np.intc,
np.int_,
np.intp,
np.int8,
np.int16,
np.int32,
np.int64,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.unsignedinteger,
np.signedinteger,
)
np_float = (np.float16, np.float32, np.float64, np.floating)
np_bytes = (np.bytes_, np.byte, np.ubyte)
np_char = (np.str_, np.char.chararray, *np_bytes)
np_bool = (np.bool_,)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle)
nx_char = (str, np.character)
nx_int = (int, np.integer)
nx_float = (float, np.floating)
nx_number = nx_int + nx_float

NEXUS_TO_PYTHON_DATA_TYPES = {
"ISO8601": (str,),
"NX_BINARY": (
bytes,
bytearray,
np.ndarray,
*np_bytes,
),
"NX_BOOLEAN": (bool, np.ndarray, *np_bool),
"NX_CHAR": (str, np.ndarray, *np_char),
"NX_BINARY": (bytes, bytearray, np.bytes_),
"NX_BOOLEAN": (bool, np.bool_),
"NX_CHAR": nx_char,
"NX_DATE_TIME": (str,),
"NX_FLOAT": (float, np.ndarray, *np_float),
"NX_INT": (int, np.ndarray, *np_int),
"NX_UINT": (np.ndarray, np.unsignedinteger),
"NX_NUMBER": (
int,
float,
np.ndarray,
*np_int,
*np_float,
dict,
),
"NX_POSINT": (
int,
np.ndarray,
np.signedinteger,
), # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (complex, np.ndarray, *np_complex),
"NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided.
"NX_CHAR_OR_NUMBER": (
str,
int,
float,
np.ndarray,
*np_char,
*np_int,
*np_float,
dict,
"NX_FLOAT": nx_float,
"NX_INT": nx_int,
"NX_UINT": (np.unsignedinteger,),
"NX_NUMBER": nx_number,
"NX_POSINT": nx_int, # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (
complex,
np.complexfloating,
),
"NX_CHAR_OR_NUMBER": nx_char + nx_number,
"NXDL_TYPE_UNAVAILABLE": (
nx_char,
), # Defaults to a string if a type is not provided.
}


def check_all_children_for_callable(objects: list, check: Callable, *args) -> bool:
"""Checks whether all objects in list are validated by given callable."""
for obj in objects:
if not check(obj, *args):
return False
def check_all_children_for_callable(
objects: Union[list, np.ndarray], check_function: Optional[Callable] = None, *args
) -> bool:
"""Checks whether all objects in list or numpy array are validated
by given callable and types.
"""
if not isinstance(objects, np.ndarray):
objects = np.array(objects)

return True
return all([check_function(o, *args) for o in objects.flat])


def is_valid_data_type(value, accepted_types):
"""Checks whether the given value or its children are of an accepted type."""
if not isinstance(value, list):
return isinstance(value, accepted_types)

return check_all_children_for_callable(value, isinstance, accepted_types)


def is_positive_int(value):
"""Checks whether the given value or its children are positive."""

def is_greater_than(num):
return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0
return num > 0

if isinstance(value, list):
return check_all_children_for_callable(value, is_greater_than)

return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0
return check_all_children_for_callable(
objects=value, check_function=is_greater_than
)


def convert_str_to_bool_safe(value):
def convert_str_to_bool_safe(value: str) -> Optional[bool]:
"""Only returns True or False if someone mistakenly adds quotation marks but mean a bool.

For everything else it returns None.
For everything else it raises a ValueError.
"""
if value.lower() == "true":
return True
if value.lower() == "false":
return False
return None
raise ValueError(f"Could not interpret string '{value}' as boolean.")


def is_valid_data_field(value, nxdl_type, path):
# todo: Check this funciton and wtire test for it. It seems the funciton is not
def is_valid_data_field(value: Any, nxdl_type: str, nxdl_enum: list, path: str) -> Any:
# todo: Check this function and write test for it. It seems the function is not
# working as expected.
"""Checks whether a given value is valid according to what is defined in the NXDL.

This function will also try to convert typical types, for example int to float,
and return the successful conversion.
"""Checks whether a given value is valid according to the type defined in the NXDL.

If it fails to convert, it raises an Exception.
This function only tries to convert boolean value in str format (e.g. "true" ) to
python Boolean (True). In case, it fails to convert, it raises an Exception.

Returns two values: first, boolean (True if the the value corresponds to nxdl_type,
False otherwise) and second, result of attempted conversion or the original value
(if conversion is not needed or impossible)
Return:
value: the possibly converted data value
"""
accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
output_value = value

accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
# Do not count the dict as it represents a link value
if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types):
try:
if accepted_types[0] is bool and isinstance(value, str):
# try to convert string to bool
if accepted_types[0] is bool and isinstance(value, str):
try:
value = convert_str_to_bool_safe(value)
if value is None:
raise ValueError
output_value = accepted_types[0](value)
except ValueError:
except (ValueError, TypeError):
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
else:
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value

if nxdl_type == "NX_POSINT" and not is_positive_int(value):
collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
return False, value

if nxdl_type in ("ISO8601", "NX_DATE_TIME"):
iso8601 = re.compile(
Expand All @@ -724,9 +685,16 @@ def is_valid_data_field(value, nxdl_type, path):
results = iso8601.search(value)
if results is None:
collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value)
return False, value

return True, output_value
# Check enumeration
if nxdl_enum is not None and value not in nxdl_enum:
collector.collect_and_log(
path,
ValidationProblem.InvalidEnum,
nxdl_enum,
)

return value


@lru_cache(maxsize=None)
Expand Down
17 changes: 13 additions & 4 deletions src/pynxtools/dataconverter/nexus_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ class NexusEntity(NexusNode):
type: Literal["field", "attribute"]
unit: Optional[NexusUnitCategory] = None
dtype: NexusType = "NX_CHAR"
items: Optional[List[str]] = None
items: Optional[List[Any]] = None
shape: Optional[Tuple[Optional[int], ...]] = None

def _set_type(self):
Expand Down Expand Up @@ -790,14 +790,23 @@ def _set_items(self):
based on the values in the inheritance chain.
The first vale found is used.
"""
if not self.dtype == "NX_CHAR":
return
for elem in self.inheritance:
enum = elem.find(f"nx:enumeration", namespaces=namespaces)
if enum is not None:
self.items = []
for items in enum.findall(f"nx:item", namespaces=namespaces):
self.items.append(items.attrib["value"])
value = items.attrib["value"]
if value[0] == "[" and value[-1] == "]":
import ast

try:
self.items.append(ast.literal_eval(value))
except (ValueError, SyntaxError):
raise Exception(
f"Error parsing enumeration item in the provided NXDL: {value}"
)
else:
self.items.append(value)
return

def _set_shape(self):
Expand Down
Loading