Skip to content
10 changes: 10 additions & 0 deletions src/pynxtools/data/NXtest.nxdl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,13 @@
</field>
</group>
<group type="NXdata" name="NXODD_name">
<field name="anamethatRENAMES" nametype="any" type="NX_INT" units="NX_UNITLESS"/>
<field name="float_value" type="NX_FLOAT" optional="true" units="NX_ENERGY">
<doc>A dummy entry for a float value.</doc>
</field>
<field name="number_value" type="NX_NUMBER" optional="true" units="NX_ENERGY">
<doc>A dummy entry for a number value.</doc>
</field>
<field name="bool_value" type="NX_BOOLEAN" required="true" units="NX_UNITLESS">
<doc>A dummy entry for a bool value.</doc>
</field>
Expand All @@ -53,6 +57,12 @@
<item value="3rd type" />
<item value="4th type" />
</enumeration>
<attribute name="array" type="NX_INT">
<enumeration>
<item value="[0, 1, 2]" />
<item value="[2, 3, 4]" />
</enumeration>
</attribute>
</field>
</group>
<group type="NXnote" name="required_group">
Expand Down
106 changes: 33 additions & 73 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidEnum:
logger.warning(
f"The value at {path} should be on of the following strings: {value}"
f"The value at {path} should be one of the following: {value}"
)
elif log_type == ValidationProblem.MissingRequiredGroup:
logger.warning(f"The required group, {path}, hasn't been supplied.")
Expand All @@ -96,7 +96,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidType:
logger.warning(
f"The value at {path} should be one of: {value}"
f"The value at {path} should be one of the following Python types: {value}"
f", as defined in the NXDL as {args[0] if args else '<unknown>'}."
)
elif log_type == ValidationProblem.InvalidDatetime:
Expand Down Expand Up @@ -158,9 +158,9 @@ def collect_and_log(
"NX_ANY",
):
return
if self.logging:
if self.logging and path + str(log_type) + str(value) not in self.data:
self._log(path, log_type, value, *args, **kwargs)
self.data.add(path)
self.data.add(path + str(log_type) + str(value))

def has_validation_problems(self):
"""Returns True if there were any validation problems."""
Expand Down Expand Up @@ -578,66 +578,22 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]:
NUMPY_FLOAT_TYPES = (np.half, np.float16, np.single, np.double, np.longdouble)
NUMPY_INT_TYPES = (np.short, np.intc, np.int_)
NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint)
# np int for np version 1.26.0
np_int = (
np.intc,
np.int_,
np.intp,
np.int8,
np.int16,
np.int32,
np.int64,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.unsignedinteger,
np.signedinteger,
)
np_float = (np.float16, np.float32, np.float64, np.floating)
np_bytes = (np.bytes_, np.byte, np.ubyte)
np_char = (np.str_, np.char.chararray, *np_bytes)
np_bool = (np.bool_,)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle)

NEXUS_TO_PYTHON_DATA_TYPES = {
"ISO8601": (str,),
"NX_BINARY": (
bytes,
bytearray,
np.ndarray,
*np_bytes,
),
"NX_BOOLEAN": (bool, np.ndarray, *np_bool),
"NX_CHAR": (str, np.ndarray, *np_char),
"NX_DATE_TIME": (str,),
"NX_FLOAT": (float, np.ndarray, *np_float),
"NX_INT": (int, np.ndarray, *np_int),
"NX_UINT": (np.ndarray, np.unsignedinteger),
"NX_NUMBER": (
int,
float,
np.ndarray,
*np_int,
*np_float,
dict,
),
"ISO8601": (str),
"NX_BINARY": (bytes, bytearray, np.byte, np.ubyte),
"NX_BOOLEAN": (bool, np.bool_),
"NX_CHAR": (str, np.chararray),
"NX_DATE_TIME": (str),
"NX_FLOAT": (float, np.floating),
"NX_INT": (int, np.integer),
"NX_UINT": (np.unsignedinteger),
"NX_NUMBER": (int, float, np.integer, np.floating),
"NX_POSINT": (
int,
np.ndarray,
np.signedinteger,
np.integer,
), # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (complex, np.ndarray, *np_complex),
"NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided.
"NX_CHAR_OR_NUMBER": (
str,
int,
float,
np.ndarray,
*np_char,
*np_int,
*np_float,
dict,
),
"NXDL_TYPE_UNAVAILABLE": (str), # Defaults to a string if a type is not provided.
}


Expand All @@ -650,9 +606,14 @@ def check_all_children_for_callable(objects: list, check: Callable, *args) -> bo
return True


def is_list_like(object) -> bool:
    """Return True when ``object`` is a ``list`` or a ``numpy.ndarray``.

    Any other kind of value (tuples, strings, scalars, ...) yields False,
    because only these two types are tested.
    """
    list_like_types = (list, np.ndarray)
    return isinstance(object, list_like_types)


def is_valid_data_type(value, accepted_types):
    """Checks whether the given value or its children are of an accepted type.

    Args:
        value: A single value, or a list-like container of values. What
            counts as list-like is decided by ``is_list_like`` (``list`` and
            ``np.ndarray``).
        accepted_types: A type or tuple of types, handed to ``isinstance``.

    Returns:
        For a value that is not list-like, the result of
        ``isinstance(value, accepted_types)``. For a list-like value, the
        result of ``check_all_children_for_callable`` called with
        ``isinstance`` and ``accepted_types``.
    """
    # Only the is_list_like() guard is kept: the older
    # ``isinstance(value, list)`` guard it replaces did not treat ndarrays
    # as containers.
    if not is_list_like(value):
        return isinstance(value, accepted_types)

    # Containers are validated through their children rather than as a whole.
    return check_all_children_for_callable(value, isinstance, accepted_types)
Expand All @@ -664,7 +625,7 @@ def is_positive_int(value):
def is_greater_than(num):
return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0

if isinstance(value, list):
if is_list_like(value):
return check_all_children_for_callable(value, is_greater_than)

return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0
Expand Down Expand Up @@ -700,17 +661,16 @@ def is_valid_data_field(value, nxdl_type, path):
output_value = value

if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types):
try:
if accepted_types[0] is bool and isinstance(value, str):
value = convert_str_to_bool_safe(value)
if value is None:
raise ValueError
output_value = accepted_types[0](value)
except ValueError:
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value
Comment on lines -703 to -713

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would keep the boolean string-to-bool conversion; this certainly makes sense, and we could even think about doing this for all datatypes with safe literal conversion, as you did for the enums.

if accepted_types[0] is bool and isinstance(value, str):
converted_value = convert_str_to_bool_safe(value)
if converted_value is not None:
output_value = converted_value
return True, converted_value

collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value

if nxdl_type == "NX_POSINT" and not is_positive_int(value):
collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
Expand Down
17 changes: 13 additions & 4 deletions src/pynxtools/dataconverter/nexus_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ class NexusEntity(NexusNode):
type: Literal["field", "attribute"]
unit: Optional[NexusUnitCategory] = None
dtype: NexusType = "NX_CHAR"
items: Optional[List[str]] = None
items: Optional[List[Any]] = None
shape: Optional[Tuple[Optional[int], ...]] = None

def _set_type(self):
Expand Down Expand Up @@ -790,14 +790,23 @@ def _set_items(self):
based on the values in the inheritance chain.
The first vale found is used.
"""
if not self.dtype == "NX_CHAR":
return
for elem in self.inheritance:
enum = elem.find(f"nx:enumeration", namespaces=namespaces)
if enum is not None:
self.items = []
for items in enum.findall(f"nx:item", namespaces=namespaces):
self.items.append(items.attrib["value"])
value = items.attrib["value"]
if value[0] == "[" and value[-1] == "]":
import ast

try:
self.items.append(ast.literal_eval(value))
except (ValueError, SyntaxError):
raise Exception(
f"Error parsing enumeration item in the provided NXDL: {value}"
)
else:
self.items.append(value)
return

def _set_shape(self):
Expand Down
6 changes: 5 additions & 1 deletion src/pynxtools/dataconverter/readers/example/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ def read(
# outputs with --generate-template for a provided NXDL file
if (
k.startswith("/ENTRY[entry]/required_group")
or k == "/ENTRY[entry]/optional_parent/req_group_in_opt_group"
or k
in (
"/ENTRY[entry]/optional_parent/req_group_in_opt_group",
"/ENTRY[entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatrenames]",
)
or k.startswith("/ENTRY[entry]/OPTIONAL_group")
):
continue
Expand Down
55 changes: 40 additions & 15 deletions src/pynxtools/dataconverter/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def validate_dict_against(
appdef: str, mapping: Mapping[str, Any], ignore_undocumented: bool = False
) -> Tuple[bool, List]:
"""
Validates a mapping against the NeXus tree for applicationd definition `appdef`.
Validates a mapping against the NeXus tree for application definition `appdef`.

Args:
appdef (str): The appdef name to validate against.
Expand Down Expand Up @@ -248,6 +248,14 @@ def check_nxdata():
prev_path=prev_path,
)

# check NXdata attributes
for attr in ("signal", "auxiliary_signals", "axes"):
handle_attribute(
node.search_add_child_for(attr),
keys,
prev_path=prev_path,
)

for i, axis in enumerate(axes):
if axis == ".":
continue
Expand Down Expand Up @@ -392,17 +400,17 @@ def _follow_link(
def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
full_path = remove_from_not_visited(f"{prev_path}/{node.name}")
variants = get_variations_of(node, keys)
if not variants:
if node.optionality == "required" and node.type in missing_type_err:
collector.collect_and_log(
full_path, missing_type_err.get(node.type), None
)

if (
not variants
and node.optionality == "required"
and node.type in missing_type_err
):
collector.collect_and_log(full_path, missing_type_err.get(node.type), None)
return

for variant in variants:
if node.optionality == "required" and isinstance(keys[variant], Mapping):
# Check if all fields in the dict are actual attributes (startwith @)
# Check if all fields in the dict are actual attributes (startswith @)
all_attrs = True
for entry in keys[variant]:
if not entry.startswith("@"):
Expand Down Expand Up @@ -454,17 +462,20 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
prev_path=f"{prev_path}/{variant}",
)

remove_from_not_visited(f"{prev_path}/{variant}")

# TODO: Build variadic map for fields and attributes
# Introduce variadic siblings in NexusNode?

def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
full_path = remove_from_not_visited(f"{prev_path}/@{node.name}")
variants = get_variations_of(node, keys)
if not variants:
if node.optionality == "required" and node.type in missing_type_err:
collector.collect_and_log(
full_path, missing_type_err.get(node.type), None
)
if (
not variants
and node.optionality == "required"
and node.type in missing_type_err
):
collector.collect_and_log(full_path, missing_type_err.get(node.type), None)
return

for variant in variants:
Expand All @@ -476,6 +487,20 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}",
)

# Check enumeration
if (
node.items is not None
and mapping[
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}"
]
not in node.items
):
collector.collect_and_log(
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}",
ValidationProblem.InvalidEnum,
node.items,
)

Comment thread
sherjeelshabih marked this conversation as resolved.
def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
global collector
old_collector = collector
Expand Down Expand Up @@ -556,7 +581,7 @@ def check_attributes_of_nonexisting_field(
) -> list:
"""
This method runs through the mapping dictionary and checks if there are any
attributes assigned to the fields (not groups!) which are not expicitly
attributes assigned to the fields (not groups!) which are not explicitly
present in the mapping.
If there are any found, a warning is logged and the corresponding items are
added to the list returned by the method.
Expand Down Expand Up @@ -657,7 +682,7 @@ def check_type_with_tree(
if (next_child_class is not None) or (next_child_name is not None):
output = None
for child in node.children:
# regexs to separarte the class and the name from full name of the child
# regexs to separate the class and the name from full name of the child
child_class_from_node = re.sub(
r"(\@.*)*(\[.*?\])*(\(.*?\))*([a-z]\_)*(\_[a-z])*[a-z]*\s*",
"",
Expand Down
5 changes: 4 additions & 1 deletion tests/data/dataconverter/readers/example/testdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
"float_value_units": "nm",
"int_value": -3,
"int_value_units": "eV",
"number_value": 3,
"number_value_units": "eV",
"posint_value": 7,
"posint_value_units": "kg",
"definition": "NXtest",
Expand All @@ -17,5 +19,6 @@
"date_value_units": "",
"required_child": 1,
"optional_child": 1,
"@version": "1.0"
"@version": "1.0",
"@array": [0, 1, 2]
}
Loading