From 0f46c34200bfe2a748cad9bfc273cd4ffdffa606 Mon Sep 17 00:00:00 2001 From: Rubel Date: Tue, 18 Feb 2025 15:11:26 +0100 Subject: [PATCH 01/61] Fix the nx_char type for numpy to and . --- src/pynxtools/dataconverter/helpers.py | 5 +++-- tests/dataconverter/test_helpers.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 71d4a4b9f..b9e99d1c6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -591,12 +591,13 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: np.uint16, np.uint32, np.uint64, + np.uint, np.unsignedinteger, np.signedinteger, ) np_float = (np.float16, np.float32, np.float64, np.floating) np_bytes = (np.bytes_, np.byte, np.ubyte) -np_char = (np.str_, np.char.chararray, *np_bytes) +np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string np_bool = (np.bool_,) np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle) NEXUS_TO_PYTHON_DATA_TYPES = { @@ -608,7 +609,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: *np_bytes, ), "NX_BOOLEAN": (bool, np.ndarray, *np_bool), - "NX_CHAR": (str, np.ndarray, *np_char), + "NX_CHAR": (str, *np_char), "NX_DATE_TIME": (str,), "NX_FLOAT": (float, np.ndarray, *np_float), "NX_INT": (int, np.ndarray, *np_int), diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 0ef64ea0d..a34a48827 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -25,6 +25,7 @@ import numpy as np import pytest + from pynxtools.dataconverter import helpers from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.validation import validate_dict_against @@ -278,15 +279,14 @@ def fixture_filled_test_data(template, tmp_path): "not_a_num", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" - "t_value should be one of: (, , , ," - " , , , , , " - ", , , , , ), as defined in " - "the NXDL as NX_INT." + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value " + "should be one of: (, , , , , , , , , , , , , , , ), as defined in " + "the NXDL as NX_INT" ), id="string-instead-of-int", ), From dd8beb4cbaf07c2c7894a2cb4c676148d890aebb Mon Sep 17 00:00:00 2001 From: Rubel Date: Wed, 19 Feb 2025 12:20:24 +0100 Subject: [PATCH 02/61] Still char instead of the int is being validated which is wrong. --- src/pynxtools/dataconverter/helpers.py | 70 +++++++++++++++++--------- tests/dataconverter/test_helpers.py | 46 ++++++++++++----- 2 files changed, 80 insertions(+), 36 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index b9e99d1c6..602a8abc2 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -596,7 +596,9 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: np.signedinteger, ) np_float = (np.float16, np.float32, np.float64, np.floating) -np_bytes = (np.bytes_, np.byte, np.ubyte) +# Not to be confused with `np.byte` and `np.ubyte`, these store +# and integer of `8bit` and `unsigned 8bit` respectively. +np_bytes = (np.bytes_,) np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string np_bool = (np.bool_,) np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle) @@ -604,59 +606,77 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: "ISO8601": (str,), "NX_BINARY": ( bytes, - bytearray, - np.ndarray, *np_bytes, ), - "NX_BOOLEAN": (bool, np.ndarray, *np_bool), + "NX_BOOLEAN": (bool, *np_bool), "NX_CHAR": (str, *np_char), "NX_DATE_TIME": (str,), - "NX_FLOAT": (float, np.ndarray, *np_float), - "NX_INT": (int, np.ndarray, *np_int), - "NX_UINT": (np.ndarray, np.unsignedinteger), + "NX_FLOAT": (float, *np_float), + "NX_INT": (int, *np_int), + "NX_UINT": ( + np.unsignedinteger, + np.uint, + ), "NX_NUMBER": ( int, float, - np.ndarray, *np_int, *np_float, - dict, ), "NX_POSINT": ( int, - np.ndarray, np.signedinteger, ), # > 0 is checked in is_valid_data_field() - "NX_COMPLEX": (complex, np.ndarray, *np_complex), - "NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided. + "NX_COMPLEX": (complex, *np_complex), + "NXDL_TYPE_UNAVAILABLE": ( + str, + *np_char, + ), # Defaults to a string if a type is not provided. "NX_CHAR_OR_NUMBER": ( str, int, float, - np.ndarray, *np_char, *np_int, *np_float, - dict, ), } -def check_all_children_for_callable(objects: list, check: Callable, *args) -> bool: - """Checks whether all objects in list are validated by given callable.""" - for obj in objects: - if not check(obj, *args): - return False +def check_all_children_for_callable( + objects: Union[list, np.ndarray], + checker: Optional[Callable] = None, + accepted_types: Optional[tuple] = None, +) -> bool: + """Checks whether all objects in list or numpy array are validated + by given callable and types. + """ - return True + if checker is not None: + for obj in objects: + args = (obj, accepted_types) if accepted_types is not None else (obj,) + if not checker(*args): + return False + return True + + # default checker + tmp_arr = None + if isinstance(objects, list): + # Handles list and list of list + tmp_arr = np.array(objects) + elif isinstance(objects, np.ndarray): + tmp_arr = objects + if tmp_arr is not None: + return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) def is_valid_data_type(value, accepted_types): """Checks whether the given value or its children are of an accepted type.""" - if not isinstance(value, list): + + if not isinstance(value, (list, np.ndarray)): return isinstance(value, accepted_types) - return check_all_children_for_callable(value, isinstance, accepted_types) + return check_all_children_for_callable(objects=value, accepted_types=accepted_types) def is_positive_int(value): @@ -666,7 +686,7 @@ def is_greater_than(num): return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0 if isinstance(value, list): - return check_all_children_for_callable(value, is_greater_than) + return check_all_children_for_callable(objects=value, checker=is_greater_than) return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0 @@ -707,7 +727,9 @@ def is_valid_data_field(value, nxdl_type, path): if value is None: raise ValueError output_value = accepted_types[0](value) - except ValueError: + ## TODO recheck the the TypeError after discussion with Lev + ## of the above line + except (ValueError, TypeError): collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index a34a48827..1352ce6b5 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -279,14 +279,13 @@ def fixture_filled_test_data(template, tmp_path): "not_a_num", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value " - "should be one of: (, , , , , , , , , , , , , , , ), as defined in " - "the NXDL as NX_INT" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should " + "be one of: (, , " + ", , , " + ", , , " + ", , , " + ", , , ), as defined in the NXDL as NX_INT" ), id="string-instead-of-int", ), @@ -297,9 +296,9 @@ def fixture_filled_test_data(template, tmp_path): "NOT_TRUE_OR_FALSE", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value sh" - "ould be one of: (, , , ), as defined in the " + "NXDL as NX_BOOLEAN" ), id="string-instead-of-int", ), @@ -327,12 +326,22 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be of Python type:" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should " + "be of Python type:" " (, , )," " as defined in the NXDL as NX_CHAR." ), id="int-instead-of-chars", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + ["list", "of", "chars"], + ), + "", + id="list-of-char-instead-of-chars", + ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", None @@ -340,6 +349,18 @@ def fixture_filled_test_data(template, tmp_path): "", id="empty-optional-field", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + [2], # pylint: disable=E1126 + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be" + " one of: (, , " + ", , ), as defined in the " + "NXDL as NX_FLOAT", + id="list-of-int-instead-of-float", + ), pytest.param( set_to_none_in_dict( TEMPLATE, @@ -534,6 +555,7 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): "link-dict-instead-of-bool", "opt-group-completely-removed", "required-field-provided-in-variadic-optional-group", + "list-of-char-instead-of-chars", ): with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] From 351f377db1c16e70866f36b9758fca220f1f371a Mon Sep 17 00:00:00 2001 From: Rubel Date: Thu, 20 Feb 2025 10:08:08 +0100 Subject: [PATCH 03/61] Remove auto conversion for datatype. --- src/pynxtools/dataconverter/helpers.py | 30 ++++++++++++----------- src/pynxtools/dataconverter/validation.py | 4 +-- tests/dataconverter/test_helpers.py | 29 +++++++++++----------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 602a8abc2..39429ada6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -668,6 +668,7 @@ def check_all_children_for_callable( tmp_arr = objects if tmp_arr is not None: return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) + return False def is_valid_data_type(value, accepted_types): @@ -706,29 +707,30 @@ def convert_str_to_bool_safe(value): def is_valid_data_field(value, nxdl_type, path): # todo: Check this funciton and wtire test for it. It seems the funciton is not # working as expected. - """Checks whether a given value is valid according to what is defined in the NXDL. - - This function will also try to convert typical types, for example int to float, - and return the successful conversion. + """Checks whether a given value is valid according to the type defined in the NXDL. - If it fails to convert, it raises an Exception. + This function also converts bool value comes in str format. In case, it fails to + convert, it raises an Exception. - Returns two values: first, boolean (True if the the value corresponds to nxdl_type, - False otherwise) and second, result of attempted conversion or the original value - (if conversion is not needed or impossible) + Returns two values: + boolean (True if the the value corresponds to nxdl_type, False otherwise) + converted_value bool value. """ - accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] - output_value = value + accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] + # Do not count the dict as it represents a link value if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types): try: if accepted_types[0] is bool and isinstance(value, str): value = convert_str_to_bool_safe(value) if value is None: raise ValueError - output_value = accepted_types[0](value) - ## TODO recheck the the TypeError after discussion with Lev - ## of the above line + return True, value + + collector.collect_and_log( + path, ValidationProblem.InvalidType, accepted_types, nxdl_type + ) + return False, value except (ValueError, TypeError): collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type @@ -749,7 +751,7 @@ def is_valid_data_field(value, nxdl_type, path): collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value) return False, value - return True, output_value + return True, value @lru_cache(maxsize=None) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4b599b43a..cf2a4c0f1 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -422,7 +422,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): continue # Check general validity - is_valid_data_field( + _, _ = is_valid_data_field( mapping[f"{prev_path}/{variant}"], node.dtype, f"{prev_path}/{variant}" ) @@ -468,7 +468,7 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): return for variant in variants: - is_valid_data_field( + _, _ = is_valid_data_field( mapping[ f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" ], diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 1352ce6b5..12ce9f0f1 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -279,13 +279,13 @@ def fixture_filled_test_data(template, tmp_path): "not_a_num", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should " - "be one of: (, , " - ", , , " - ", , , " - ", , , " - ", , , ), as defined in the NXDL as NX_INT" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" + " one of: (, , ," + " , , , <" + "class 'numpy.int32'>, , , <" + "class 'numpy.uint16'>, , , <" + "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." ), id="string-instead-of-int", ), @@ -300,7 +300,7 @@ def fixture_filled_test_data(template, tmp_path): "be one of: (, ), as defined in the " "NXDL as NX_BOOLEAN" ), - id="string-instead-of-int", + id="string-instead-of-bool", ), pytest.param( alter_dict( @@ -309,7 +309,7 @@ def fixture_filled_test_data(template, tmp_path): {"link": "/a-link"}, ), (""), - id="link-dict-instead-of-bool", + id="link-dict-instead-of-int", ), pytest.param( alter_dict( @@ -326,13 +326,13 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should " - "be of Python type:" - " (, , )," - " as defined in the NXDL as NX_CHAR." + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should" + " be one of: (, , " + "), as defined in the NXDL as NX_CHAR." ), id="int-instead-of-chars", ), + # TODO add test array of char pytest.param( alter_dict( TEMPLATE, @@ -551,8 +551,7 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): "UTC-with-+00:00", "UTC-with-Z", "no-child-provided-optional-parent", - "int-instead-of-chars", - "link-dict-instead-of-bool", + "link-dict-instead-of-int", "opt-group-completely-removed", "required-field-provided-in-variadic-optional-group", "list-of-char-instead-of-chars", From 00b78aaf90d7ad3c99a38ea9966e0270fe6ca1e4 Mon Sep 17 00:00:00 2001 From: Rubel Date: Thu, 20 Feb 2025 11:24:46 +0100 Subject: [PATCH 04/61] extends tests. --- tests/dataconverter/test_helpers.py | 154 +++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 2 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 12ce9f0f1..008b14b02 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -289,6 +289,58 @@ def fixture_filled_test_data(template, tmp_path): ), id="string-instead-of-int", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + ["1", "2", "3"], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" + " one of: (, , ," + " , , , <" + "class 'numpy.int32'>, , , <" + "class 'numpy.uint16'>, , , <" + "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." + ), + id="list-of-int-str-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + np.array([2.0, 3.0, 4.0], dtype=np.float32), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" + " one of: (, , ," + " , , , <" + "class 'numpy.int32'>, , , <" + "class 'numpy.uint16'>, , , <" + "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." + ), + id="array-of-float-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + [2, 3, 4], + ), + (""), + id="List-of-int-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + np.array([2, 3, 4], dtype=np.int32), + ), + (""), + id="array-of-int32-instead-of-int", + ), pytest.param( alter_dict( TEMPLATE, @@ -321,6 +373,48 @@ def fixture_filled_test_data(template, tmp_path): ), id="negative-posint", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + [-1, 2], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " + "should be a positive int, but is [-1, 2]." + ), + id="negative-posint-list", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + np.array([-1, 2], dtype=np.int8), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value should" + " be a positive int, but is [-1 2]." + ), + id="negative-posint-array", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + [1, 2], + ), + (""), + id="positive-posint-list", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + np.array([1, 2], dtype=np.int8), + ), + (""), + id="positive-posint-array", + ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 @@ -332,7 +426,35 @@ def fixture_filled_test_data(template, tmp_path): ), id="int-instead-of-chars", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["1", "2", "3"], dtype=np.str_), + ), + (""), + id="array-of-chars", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["1", "2", "3"], dtype=np.bytes_), + ), + (""), + id="array-of-bytes-chars", + ), + # pytest.param( + # alter_dict( + # TEMPLATE, + # "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + # np.char.chararray(["1", "2", "3"]), + # ), + # (""), + # id="numpy-chararray", + # ), # TODO add test array of char + # TODO add test for numpy array of char and chararray pytest.param( alter_dict( TEMPLATE, @@ -340,7 +462,7 @@ def fixture_filled_test_data(template, tmp_path): ["list", "of", "chars"], ), "", - id="list-of-char-instead-of-chars", + id="list-of-string-instead-of-chars", ), pytest.param( alter_dict( @@ -349,6 +471,27 @@ def fixture_filled_test_data(template, tmp_path): "", id="empty-optional-field", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.array([2.0, 3.0, 4.0], dtype=np.float32), + ), + "", + id="array-of-float-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.array(["2.0", "3.0"], dtype=np.str_), + ), + " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should " + "be one of: (, , , , ), as defined in the " + "NXDL as NX_FLOAT.", + id="array-of-str-instead-of-float", + ), pytest.param( alter_dict( TEMPLATE, @@ -554,7 +697,14 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): "link-dict-instead-of-int", "opt-group-completely-removed", "required-field-provided-in-variadic-optional-group", - "list-of-char-instead-of-chars", + "list-of-string-instead-of-chars", + "array-of-int32-instead-of-int", + "List-of-int-instead-of-int", + "positive-posint-list", + "positive-posint-array", + "array-of-chars", + "array-of-bytes-chars", + "array-of-float-instead-of-float", ): with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] From 3ad40a0f0b4028e40cc6f50882717f3cc5920b37 Mon Sep 17 00:00:00 2001 From: Rubel Date: Thu, 27 Feb 2025 16:04:06 +0100 Subject: [PATCH 05/61] resolve PR comments --- src/pynxtools/dataconverter/helpers.py | 24 +++++++++++------------ src/pynxtools/dataconverter/validation.py | 4 ++-- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 39429ada6..3703faa33 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -597,7 +597,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: ) np_float = (np.float16, np.float32, np.float64, np.floating) # Not to be confused with `np.byte` and `np.ubyte`, these store -# and integer of `8bit` and `unsigned 8bit` respectively. +# an integer of `8bit` and `unsigned 8bit` respectively. np_bytes = (np.bytes_,) np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string np_bool = (np.bool_,) @@ -658,16 +658,14 @@ def check_all_children_for_callable( if not checker(*args): return False return True - - # default checker - tmp_arr = None + if isinstance(objects, tuple): + return False if isinstance(objects, list): # Handles list and list of list - tmp_arr = np.array(objects) - elif isinstance(objects, np.ndarray): - tmp_arr = objects - if tmp_arr is not None: - return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) + return all([type(elem) in accepted_types for elem in objects]) + if isinstance(objects, np.ndarray): + return any([np.issubdtype(objects.dtype, type_) for type_ in accepted_types]) + return False @@ -709,12 +707,12 @@ def is_valid_data_field(value, nxdl_type, path): # working as expected. """Checks whether a given value is valid according to the type defined in the NXDL. - This function also converts bool value comes in str format. In case, it fails to - convert, it raises an Exception. + This function only tries to convert boolean value in str format (e.g. "true" ) to + python Boolean (True). In case, it fails to convert, it raises an Exception. Returns two values: - boolean (True if the the value corresponds to nxdl_type, False otherwise) - converted_value bool value. + Bool: (True if the the value corresponds to nxdl_type, False otherwise) + Any: Converted_value bool value if possible otherwise original value. """ accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index cf2a4c0f1..b2c14c355 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -534,8 +534,8 @@ def is_documented(key: str, node: NexusNode) -> bool: collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit ) - - return is_valid_data_field(mapping[key], node.dtype, key)[0] + is_documented_flag, _ = is_valid_data_field(mapping[key], node.dtype, key) + return is_documented_flag def recurse_tree( node: NexusNode, From 6cf6f6d50153beb956b69c6e919fe72aecfe9fd1 Mon Sep 17 00:00:00 2001 From: Rubel Date: Thu, 27 Feb 2025 16:43:41 +0100 Subject: [PATCH 06/61] Remove unnecessary returned value. --- src/pynxtools/dataconverter/helpers.py | 12 ++++++------ src/pynxtools/dataconverter/validation.py | 6 +++--- tests/dataconverter/test_helpers.py | 4 +++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 3703faa33..4274dc888 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -723,21 +723,21 @@ def is_valid_data_field(value, nxdl_type, path): value = convert_str_to_bool_safe(value) if value is None: raise ValueError - return True, value + return True collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) - return False, value + return False except (ValueError, TypeError): collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) - return False, value + return False if nxdl_type == "NX_POSINT" and not is_positive_int(value): collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value) - return False, value + return False if nxdl_type in ("ISO8601", "NX_DATE_TIME"): iso8601 = re.compile( @@ -747,9 +747,9 @@ def is_valid_data_field(value, nxdl_type, path): results = iso8601.search(value) if results is None: collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value) - return False, value + return False - return True, value + return True @lru_cache(maxsize=None) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index b2c14c355..23d4f4734 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -422,7 +422,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): continue # Check general validity - _, _ = is_valid_data_field( + _ = is_valid_data_field( mapping[f"{prev_path}/{variant}"], node.dtype, f"{prev_path}/{variant}" ) @@ -468,7 +468,7 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): return for variant in variants: - _, _ = is_valid_data_field( + _ = is_valid_data_field( mapping[ f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" ], @@ -534,7 +534,7 @@ def is_documented(key: str, node: NexusNode) -> bool: collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit ) - is_documented_flag, _ = is_valid_data_field(mapping[key], node.dtype, key) + is_documented_flag = is_valid_data_field(mapping[key], node.dtype, key) return is_documented_flag def recurse_tree( diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 008b14b02..119426c40 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -97,7 +97,9 @@ def listify_template(data_dict: Template): "type", "definition", "date_value", - ): + ) or isinstance( + data_dict[optionality][path], np.ndarray + ): # avoid list numpy array listified_template[optionality][path] = data_dict[optionality][path] else: listified_template[optionality][path] = [data_dict[optionality][path]] From 539bdeaff8847ae25be539ce10589eda355000d5 Mon Sep 17 00:00:00 2001 From: Rubel Date: Thu, 27 Feb 2025 17:49:23 +0100 Subject: [PATCH 07/61] fix np integer and float. --- src/pynxtools/dataconverter/helpers.py | 24 +++------------- tests/dataconverter/test_helpers.py | 38 ++++++++------------------ 2 files changed, 15 insertions(+), 47 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 4274dc888..54751a0c1 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -579,29 +579,14 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: NUMPY_INT_TYPES = (np.short, np.intc, np.int_) NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint) # np int for np version 1.26.0 -np_int = ( - np.intc, - np.int_, - np.intp, - np.int8, - np.int16, - np.int32, - np.int64, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.uint, - np.unsignedinteger, - np.signedinteger, -) -np_float = (np.float16, np.float32, np.float64, np.floating) +np_int = (np.integer,) +np_float = (np.floating,) # Not to be confused with `np.byte` and `np.ubyte`, these store # an integer of `8bit` and `unsigned 8bit` respectively. np_bytes = (np.bytes_,) np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string np_bool = (np.bool_,) -np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle) +np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle, np.complex_) NEXUS_TO_PYTHON_DATA_TYPES = { "ISO8601": (str,), "NX_BINARY": ( @@ -710,9 +695,8 @@ def is_valid_data_field(value, nxdl_type, path): This function only tries to convert boolean value in str format (e.g. "true" ) to python Boolean (True). In case, it fails to convert, it raises an Exception. - Returns two values: + Return: Bool: (True if the the value corresponds to nxdl_type, False otherwise) - Any: Converted_value bool value if possible otherwise original value. """ accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 119426c40..8059e00e2 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -281,13 +281,9 @@ def fixture_filled_test_data(template, tmp_path): "not_a_num", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" - " one of: (, , ," - " , , , <" - "class 'numpy.int32'>, , , <" - "class 'numpy.uint16'>, , , <" - "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." + " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" + " be one of: (, ), as defined in the" + " NXDL as NX_INT.\n" ), id="string-instead-of-int", ), @@ -298,13 +294,8 @@ def fixture_filled_test_data(template, tmp_path): ["1", "2", "3"], ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" - " one of: (, , ," - " , , , <" - "class 'numpy.int32'>, , , <" - "class 'numpy.uint16'>, , , <" - "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." + " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" + " be one of: (, )" ), id="list-of-int-str-instead-of-int", ), @@ -316,12 +307,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" - " one of: (, , ," - " , , , <" - "class 'numpy.int32'>, , , <" - "class 'numpy.uint16'>, , , <" - "class 'numpy.uint64'>, , ), as defined in the NXDL as NX_INT." + " one of: (, )" ), id="array-of-float-instead-of-int", ), @@ -488,10 +474,9 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", np.array(["2.0", "3.0"], dtype=np.str_), ), - " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should " - "be one of: (, , , , ), as defined in the " - "NXDL as NX_FLOAT.", + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of: (, ), as defined in the NXDL " + "as NX_FLOAT.\n", id="array-of-str-instead-of-float", ), pytest.param( @@ -501,9 +486,8 @@ def fixture_filled_test_data(template, tmp_path): [2], # pylint: disable=E1126 ), "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be" - " one of: (, , " - ", , ), as defined in the " - "NXDL as NX_FLOAT", + " one of: (, ), as defined in the NXDL" + " as NX_FLOAT.\n", id="list-of-int-instead-of-float", ), pytest.param( From cde7a6e12ab95f7bbcdc4359cc9f1ba3709f0e82 Mon Sep 17 00:00:00 2001 From: Rubel Date: Fri, 28 Feb 2025 11:20:27 +0100 Subject: [PATCH 08/61] minor change. --- src/pynxtools/dataconverter/validation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 23d4f4734..a8ec77f2d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -534,8 +534,7 @@ def is_documented(key: str, node: NexusNode) -> bool: collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit ) - is_documented_flag = is_valid_data_field(mapping[key], node.dtype, key) - return is_documented_flag + return is_valid_data_field(mapping[key], node.dtype, key) def recurse_tree( node: NexusNode, From 4ab4b53545505cb561a7b253fd0f26026414bb18 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 28 Feb 2025 12:21:23 +0100 Subject: [PATCH 09/61] check enums for documented fields, and don't return False if any issue with documented keys --- src/pynxtools/dataconverter/validation.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a8ec77f2d..ca467c756 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -526,6 +526,21 @@ def is_documented(key: str, node: NexusNode) -> bool: if "@" in key and node.type != "attribute": return False + # if we arrive here, the key is supposed to be documented. + # We still do some further checks before returning. + + # Check general validity + _ = is_valid_data_field(mapping[key], node.dtype, key) + + # Check enumeration + if node.items is not None and mapping[key] not in node.items: + collector.collect_and_log( + key, + ValidationProblem.InvalidEnum, + node.items, + ) + + # Check main field exists for units if ( isinstance(node, NexusEntity) and node.unit is not None @@ -534,7 +549,8 @@ def is_documented(key: str, node: NexusNode) -> bool: collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit ) - return is_valid_data_field(mapping[key], node.dtype, key) + + return True def recurse_tree( node: NexusNode, From ae109162cefcfaa810c394e06ea2d72a082c7817 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 18 Feb 2025 17:17:18 +0100 Subject: [PATCH 10/61] add enum checking for attributes --- src/pynxtools/dataconverter/validation.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index ca467c756..80a8bc32c 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -476,6 +476,20 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", ) + # Check enumeration + if ( + node.items is not None + and mapping[ + f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" + ] + not in node.items + ): + collector.collect_and_log( + f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", + ValidationProblem.InvalidEnum, + node.items, + ) + def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str): global collector old_collector = collector From 8f4291e9bdb0f123b438d99c690111797e96b521 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 24 Feb 2025 15:16:49 +0100 Subject: [PATCH 11/61] Adds parsing code for enumeration tree generation during validation --- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/nexus_tree.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 54751a0c1..9e95779ed 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -85,7 +85,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value at {path} should be on of the following strings: {value}" + f"The value at {path} should be one of the following: {value}" ) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group, {path}, hasn't been supplied.") diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index bbba22c09..77349df49 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -761,7 +761,7 @@ class NexusEntity(NexusNode): type: Literal["field", "attribute"] unit: Optional[NexusUnitCategory] = None dtype: NexusType = "NX_CHAR" - items: Optional[List[str]] = None + items: Optional[List[Any]] = None shape: Optional[Tuple[Optional[int], ...]] = None def _set_type(self): @@ -790,14 +790,23 @@ def _set_items(self): based on the values in the inheritance chain. The first vale found is used. """ - if not self.dtype == "NX_CHAR": - return for elem in self.inheritance: enum = elem.find(f"nx:enumeration", namespaces=namespaces) if enum is not None: self.items = [] for items in enum.findall(f"nx:item", namespaces=namespaces): - self.items.append(items.attrib["value"]) + value = items.attrib["value"] + if value[0] == "[" and value[-1] == "]": + import ast + + try: + self.items.append(ast.literal_eval(value)) + except (ValueError, SyntaxError): + raise Exception( + f"Error parsing enumeration item in the provided NXDL: {value}" + ) + else: + self.items.append(value) return def _set_shape(self): From 099b965245253509c18c826449203dcee373565c Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 24 Feb 2025 15:23:22 +0100 Subject: [PATCH 12/61] Fix typos in old test --- tests/dataconverter/test_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 8059e00e2..afa8ff71e 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -583,8 +583,8 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should " - "be on of the following" - " strings: ['1st type', '2nd type', '3rd type', '4th type']" + "be one of the following" + ": ['1st type', '2nd type', '3rd type', '4th type']" ), id="wrong-enum-choice", ), From 4da57353bfec211bce6554aee293fcd9eb339deb Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 25 Feb 2025 23:51:11 +0100 Subject: [PATCH 13/61] always check data types and enums, and check NXdata attributes separately --- src/pynxtools/dataconverter/validation.py | 31 +++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 80a8bc32c..db5b02579 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -248,6 +248,14 @@ def check_nxdata(): prev_path=prev_path, ) + # check NXdata attributes + for attr in ("signal", "auxiliary_signals", "axes"): + handle_attribute( + node.search_add_child_for(attr), + keys, + prev_path=prev_path, + ) + for i, axis in enumerate(axes): if axis == ".": continue @@ -392,12 +400,12 @@ def _follow_link( def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): full_path = remove_from_not_visited(f"{prev_path}/{node.name}") variants = get_variations_of(node, keys) - if not variants: - if node.optionality == "required" and node.type in missing_type_err: - collector.collect_and_log( - full_path, missing_type_err.get(node.type), None - ) - + if ( + not variants + and node.optionality == "required" + and node.type in missing_type_err + ): + collector.collect_and_log(full_path, missing_type_err.get(node.type), None) return for variant in variants: @@ -460,11 +468,12 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): full_path = remove_from_not_visited(f"{prev_path}/@{node.name}") variants = get_variations_of(node, keys) - if not variants: - if node.optionality == "required" and node.type in missing_type_err: - collector.collect_and_log( - full_path, missing_type_err.get(node.type), None - ) + if ( + not variants + and node.optionality == "required" + and node.type in missing_type_err + ): + collector.collect_and_log(full_path, missing_type_err.get(node.type), None) return for variant in variants: From d4f4c2fc3b6a7d8ca791f88e35f6f1067b0b2ba9 Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 26 Feb 2025 14:15:03 +0100 Subject: [PATCH 14/61] fix typos --- src/pynxtools/dataconverter/validation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index db5b02579..aade2d9b2 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -168,7 +168,7 @@ def validate_dict_against( appdef: str, mapping: Mapping[str, Any], ignore_undocumented: bool = False ) -> Tuple[bool, List]: """ - Validates a mapping against the NeXus tree for applicationd definition `appdef`. + Validates a mapping against the NeXus tree for application definition `appdef`. Args: appdef (str): The appdef name to validate against. @@ -410,7 +410,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): for variant in variants: if node.optionality == "required" and isinstance(keys[variant], Mapping): - # Check if all fields in the dict are actual attributes (startwith @) + # Check if all fields in the dict are actual attributes (startswith @) all_attrs = True for entry in keys[variant]: if not entry.startswith("@"): @@ -594,7 +594,7 @@ def check_attributes_of_nonexisting_field( ) -> list: """ This method runs through the mapping dictionary and checks if there are any - attributes assigned to the fields (not groups!) which are not expicitly + attributes assigned to the fields (not groups!) which are not explicitly present in the mapping. If there are any found, a warning is logged and the corresponding items are added to the list returned by the method. @@ -695,7 +695,7 @@ def check_type_with_tree( if (next_child_class is not None) or (next_child_name is not None): output = None for child in node.children: - # regexs to separarte the class and the name from full name of the child + # regexs to separate the class and the name from full name of the child child_class_from_node = re.sub( r"(\@.*)*(\[.*?\])*(\(.*?\))*([a-z]\_)*(\_[a-z])*[a-z]*\s*", "", From 1925d5df0a9d42f25b79fdbc63c740405865248d Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 28 Feb 2025 13:11:43 +0100 Subject: [PATCH 15/61] move enum checking into is_valid_data_field, and proper bool conversion --- src/pynxtools/dataconverter/helpers.py | 44 +++++++++++---------- src/pynxtools/dataconverter/validation.py | 47 +++++------------------ 2 files changed, 33 insertions(+), 58 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 9e95779ed..307bbbd3d 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -675,20 +675,20 @@ def is_greater_than(num): return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0 -def convert_str_to_bool_safe(value): +def convert_str_to_bool_safe(value: str) -> Optional[bool]: """Only returns True or False if someone mistakenly adds quotation marks but mean a bool. - For everything else it returns None. + For everything else it raises a ValueError. """ if value.lower() == "true": return True if value.lower() == "false": return False - return None + raise ValueError(f"Could not interpret string '{value}' as boolean.") -def is_valid_data_field(value, nxdl_type, path): - # todo: Check this funciton and wtire test for it. It seems the funciton is not +def is_valid_data_field(value: Any, nxdl_type: str, nxdl_enum: list, path: str) -> Any: + # todo: Check this function and write test for it. It seems the function is not # working as expected. """Checks whether a given value is valid according to the type defined in the NXDL. @@ -696,32 +696,27 @@ def is_valid_data_field(value, nxdl_type, path): python Boolean (True). In case, it fails to convert, it raises an Exception. Return: - Bool: (True if the the value corresponds to nxdl_type, False otherwise) + value: the possibly converted data value """ accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] # Do not count the dict as it represents a link value if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types): - try: - if accepted_types[0] is bool and isinstance(value, str): + # try to convert string to bool + if accepted_types[0] is bool and isinstance(value, str): + try: value = convert_str_to_bool_safe(value) - if value is None: - raise ValueError - return True - - collector.collect_and_log( - path, ValidationProblem.InvalidType, accepted_types, nxdl_type - ) - return False - except (ValueError, TypeError): + except (ValueError, TypeError): + collector.collect_and_log( + path, ValidationProblem.InvalidType, accepted_types, nxdl_type + ) + else: collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) - return False if nxdl_type == "NX_POSINT" and not is_positive_int(value): collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value) - return False if nxdl_type in ("ISO8601", "NX_DATE_TIME"): iso8601 = re.compile( @@ -731,9 +726,16 @@ def is_valid_data_field(value, nxdl_type, path): results = iso8601.search(value) if results is None: collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value) - return False - return True + # Check enumeration + if nxdl_enum is not None and value not in nxdl_enum: + collector.collect_and_log( + path, + ValidationProblem.InvalidEnum, + nxdl_enum, + ) + + return value @lru_cache(maxsize=None) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index aade2d9b2..8202a704f 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -430,21 +430,13 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): continue # Check general validity - _ = is_valid_data_field( - mapping[f"{prev_path}/{variant}"], node.dtype, f"{prev_path}/{variant}" + mapping[f"{prev_path}/{variant}"] = is_valid_data_field( + mapping[f"{prev_path}/{variant}"], + node.dtype, + node.items, + f"{prev_path}/{variant}", ) - # Check enumeration - if ( - node.items is not None - and mapping[f"{prev_path}/{variant}"] not in node.items - ): - collector.collect_and_log( - f"{prev_path}/{variant}", - ValidationProblem.InvalidEnum, - node.items, - ) - # Check unit category if node.unit is not None: remove_from_not_visited(f"{prev_path}/{variant}/@units") @@ -477,28 +469,17 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): return for variant in variants: - _ = is_valid_data_field( + mapping[ + f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" + ] = is_valid_data_field( mapping[ f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" ], node.dtype, + node.items, f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", ) - # Check enumeration - if ( - node.items is not None - and mapping[ - f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" - ] - not in node.items - ): - collector.collect_and_log( - f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", - ValidationProblem.InvalidEnum, - node.items, - ) - def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str): global collector old_collector = collector @@ -553,15 +534,7 @@ def is_documented(key: str, node: NexusNode) -> bool: # We still do some further checks before returning. # Check general validity - _ = is_valid_data_field(mapping[key], node.dtype, key) - - # Check enumeration - if node.items is not None and mapping[key] not in node.items: - collector.collect_and_log( - key, - ValidationProblem.InvalidEnum, - node.items, - ) + mapping[key] = is_valid_data_field(mapping[key], node.dtype, node.items, key) # Check main field exists for units if ( From ecb4914e5243e2b906f60671386c88114bdc8482 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 28 Feb 2025 13:18:58 +0100 Subject: [PATCH 16/61] satisfy mypy --- src/pynxtools/dataconverter/validation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 8202a704f..ae244e701 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -20,7 +20,7 @@ from collections import defaultdict from functools import reduce from operator import getitem -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union import h5py import lxml.etree as ET @@ -165,14 +165,14 @@ def best_namefit_of(name: str, keys: Iterable[str]) -> Optional[str]: def validate_dict_against( - appdef: str, mapping: Mapping[str, Any], ignore_undocumented: bool = False + appdef: str, mapping: MutableMapping[str, Any], ignore_undocumented: bool = False ) -> Tuple[bool, List]: """ Validates a mapping against the NeXus tree for application definition `appdef`. Args: appdef (str): The appdef name to validate against. - mapping (Mapping[str, Any]): + mapping (MutableMapping[str, Any]): The mapping containing the data to validate. This should be a dict of `/` separated paths. Attributes are denoted with `@` in front of the last element. @@ -813,6 +813,6 @@ def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int # Backwards compatibility def validate_data_dict( - _: Mapping[str, Any], read_data: Mapping[str, Any], root: ET._Element + _: MutableMapping[str, Any], read_data: MutableMapping[str, Any], root: ET._Element ) -> bool: return validate_dict_against(root.attrib["name"], read_data)[0] From 3a1703e82e0c5ee17a1f024b921a8cb71e20ae1f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 3 Mar 2025 11:11:01 +0100 Subject: [PATCH 17/61] mark namefitted group as undocumented --- src/pynxtools/dataconverter/helpers.py | 4 +-- src/pynxtools/dataconverter/validation.py | 36 +++++++++++++---------- tests/dataconverter/test_validation.py | 14 +++++---- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 71d4a4b9f..016359fd6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -80,9 +80,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar value = "" if log_type == ValidationProblem.UnitWithoutDocumentation: - logger.warning( - f"The unit, {path} = {value}, is being written but has no documentation" - ) + logger.warning(f"The unit, {path} = {value} written without documentation.") elif log_type == ValidationProblem.InvalidEnum: logger.warning( f"The value at {path} should be on of the following strings: {value}" diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4b599b43a..ab871c246 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -134,7 +134,7 @@ def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: ), f"{name_match.group(2)}{'' if prefix is None else prefix}" -def best_namefit_of(name: str, keys: Iterable[str]) -> Optional[str]: +def best_namefit_of(name: str, keys: Iterable[str]) -> Tuple[Optional[str], bool]: """ Get the best namefit of `name` in `keys`. @@ -143,25 +143,26 @@ def best_namefit_of(name: str, keys: Iterable[str]) -> Optional[str]: keys (Iterable[str]): The keys to fit `name` against. Returns: - Optional[str]: The best fitting key. None if no fit was found. + Tuple[Optional[str], bool]: A tuple where the first element is the best fitting key (or None if no fit was found), + and the second element is a boolean indicating if the match was exact. """ + if not keys: - return None + return None, True nx_name, name2fit = split_class_and_name_of(name) if name2fit in keys: - return name2fit + return name2fit, True if nx_name is not None and nx_name in keys: - return nx_name - + return nx_name, True best_match, score = max( map(lambda x: (x, get_nx_namefit(name2fit, x)), keys), key=lambda x: x[1] ) if score < 0: - return None + return None, False - return best_match + return best_match, False def validate_dict_against( @@ -209,6 +210,7 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: and key not in node.parent.get_all_direct_children_names() ): variations.append(key) + if nx_name is not None and not variations: collector.collect_and_log( nx_name, ValidationProblem.FailedNamefitting, keys @@ -342,6 +344,7 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str): None, ) return + for variant in variants: if variant in [node.name for node in node.parent_of]: # Don't process if this is actually a sub-variant of this group @@ -511,12 +514,15 @@ def is_documented(key: str, node: NexusNode) -> bool: for name in key[1:].replace("@", "").split("/"): children = node.get_all_direct_children_names() - best_name = best_namefit_of(name, children) + best_name, good_name_fit = best_namefit_of(name, children) if best_name is None: return False node = node.search_add_child_for(best_name) + if not good_name_fit: + return False + if isinstance(mapping[key], dict) and "link" in mapping[key]: # TODO: Follow link and check consistency with current field return True @@ -755,12 +761,12 @@ def startswith_with_variations( ValidationProblem.UnitWithoutField, not_visited_key.rsplit("/", 1)[0], ) - if not ignore_undocumented: - collector.collect_and_log( - not_visited_key, - ValidationProblem.UnitWithoutDocumentation, - mapping[not_visited_key], - ) + # if not ignore_undocumented: + # collector.collect_and_log( + # not_visited_key, + # ValidationProblem.UnitWithoutDocumentation, + # mapping[not_visited_key], + # ) if is_documented(not_visited_key, tree): continue diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 2c946a3a1..91c062f8e 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -64,7 +64,7 @@ def get_data_dict(): "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field": 1, "/ENTRY[my_entry]/required_group/description": "An example description", "/ENTRY[my_entry]/required_group2/description": "An example description", - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/data": 1, + "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]": 1, "/@default": "Some NXroot attribute", } @@ -86,21 +86,25 @@ def alter_dict(new_values: Dict[str, Any], data_dict: Dict[str, Any]) -> Dict[st @pytest.mark.parametrize( - "data_dict", + "data_dict, ignore_undocumented", [ - pytest.param(get_data_dict(), id="valid-unaltered-data-dict"), + pytest.param(get_data_dict(), True, id="valid-unaltered-data-dict"), pytest.param( remove_from_dict( "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr", get_data_dict(), ), + False, id="removed-optional-value", ), ], ) -def test_valid_data_dict(caplog, data_dict): +def test_valid_data_dict(caplog, data_dict, ignore_undocumented): with caplog.at_level(logging.WARNING): - assert validate_dict_against("NXtest", data_dict)[0] + assert validate_dict_against( + "NXtest", data_dict, ignore_undocumented=ignore_undocumented + )[0] + assert caplog.text == "" From 4a292517859557dfa92c57ef005106c657d8be2f Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 4 Mar 2025 19:28:13 +0100 Subject: [PATCH 18/61] add tests from branch fix_attribute_enum_check --- src/pynxtools/data/NXtest.nxdl.xml | 10 ++ src/pynxtools/dataconverter/helpers.py | 20 ++- .../dataconverter/readers/example/reader.py | 6 +- .../readers/example/testdata.json | 5 +- tests/dataconverter/test_helpers.py | 170 +++++++++++++++--- tests/dataconverter/test_validation.py | 4 + 6 files changed, 178 insertions(+), 37 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 8695a20c9..45f37896c 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -28,9 +28,13 @@ + A dummy entry for a float value. + + A dummy entry for a number value. + A dummy entry for a bool value. @@ -53,6 +57,12 @@ + + + + + + diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 307bbbd3d..247ea4880 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -96,7 +96,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) elif log_type == ValidationProblem.InvalidType: logger.warning( - f"The value at {path} should be one of: {value}" + f"The value at {path} should be one of the following Python types: {value}" f", as defined in the NXDL as {args[0] if args else ''}." ) elif log_type == ValidationProblem.InvalidDatetime: @@ -158,9 +158,9 @@ def collect_and_log( "NX_ANY", ): return - if self.logging: + if self.logging and path + str(log_type) + str(value) not in self.data: self._log(path, log_type, value, *args, **kwargs) - self.data.add(path) + self.data.add(path + str(log_type) + str(value)) def has_validation_problems(self): """Returns True if there were any validation problems.""" @@ -584,7 +584,11 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: # Not to be confused with `np.byte` and `np.ubyte`, these store # an integer of `8bit` and `unsigned 8bit` respectively. np_bytes = (np.bytes_,) -np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string +np_char = ( + np.str_, + np.bytes_, + np.chararray, +) # Only numpy Unicode string and Byte string np_bool = (np.bool_,) np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle, np.complex_) NEXUS_TO_PYTHON_DATA_TYPES = { @@ -647,9 +651,11 @@ def check_all_children_for_callable( return False if isinstance(objects, list): # Handles list and list of list - return all([type(elem) in accepted_types for elem in objects]) - if isinstance(objects, np.ndarray): - return any([np.issubdtype(objects.dtype, type_) for type_ in accepted_types]) + tmp_arr = np.array(objects) + elif isinstance(objects, np.ndarray): + tmp_arr = objects + if tmp_arr is not None: + return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) return False diff --git a/src/pynxtools/dataconverter/readers/example/reader.py b/src/pynxtools/dataconverter/readers/example/reader.py index fefe37f5c..7e368a264 100644 --- a/src/pynxtools/dataconverter/readers/example/reader.py +++ b/src/pynxtools/dataconverter/readers/example/reader.py @@ -58,7 +58,11 @@ def read( # outputs with --generate-template for a provided NXDL file if ( k.startswith("/ENTRY[entry]/required_group") - or k == "/ENTRY[entry]/optional_parent/req_group_in_opt_group" + or k + in ( + "/ENTRY[entry]/optional_parent/req_group_in_opt_group", + "/ENTRY[entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatrenames]", + ) or k.startswith("/ENTRY[entry]/OPTIONAL_group") ): continue diff --git a/tests/data/dataconverter/readers/example/testdata.json b/tests/data/dataconverter/readers/example/testdata.json index 21deb40c3..e66af9962 100644 --- a/tests/data/dataconverter/readers/example/testdata.json +++ b/tests/data/dataconverter/readers/example/testdata.json @@ -7,6 +7,8 @@ "float_value_units": "nm", "int_value": -3, "int_value_units": "eV", + "number_value": 3, + "number_value_units": "eV", "posint_value": 7, "posint_value_units": "kg", "definition": "NXtest", @@ -17,5 +19,6 @@ "date_value_units": "", "required_child": 1, "optional_child": 1, - "@version": "1.0" + "@version": "1.0", + "@array": [0, 1, 2] } \ No newline at end of file diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index afa8ff71e..c61369489 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -25,7 +25,6 @@ import numpy as np import pytest - from pynxtools.dataconverter import helpers from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.validation import validate_dict_against @@ -97,9 +96,7 @@ def listify_template(data_dict: Template): "type", "definition", "date_value", - ) or isinstance( - data_dict[optionality][path], np.ndarray - ): # avoid list numpy array + ) or isinstance(data_dict[optionality][path], list): listified_template[optionality][path] = data_dict[optionality][path] else: listified_template[optionality][path] = [data_dict[optionality][path]] @@ -158,6 +155,9 @@ def fixture_filled_test_data(template, tmp_path): ) template.clear() + template[ + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" + ] = 2 template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = "nm" template["/ENTRY[my_entry]/optional_parent/required_child"] = 1 @@ -165,6 +165,8 @@ def fixture_filled_test_data(template, tmp_path): template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = "eV" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( [1, 2, 3], dtype=np.int8 ) @@ -187,6 +189,9 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE = Template() +TEMPLATE["optional"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" +] = 2 TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( "nm" # pylint: disable=E1126 @@ -197,6 +202,10 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units"] = "" TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = ( + "eV" +) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( [1, 2, 3], # pylint: disable=E1126 dtype=np.int8, @@ -212,6 +221,9 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"][ "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units" ] = "" +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/anamethatRENAMES[anamethatichangetothis]" +] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( "eV" # pylint: disable=E1126 @@ -232,6 +244,11 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units" ] = "" TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type"] = "2nd type" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array"] = [ + 0, + 1, + 2, +] TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value"] = ( "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 ) @@ -243,6 +260,7 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array"] = [0, 1, 2] TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 ) @@ -274,6 +292,19 @@ def fixture_filled_test_data(template, tmp_path): @pytest.mark.parametrize( "data_dict,error_message", [ + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]", + "not_a_num", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" + " should be one of the following Python types: (, ), as defined in " + "the NXDL as NX_INT." + ), + id="variadic-field-str-instead-of-int", + ), pytest.param( alter_dict( TEMPLATE, @@ -281,12 +312,23 @@ def fixture_filled_test_data(template, tmp_path): "not_a_num", ), ( - " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" - " be one of: (, ), as defined in the" - " NXDL as NX_INT.\n" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" + "t_value should be one of the following Python types: (, ), as defined in " + "the NXDL as NX_INT." ), id="string-instead-of-int", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "NOT_TRUE_OR_FALSE", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (, ), as defined in the NXDL as NX_BOOLEAN." + ), + id="string-instead-of-bool", + ), pytest.param( alter_dict( TEMPLATE, @@ -294,8 +336,8 @@ def fixture_filled_test_data(template, tmp_path): ["1", "2", "3"], ), ( - " The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" - " be one of: (, )" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" + " be one of the following Python types: (, ), as defined in the NXDL as NX_INT." ), id="list-of-int-str-instead-of-int", ), @@ -307,7 +349,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" - " one of: (, )" + " one of the following Python types: (, ), as defined in the NXDL as NX_INT." ), id="array-of-float-instead-of-int", ), @@ -318,7 +360,7 @@ def fixture_filled_test_data(template, tmp_path): [2, 3, 4], ), (""), - id="List-of-int-instead-of-int", + id="list-of-int-instead-of-int", ), pytest.param( alter_dict( @@ -332,15 +374,58 @@ def fixture_filled_test_data(template, tmp_path): pytest.param( alter_dict( TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "NOT_TRUE_OR_FALSE", + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", + "2022-01-22T12:14:12.05018-00:00", + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" + " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" + "T12:14:12.05018+00:00.", + id="int-instead-of-date", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + 0, ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should " - "be one of: (, ), as defined in the " - "NXDL as NX_BOOLEAN" + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be one of the following Python types: (, ), as defined in the NXDL as NX_FLOAT." ), - id="string-instead-of-bool", + id="int-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", + "0", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." + ), + id="str-instead-of-number", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array([0.0, 2]), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" + " of the following Python types: (, , , ), as" + " defined in the NXDL as NX_CHAR." + ), + id="wrong-type-ndarray-instead-of-char", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["x", "2"]), + ), + (""), + id="valid-ndarray-instead-of-char", ), pytest.param( alter_dict( @@ -408,9 +493,9 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should" - " be one of: (, , " - "), as defined in the NXDL as NX_CHAR." + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" + " (, , , )," + " as defined in the NXDL as NX_CHAR." ), id="int-instead-of-chars", ), @@ -475,8 +560,8 @@ def fixture_filled_test_data(template, tmp_path): np.array(["2.0", "3.0"], dtype=np.str_), ), "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of: (, ), as defined in the NXDL " - "as NX_FLOAT.\n", + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT.", id="array-of-str-instead-of-float", ), pytest.param( @@ -485,9 +570,9 @@ def fixture_filled_test_data(template, tmp_path): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", [2], # pylint: disable=E1126 ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be" - " one of: (, ), as defined in the NXDL" - " as NX_FLOAT.\n", + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT.", id="list-of-int-instead-of-float", ), pytest.param( @@ -669,10 +754,34 @@ def fixture_filled_test_data(template, tmp_path): pytest.param( remove_optional_parent(TEMPLATE), (""), id="opt-group-completely-removed" ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", + ["0", 1, 2], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" + ), + id="wrong-type-array-in-attribute", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" + ), + id="wrong-value-array-in-attribute", + ), ], ) def test_validate_data_dict(caplog, data_dict, error_message, request): """Unit test for the data validation routine.""" + + def format_error_message(msg: str) -> str: + return msg[msg.rfind("G: ") + 3 :].rstrip("\n") + if request.node.callspec.id in ( "valid-data-dict", "lists", @@ -683,6 +792,8 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): "link-dict-instead-of-int", "opt-group-completely-removed", "required-field-provided-in-variadic-optional-group", + "valid-ndarray-instead-of-char", + "list-of-int-instead-of-int", "list-of-string-instead-of-chars", "array-of-int32-instead-of-int", "List-of-int-instead-of-int", @@ -706,12 +817,15 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): assert "" == caplog.text captured_logs = caplog.records assert not validate_dict_against("NXtest", data_dict)[0] - assert any(error_message in rec.message for rec in captured_logs) + assert any( + error_message == format_error_message(rec.message) for rec in captured_logs + ) else: with caplog.at_level(logging.WARNING): assert not validate_dict_against("NXtest", data_dict)[0] - - assert error_message in caplog.text + assert any( + error_message == format_error_message(rec.message) for rec in caplog.records + ) @pytest.mark.parametrize( diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 2c946a3a1..8bb892998 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -28,6 +28,7 @@ def get_data_dict(): return { "/ENTRY[my_entry]/optional_parent/required_child": 1, "/ENTRY[my_entry]/optional_parent/optional_child": 1, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]": 2, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr": 2.0, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value": 2.0, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units": "nm", @@ -42,8 +43,10 @@ def get_data_dict(): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value": "just chars", "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value/@units": "", "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type": "2nd type", + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array": [0, 1, 2], "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value": "2022-01-22T12:14:12.05018+00:00", "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value/@units": "", + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/anamethatRENAMES[anamethatichangetothis]": 2, "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value": True, "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units": "", "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value": 2, @@ -55,6 +58,7 @@ def get_data_dict(): "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value": "just chars", "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units": "", "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type": "2nd type", + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array": [0, 1, 2], "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value": "2022-01-22T12:14:12.05018+00:00", "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value/@units": "", "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field": 1, From 31cd1312a2a9177fd0b04477fe6cf86388233c8e Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 5 Mar 2025 09:19:21 +0100 Subject: [PATCH 19/61] add review suggestion --- src/pynxtools/data/NXtest.nxdl.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 45f37896c..2d6547698 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -28,7 +28,7 @@ - + A dummy entry for a float value. From 3aabe0b4ca485570063ea28b61407ca500f5ae86 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Wed, 5 Mar 2025 11:34:34 +0100 Subject: [PATCH 20/61] Fixes the types and removes bytes from NX_char as that creates failures --- src/pynxtools/dataconverter/helpers.py | 71 +++++++------------------- tests/dataconverter/test_helpers.py | 22 +++----- 2 files changed, 24 insertions(+), 69 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 247ea4880..98cf6c4e6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -575,60 +575,25 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: return True, [] -NUMPY_FLOAT_TYPES = (np.half, np.float16, np.single, np.double, np.longdouble) -NUMPY_INT_TYPES = (np.short, np.intc, np.int_) -NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint) -# np int for np version 1.26.0 -np_int = (np.integer,) -np_float = (np.floating,) -# Not to be confused with `np.byte` and `np.ubyte`, these store -# an integer of `8bit` and `unsigned 8bit` respectively. -np_bytes = (np.bytes_,) -np_char = ( - np.str_, - np.bytes_, - np.chararray, -) # Only numpy Unicode string and Byte string -np_bool = (np.bool_,) -np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle, np.complex_) +nx_char = (str, np.chararray) +nx_int = (int, np.integer) +nx_float = (float, np.floating) +nx_number = nx_int + nx_float + NEXUS_TO_PYTHON_DATA_TYPES = { - "ISO8601": (str,), - "NX_BINARY": ( - bytes, - *np_bytes, - ), - "NX_BOOLEAN": (bool, *np_bool), - "NX_CHAR": (str, *np_char), - "NX_DATE_TIME": (str,), - "NX_FLOAT": (float, *np_float), - "NX_INT": (int, *np_int), - "NX_UINT": ( - np.unsignedinteger, - np.uint, - ), - "NX_NUMBER": ( - int, - float, - *np_int, - *np_float, - ), - "NX_POSINT": ( - int, - np.signedinteger, - ), # > 0 is checked in is_valid_data_field() - "NX_COMPLEX": (complex, *np_complex), - "NXDL_TYPE_UNAVAILABLE": ( - str, - *np_char, - ), # Defaults to a string if a type is not provided. - "NX_CHAR_OR_NUMBER": ( - str, - int, - float, - *np_char, - *np_int, - *np_float, - ), + "ISO8601": (str), + "NX_BINARY": (bytes, bytearray, np.byte, np.ubyte), + "NX_BOOLEAN": (bool, np.bool_), + "NX_CHAR": nx_char, + "NX_DATE_TIME": (str), + "NX_FLOAT": nx_float, + "NX_INT": nx_int, + "NX_UINT": (np.unsignedinteger), + "NX_NUMBER": nx_number, + "NX_POSINT": nx_int, # > 0 is checked in is_valid_data_field() + "NX_COMPLEX": (complex, np.complexfloating), + "NX_CHAR_OR_NUMBER": nx_char + nx_number, + "NXDL_TYPE_UNAVAILABLE": (str), # Defaults to a string if a type is not provided. } diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index c61369489..a14b57bb5 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -401,7 +401,7 @@ def fixture_filled_test_data(template, tmp_path): "0", ), ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." ), id="str-instead-of-number", ), @@ -413,7 +413,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" - " of the following Python types: (, , , ), as" + " of the following Python types: (, ), as" " defined in the NXDL as NX_CHAR." ), id="wrong-type-ndarray-instead-of-char", @@ -494,7 +494,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" - " (, , , )," + " (, )," " as defined in the NXDL as NX_CHAR." ), id="int-instead-of-chars", @@ -512,22 +512,11 @@ def fixture_filled_test_data(template, tmp_path): alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.array(["1", "2", "3"], dtype=np.bytes_), + np.char.array(["1", "2", "3"]), ), (""), - id="array-of-bytes-chars", + id="numpy-chararray", ), - # pytest.param( - # alter_dict( - # TEMPLATE, - # "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - # np.char.chararray(["1", "2", "3"]), - # ), - # (""), - # id="numpy-chararray", - # ), - # TODO add test array of char - # TODO add test for numpy array of char and chararray pytest.param( alter_dict( TEMPLATE, @@ -802,6 +791,7 @@ def format_error_message(msg: str) -> str: "array-of-chars", "array-of-bytes-chars", "array-of-float-instead-of-float", + "numpy-chararray", ): with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] From bd478e89e41b2bf8e95df98a5e0eff5601f2fcd3 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Wed, 5 Mar 2025 12:52:08 +0100 Subject: [PATCH 21/61] Fixes for arrays in an array --- src/pynxtools/dataconverter/helpers.py | 39 ++++++-------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 98cf6c4e6..019bdc0b4 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -24,7 +24,7 @@ from datetime import datetime, timezone from enum import Enum from functools import lru_cache -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Tuple, Union, Sequence import h5py import lxml.etree as ET @@ -598,52 +598,29 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: def check_all_children_for_callable( - objects: Union[list, np.ndarray], - checker: Optional[Callable] = None, - accepted_types: Optional[tuple] = None, + objects: Union[list, np.ndarray], checker: Optional[Callable] = None, *args ) -> bool: """Checks whether all objects in list or numpy array are validated by given callable and types. """ + if not isinstance(objects, np.ndarray): + objects = np.array(objects) - if checker is not None: - for obj in objects: - args = (obj, accepted_types) if accepted_types is not None else (obj,) - if not checker(*args): - return False - return True - if isinstance(objects, tuple): - return False - if isinstance(objects, list): - # Handles list and list of list - tmp_arr = np.array(objects) - elif isinstance(objects, np.ndarray): - tmp_arr = objects - if tmp_arr is not None: - return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) - - return False + return all([checker(o, *args) for o in objects.flat]) def is_valid_data_type(value, accepted_types): """Checks whether the given value or its children are of an accepted type.""" - - if not isinstance(value, (list, np.ndarray)): - return isinstance(value, accepted_types) - - return check_all_children_for_callable(objects=value, accepted_types=accepted_types) + return check_all_children_for_callable(value, isinstance, accepted_types) def is_positive_int(value): """Checks whether the given value or its children are positive.""" def is_greater_than(num): - return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0 - - if isinstance(value, list): - return check_all_children_for_callable(objects=value, checker=is_greater_than) + return num > 0 - return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0 + return check_all_children_for_callable(objects=value, checker=is_greater_than) def convert_str_to_bool_safe(value: str) -> Optional[bool]: From b336ca54e462446d486624273cef7eabcfba5f07 Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 5 Mar 2025 14:00:29 +0100 Subject: [PATCH 22/61] fix mypy error --- src/pynxtools/dataconverter/helpers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 019bdc0b4..fbe7b741a 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -581,19 +581,19 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: nx_number = nx_int + nx_float NEXUS_TO_PYTHON_DATA_TYPES = { - "ISO8601": (str), + "ISO8601": (str,), "NX_BINARY": (bytes, bytearray, np.byte, np.ubyte), "NX_BOOLEAN": (bool, np.bool_), "NX_CHAR": nx_char, - "NX_DATE_TIME": (str), + "NX_DATE_TIME": (str,), "NX_FLOAT": nx_float, "NX_INT": nx_int, - "NX_UINT": (np.unsignedinteger), + "NX_UINT": (np.unsignedinteger,), "NX_NUMBER": nx_number, "NX_POSINT": nx_int, # > 0 is checked in is_valid_data_field() - "NX_COMPLEX": (complex, np.complexfloating), + "NX_COMPLEX": (complex, np.complexfloating,), "NX_CHAR_OR_NUMBER": nx_char + nx_number, - "NXDL_TYPE_UNAVAILABLE": (str), # Defaults to a string if a type is not provided. + "NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided. } From e9b025e8d1f01b403d33bd2310441054a6acea63 Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 5 Mar 2025 14:32:28 +0100 Subject: [PATCH 23/61] ruff --- src/pynxtools/dataconverter/helpers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index fbe7b741a..aefe9a804 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -591,7 +591,10 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: "NX_UINT": (np.unsignedinteger,), "NX_NUMBER": nx_number, "NX_POSINT": nx_int, # > 0 is checked in is_valid_data_field() - "NX_COMPLEX": (complex, np.complexfloating,), + "NX_COMPLEX": ( + complex, + np.complexfloating, + ), "NX_CHAR_OR_NUMBER": nx_char + nx_number, "NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided. } From 8c89eef432ab2b75e3ee221ec25b4b6f5d270571 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:16:05 +0100 Subject: [PATCH 24/61] Applies suggested fix Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- src/pynxtools/dataconverter/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index aefe9a804..9dc1770bc 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -575,7 +575,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: return True, [] -nx_char = (str, np.chararray) +nx_char = (str, np.character) nx_int = (int, np.integer) nx_float = (float, np.floating) nx_number = nx_int + nx_float From 1c5538da115cc998d04544ebcd323e4cf03d14d0 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:16:27 +0100 Subject: [PATCH 25/61] Update src/pynxtools/dataconverter/helpers.py Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- src/pynxtools/dataconverter/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 9dc1770bc..0278faead 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -596,7 +596,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: np.complexfloating, ), "NX_CHAR_OR_NUMBER": nx_char + nx_number, - "NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided. + "NXDL_TYPE_UNAVAILABLE": (nx_char,), # Defaults to a string if a type is not provided. } From 365f061347f7501a264712b04420979abf7dc40a Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:28:44 +0100 Subject: [PATCH 26/61] Applies fixes from suggestions --- src/pynxtools/dataconverter/helpers.py | 6 +++--- tests/dataconverter/test_helpers.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 0278faead..3fe1c58d7 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -582,7 +582,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: NEXUS_TO_PYTHON_DATA_TYPES = { "ISO8601": (str,), - "NX_BINARY": (bytes, bytearray, np.byte, np.ubyte), + "NX_BINARY": (bytes, bytearray, np.bytes_), "NX_BOOLEAN": (bool, np.bool_), "NX_CHAR": nx_char, "NX_DATE_TIME": (str,), @@ -601,7 +601,7 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: def check_all_children_for_callable( - objects: Union[list, np.ndarray], checker: Optional[Callable] = None, *args + objects: Union[list, np.ndarray], check_function: Optional[Callable] = None, *args ) -> bool: """Checks whether all objects in list or numpy array are validated by given callable and types. @@ -609,7 +609,7 @@ def check_all_children_for_callable( if not isinstance(objects, np.ndarray): objects = np.array(objects) - return all([checker(o, *args) for o in objects.flat]) + return all([check_function(o, *args) for o in objects.flat]) def is_valid_data_type(value, accepted_types): diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index a14b57bb5..310dfa6e2 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -512,7 +512,7 @@ def fixture_filled_test_data(template, tmp_path): alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.char.array(["1", "2", "3"]), + np.array(["1", "2", "3"], dtype=np.bytes_), ), (""), id="numpy-chararray", From bd250cc2ef65bcbdbe9f71f78bc894a6b66f7c3c Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:31:22 +0100 Subject: [PATCH 27/61] Updates --- tests/dataconverter/test_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 310dfa6e2..502bc114a 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -515,7 +515,7 @@ def fixture_filled_test_data(template, tmp_path): np.array(["1", "2", "3"], dtype=np.bytes_), ), (""), - id="numpy-chararray", + id="array-of-bytes-chars", ), pytest.param( alter_dict( From 84426b7dda729e0585df6d9e3ecc21ccab693b06 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:34:17 +0100 Subject: [PATCH 28/61] Ruff --- src/pynxtools/dataconverter/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 3fe1c58d7..ada958a24 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -596,7 +596,9 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: np.complexfloating, ), "NX_CHAR_OR_NUMBER": nx_char + nx_number, - "NXDL_TYPE_UNAVAILABLE": (nx_char,), # Defaults to a string if a type is not provided. + "NXDL_TYPE_UNAVAILABLE": ( + nx_char, + ), # Defaults to a string if a type is not provided. } From 5a63c3b5a53a76cb2e8998fa2cd0f3bdd27a5cc4 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 09:56:02 +0100 Subject: [PATCH 29/61] Update src/pynxtools/dataconverter/helpers.py Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- src/pynxtools/dataconverter/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index ada958a24..8699e37c6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -625,7 +625,7 @@ def is_positive_int(value): def is_greater_than(num): return num > 0 - return check_all_children_for_callable(objects=value, checker=is_greater_than) + return check_all_children_for_callable(objects=value, check_function=is_greater_than) def convert_str_to_bool_safe(value: str) -> Optional[bool]: From 0d056b7bebd46fa139bdf6f660faf3bb2b7ff797 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 10:40:34 +0100 Subject: [PATCH 30/61] ruff --- src/pynxtools/dataconverter/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 8699e37c6..030e5c92d 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -625,7 +625,9 @@ def is_positive_int(value): def is_greater_than(num): return num > 0 - return check_all_children_for_callable(objects=value, check_function=is_greater_than) + return check_all_children_for_callable( + objects=value, check_function=is_greater_than + ) def convert_str_to_bool_safe(value: str) -> Optional[bool]: From 70a457cf60e397b35a1f6979e8123a451e1132d4 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 11 Mar 2025 11:17:37 +0100 Subject: [PATCH 31/61] remove empty string --- src/pynxtools/dataconverter/helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 030e5c92d..9813e8898 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -215,7 +215,6 @@ def get_nxdl_name_for(xml_elem: ET._Element) -> Optional[str]: The name of the element. None if the xml element has no name or type attribute. """ - """""" if "name" in xml_elem.attrib: return xml_elem.attrib["name"] if "type" in xml_elem.attrib: From b6b112b49823093edd5d01eddd8f303aeac345cd Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 11 Mar 2025 12:13:37 +0100 Subject: [PATCH 32/61] Fixes --- tests/dataconverter/test_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 502bc114a..c59467255 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -413,7 +413,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" - " of the following Python types: (, ), as" + " of the following Python types: (, ), as" " defined in the NXDL as NX_CHAR." ), id="wrong-type-ndarray-instead-of-char", @@ -494,7 +494,7 @@ def fixture_filled_test_data(template, tmp_path): ), ( "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" - " (, )," + " (, )," " as defined in the NXDL as NX_CHAR." ), id="int-instead-of-chars", From 467194e93c705c6e2327eba8710b7c1f9b51794d Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:00:27 +0100 Subject: [PATCH 33/61] rename original into one of the aux copies --- .../{test_helpers.py => source-aux} | 0 tests/dataconverter/target1-aux | 913 ++++++++++++++++++ 2 files changed, 913 insertions(+) rename tests/dataconverter/{test_helpers.py => source-aux} (100%) create mode 100644 tests/dataconverter/target1-aux diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/source-aux similarity index 100% rename from tests/dataconverter/test_helpers.py rename to tests/dataconverter/source-aux diff --git a/tests/dataconverter/target1-aux b/tests/dataconverter/target1-aux new file mode 100644 index 000000000..c59467255 --- /dev/null +++ b/tests/dataconverter/target1-aux @@ -0,0 +1,913 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Test cases for the helper functions used by the DataConverter.""" + +import logging +import os +import shutil +import xml.etree.ElementTree as ET +from typing import Optional + +import numpy as np +import pytest +from pynxtools.dataconverter import helpers +from pynxtools.dataconverter.template import Template +from pynxtools.dataconverter.validation import validate_dict_against + + +def remove_optional_parent(data_dict: Template): + """Completely removes the optional group from the test Template.""" + internal_dict = Template(data_dict) + del internal_dict["/ENTRY[my_entry]/optional_parent/required_child"] + del internal_dict["/ENTRY[my_entry]/optional_parent/optional_child"] + del internal_dict[ + "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]" + ] + + return internal_dict + + +def alter_dict(data_dict: Template, key: str, value: object): + """Helper function to alter a single entry in dict for parametrize.""" + if data_dict is not None: + internal_dict = Template(data_dict) + internal_dict[key] = value + return internal_dict + + return None + + +def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): + """Helper function to forcefully set path to 'None'""" + if data_dict is None: + return None + + internal_dict = Template(data_dict) + internal_dict[optionality][key] = None + return internal_dict + + +def set_whole_group_to_none( + data_dict: Optional[Template], key: str, optionality: str +) -> Optional[Template]: + """Set a whole path to None in the dict""" + if data_dict is None: + return None + + internal_dict = Template(data_dict) + for path in data_dict[optionality]: + if path.startswith(key): + internal_dict[optionality][path] = None + return internal_dict + + +def remove_from_dict(data_dict: Template, key: str, optionality: str = "optional"): + """Helper function to remove a key from dict""" + if data_dict is not None and key in data_dict[optionality]: + internal_dict = Template(data_dict) + del internal_dict[optionality][key] + return internal_dict + + return None + + +def listify_template(data_dict: Template): + """Helper function to turn most values in the Template into lists""" + listified_template = Template() + for optionality in ("optional", "recommended", "required", "undocumented"): + for path in data_dict[optionality]: + if path[path.rindex("/") + 1 :] in ( + "@units", + "type", + "definition", + "date_value", + ) or isinstance(data_dict[optionality][path], list): + listified_template[optionality][path] = data_dict[optionality][path] + else: + listified_template[optionality][path] = [data_dict[optionality][path]] + return listified_template + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ("2.4E-23", 2.4e-23), + ("28", 28), + ("45.98", 45.98), + ("test", "test"), + (["59", "3.00005", "498E-36"], np.array([59.0, 3.00005, 4.98e-34])), + ("23 34 444 5000", np.array([23.0, 34.0, 444.0, 5000.0])), + ("xrd experiment", "xrd experiment"), + (None, None), + ], +) +def test_transform_to_intended_dt(input_data, expected_output): + """Transform to possible numerical method.""" + result = helpers.transform_to_intended_dt(input_data) + + # Use pytest.approx for comparing floating-point numbers + if isinstance(expected_output, np.ndarray): + np.testing.assert_allclose(result, expected_output, rtol=1e-3) + elif isinstance(expected_output, float): + assert result == pytest.approx(expected_output, rel=1e-5) + else: + assert result == expected_output + + +@pytest.fixture(name="template") +def fixture_template(): + """pytest fixture to use the same template in all tests""" + nxdl_root = ET.parse("src/pynxtools/data/NXtest.nxdl.xml").getroot() + + template = Template() + helpers.generate_template_from_nxdl(nxdl_root, template) + return template + + +@pytest.mark.usefixtures("template") +@pytest.fixture(name="filled_test_data") +def fixture_filled_test_data(template, tmp_path): + """pytest fixture to setup a filled in template.""" + + # Copy original measurement file to tmp dir, + # because h5py.ExternalLink is modifying it while + # linking the nxs file. + shutil.copy( + os.path.join( + os.getcwd(), "src", "pynxtools", "data", "xarray_saved_small_calibration.h5" + ), + tmp_path, + ) + + template.clear() + template[ + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" + ] = 2 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = "nm" + template["/ENTRY[my_entry]/optional_parent/required_child"] = 1 + template["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = "eV" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( + [1, 2, 3], dtype=np.int8 + ) + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = "kg" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = "just chars" + template["/ENTRY[my_entry]/definition"] = "NXtest" + template["/ENTRY[my_entry]/definition/@version"] = "2.4.6" + template["/ENTRY[my_entry]/program_name"] = "Testing program" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( + "2022-01-22T12:14:12.05018+00:00" + ) + template["/ENTRY[my_entry]/required_group/description"] = "An example description" + template["/ENTRY[my_entry]/required_group2/description"] = "An example description" + template["/ENTRY[my_entry]/does/not/exist"] = "random" + template["/ENTRY[my_entry]/links/ext_link"] = { + "link": f"{tmp_path}/xarray_saved_small_calibration.h5:/axes/ax3" + } + return template + + +TEMPLATE = Template() +TEMPLATE["optional"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" +] = 2 +TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 # pylint: disable=E1126 +TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( + "nm" # pylint: disable=E1126 +) +TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 # pylint: disable=E1126 +TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units"] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = ( + "eV" +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( + [1, 2, 3], # pylint: disable=E1126 + dtype=np.int8, +) # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = ( + "kg" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( + "just chars" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value/@units"] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value"] = True # pylint: disable=E1126 +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units" +] = "" +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/anamethatRENAMES[anamethatichangetothis]" +] = 2 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( + "eV" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value"] = ( + np.array( + [1, 2, 3], # pylint: disable=E1126 + dtype=np.int8, + ) +) # pylint: disable=E1126 +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units" +] = "kg" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value"] = ( + "just chars" # pylint: disable=E1126 +) +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units" +] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type"] = "2nd type" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array"] = [ + 0, + 1, + 2, +] +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value"] = ( + "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 +) +TEMPLATE["required"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value/@units" +] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field"] = 1 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/definition"] = "NXtest" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array"] = [0, 1, 2] +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( + "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value/@units"] = "" +TEMPLATE["optional"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field"] = 1 +TEMPLATE["optional"]["/ENTRY[my_entry]/required_group/description"] = ( + "An example description" +) +TEMPLATE["optional"]["/ENTRY[my_entry]/required_group2/description"] = ( + "An example description" +) +TEMPLATE["required"][ + "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]" +] = 1 +TEMPLATE["lone_groups"] = [ + "/ENTRY[entry]/required_group", + "/ENTRY[entry]/required_group2", + "/ENTRY[entry]/optional_parent/req_group_in_opt_group", +] +TEMPLATE["optional"]["/@default"] = "Some NXroot attribute" + +# "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" +# "t_value should be one of: (, , )," +# " as defined in the NXDL as NX_INT." + + +# pylint: disable=too-many-arguments +@pytest.mark.parametrize( + "data_dict,error_message", + [ + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]", + "not_a_num", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" + " should be one of the following Python types: (, ), as defined in " + "the NXDL as NX_INT." + ), + id="variadic-field-str-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + "not_a_num", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" + "t_value should be one of the following Python types: (, ), as defined in " + "the NXDL as NX_INT." + ), + id="string-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "NOT_TRUE_OR_FALSE", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (, ), as defined in the NXDL as NX_BOOLEAN." + ), + id="string-instead-of-bool", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + ["1", "2", "3"], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" + " be one of the following Python types: (, ), as defined in the NXDL as NX_INT." + ), + id="list-of-int-str-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + np.array([2.0, 3.0, 4.0], dtype=np.float32), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" + " one of the following Python types: (, ), as defined in the NXDL as NX_INT." + ), + id="array-of-float-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + [2, 3, 4], + ), + (""), + id="list-of-int-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + np.array([2, 3, 4], dtype=np.int32), + ), + (""), + id="array-of-int32-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", + "2022-01-22T12:14:12.05018-00:00", + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" + " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" + "T12:14:12.05018+00:00.", + id="int-instead-of-date", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + 0, + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be one of the following Python types: (, ), as defined in the NXDL as NX_FLOAT." + ), + id="int-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", + "0", + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." + ), + id="str-instead-of-number", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array([0.0, 2]), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" + " of the following Python types: (, ), as" + " defined in the NXDL as NX_CHAR." + ), + id="wrong-type-ndarray-instead-of-char", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["x", "2"]), + ), + (""), + id="valid-ndarray-instead-of-char", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", + {"link": "/a-link"}, + ), + (""), + id="link-dict-instead-of-int", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", -1 + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " + "should be a positive int, but is -1." + ), + id="negative-posint", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + [-1, 2], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " + "should be a positive int, but is [-1, 2]." + ), + id="negative-posint-list", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + np.array([-1, 2], dtype=np.int8), + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value should" + " be a positive int, but is [-1 2]." + ), + id="negative-posint-array", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + [1, 2], + ), + (""), + id="positive-posint-list", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", + np.array([1, 2], dtype=np.int8), + ), + (""), + id="positive-posint-array", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" + " (, )," + " as defined in the NXDL as NX_CHAR." + ), + id="int-instead-of-chars", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["1", "2", "3"], dtype=np.str_), + ), + (""), + id="array-of-chars", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + np.array(["1", "2", "3"], dtype=np.bytes_), + ), + (""), + id="array-of-bytes-chars", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", + ["list", "of", "chars"], + ), + "", + id="list-of-string-instead-of-chars", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", None + ), + "", + id="empty-optional-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.array([2.0, 3.0, 4.0], dtype=np.float32), + ), + "", + id="array-of-float-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.array(["2.0", "3.0"], dtype=np.str_), + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT.", + id="array-of-str-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + [2], # pylint: disable=E1126 + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT.", + id="list-of-int-instead-of-float", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "required", + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" + "/bool_value is" + " required and hasn't been supplied by the reader." + ), + id="empty-required-field", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", + "required", + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/" + "NXODD_name[nxodd_two_name]/bool_value is" + " required and hasn't been supplied by the reader." + ), + id="empty-required-field", + ), + pytest.param( + remove_from_dict( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", + "required", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "required", + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" + "/bool_value is" + " required and hasn't been supplied by the reader." + ), + id="empty-required-field", + ), + pytest.param( + set_whole_group_to_none( + set_whole_group_to_none( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name", + "required", + ), + "/ENTRY[my_entry]/NXODD_name", + "optional", + ), + ("The required group, /ENTRY[my_entry]/NXODD_name, hasn't been supplied."), + id="all-required-fields-set-to-none", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", + "2022-01-22T12:14:12.05018+00:00", + ), + "", + id="UTC-with-+00:00", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", + "2022-01-22T12:14:12.05018Z", + ), + "", + id="UTC-with-Z", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", + "2022-01-22T12:14:12.05018-00:00", + ), + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" + " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" + "T12:14:12.05018+00:00.", + id="UTC-with--00:00", + ), + pytest.param(listify_template(TEMPLATE), "", id="lists"), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should " + "be one of the following" + ": ['1st type', '2nd type', '3rd type', '4th type']" + ), + id="wrong-enum-choice", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", "optional" + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/optional_parent/" + "required_child is required and hasn't been supplied by the reader." + ), + id="atleast-one-required-child-not-provided-optional-parent", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", + "required", + ), + ( + "The data entry corresponding to /ENTRY[my_entry]/" + "OPTIONAL_group[my_group]/required_field " + "is required and hasn't been supplied by the reader." + ), + id="required-field-not-provided-in-variadic-optional-group", + ), + pytest.param( + set_to_none_in_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", + "required", + ), + (""), + id="required-field-provided-in-variadic-optional-group", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", None + ), + "/ENTRY[my_entry]/optional_parent/optional_child", + None, + ), + (""), + id="no-child-provided-optional-parent", + ), + pytest.param(TEMPLATE, "", id="valid-data-dict"), + pytest.param( + remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), + "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", + id="missing-empty-yet-required-group", + ), + pytest.param( + remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), + "The required group, /ENTRY[my_entry]/required_group2, hasn't been supplied.", + id="missing-empty-yet-required-group2", + ), + pytest.param( + alter_dict( + remove_from_dict( + TEMPLATE, "/ENTRY[my_entry]/required_group/description" + ), + "/ENTRY[entry]/required_group", + None, + ), + "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", + id="allow-required-and-empty-group", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]", + "required", + ), + ( + "The required group, /ENTRY[my_entry]/" + "optional_parent/req_group_in_opt_group, " + "hasn't been supplied." + ), + id="req-group-in-opt-parent-removed", + ), + pytest.param( + remove_optional_parent(TEMPLATE), (""), id="opt-group-completely-removed" + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", + ["0", 1, 2], + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" + ), + id="wrong-type-array-in-attribute", + ), + pytest.param( + alter_dict( + TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] + ), + ( + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" + ), + id="wrong-value-array-in-attribute", + ), + ], +) +def test_validate_data_dict(caplog, data_dict, error_message, request): + """Unit test for the data validation routine.""" + + def format_error_message(msg: str) -> str: + return msg[msg.rfind("G: ") + 3 :].rstrip("\n") + + if request.node.callspec.id in ( + "valid-data-dict", + "lists", + "empty-optional-field", + "UTC-with-+00:00", + "UTC-with-Z", + "no-child-provided-optional-parent", + "link-dict-instead-of-int", + "opt-group-completely-removed", + "required-field-provided-in-variadic-optional-group", + "valid-ndarray-instead-of-char", + "list-of-int-instead-of-int", + "list-of-string-instead-of-chars", + "array-of-int32-instead-of-int", + "List-of-int-instead-of-int", + "positive-posint-list", + "positive-posint-array", + "array-of-chars", + "array-of-bytes-chars", + "array-of-float-instead-of-float", + "numpy-chararray", + ): + with caplog.at_level(logging.WARNING): + assert validate_dict_against("NXtest", data_dict)[0] + assert caplog.text == "" + # Missing required fields caught by logger with warning + elif request.node.callspec.id in ( + "empty-required-field", + "allow-required-and-empty-group", + "req-group-in-opt-parent-removed", + "missing-empty-yet-required-group", + "missing-empty-yet-required-group2", + ): + assert "" == caplog.text + captured_logs = caplog.records + assert not validate_dict_against("NXtest", data_dict)[0] + assert any( + error_message == format_error_message(rec.message) for rec in captured_logs + ) + else: + with caplog.at_level(logging.WARNING): + assert not validate_dict_against("NXtest", data_dict)[0] + assert any( + error_message == format_error_message(rec.message) for rec in caplog.records + ) + + +@pytest.mark.parametrize( + "nxdl_path,expected", + [ + pytest.param( + "/ENTRY/definition/@version", + ["/ENTRY[entry]/definition/@version"], + id="path-exists-in-dict", + ), + pytest.param("/RANDOM/does/not/@exist", [], id="path-does-not-exist-in-dict"), + ], +) +def test_path_in_data_dict(nxdl_path, expected, template): + """Unit test for helper function to check if an NXDL path exists in the reader dictionary.""" + assert helpers.path_in_data_dict(nxdl_path, tuple(template.keys())) == expected + + +def test_atom_type_extractor_and_hill_conversion(): + """ + Test atom type extractor and conversion to hill + """ + + test_chemical_formula = "(C38H54S4)n(NaO2)5(CH4)NH3B" + expected_atom_types = ["C", "H", "B", "N", "Na", "O", "S"] + + atom_list = helpers.extract_atom_types(test_chemical_formula) + + assert expected_atom_types == atom_list + + +def test_writing_of_root_attributes(caplog): + """ + Tests if all root attributes are populated + """ + template = Template() + filename = "my_nexus_file.nxs" + with caplog.at_level(logging.WARNING): + helpers.add_default_root_attributes(template, filename) + helpers.write_nexus_def_to_entry(template, "entry", "NXtest") + helpers.write_nexus_def_to_entry(template, "entry1", "NXtest") + + assert "" == caplog.text + + keys_added = template.keys() + assert "/@NX_class" in keys_added + assert template["/@NX_class"] == "NXroot" + assert "/@file_name" in keys_added + assert template["/@file_name"] == filename + assert "/@file_time" in keys_added + assert "/@file_update_time" in keys_added + assert "/@NeXus_repository" in keys_added + assert "/@NeXus_version" in keys_added + assert "/@HDF5_version" in keys_added + assert "/@h5py_version" in keys_added + assert "/ENTRY[entry]/definition" in keys_added + assert "/ENTRY[entry]/definition/@version" in keys_added + assert "/ENTRY[entry1]/definition" in keys_added + assert "/ENTRY[entry1]/definition/@version" in keys_added + + +def test_warning_on_root_attribute_overwrite(caplog): + """ + A warning is emitted when a root attribute is overwritten + by pynxtools. + """ + template = Template() + template["/@NX_class"] = "NXwrong" + filname = "my_nexus_file.nxs" + with caplog.at_level(logging.WARNING): + helpers.add_default_root_attributes(template, filname) + error_text = ( + "The NXroot entry '/@NX_class' (value: NXwrong) should not be changed by the reader. " + "This is overwritten by the actually used value 'NXroot'" + ) + assert error_text in caplog.text + + assert "/@NX_class" in template.keys() + assert template["/@NX_class"] == "NXroot" + + +def test_warning_on_definition_changed_by_reader(caplog): + template = Template() + template["/ENTRY[entry]/definition"] = "NXwrong" + with caplog.at_level(logging.WARNING): + helpers.write_nexus_def_to_entry(template, "entry", "NXtest") + + error_text = ( + "The entry '/ENTRY[entry]/definition' (value: NXtest) should not be changed by the reader. " + "This is overwritten by the actually used value 'NXwrong'" + ) + assert error_text in caplog.text + + assert "/ENTRY[entry]/definition" in template.keys() + assert template["/ENTRY[entry]/definition"] == "NXtest" From 65a177480838a3c0430c8cb2c8d310893a126bd2 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:07:17 +0100 Subject: [PATCH 34/61] clean copies --- tests/dataconverter/source-aux | 731 +------------------------------- tests/dataconverter/target1-aux | 219 ---------- 2 files changed, 19 insertions(+), 931 deletions(-) diff --git a/tests/dataconverter/source-aux b/tests/dataconverter/source-aux index c59467255..b01ba9e3c 100644 --- a/tests/dataconverter/source-aux +++ b/tests/dataconverter/source-aux @@ -30,18 +30,6 @@ from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.validation import validate_dict_against -def remove_optional_parent(data_dict: Template): - """Completely removes the optional group from the test Template.""" - internal_dict = Template(data_dict) - del internal_dict["/ENTRY[my_entry]/optional_parent/required_child"] - del internal_dict["/ENTRY[my_entry]/optional_parent/optional_child"] - del internal_dict[ - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]" - ] - - return internal_dict - - def alter_dict(data_dict: Template, key: str, value: object): """Helper function to alter a single entry in dict for parametrize.""" if data_dict is not None: @@ -52,83 +40,6 @@ def alter_dict(data_dict: Template, key: str, value: object): return None -def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): - """Helper function to forcefully set path to 'None'""" - if data_dict is None: - return None - - internal_dict = Template(data_dict) - internal_dict[optionality][key] = None - return internal_dict - - -def set_whole_group_to_none( - data_dict: Optional[Template], key: str, optionality: str -) -> Optional[Template]: - """Set a whole path to None in the dict""" - if data_dict is None: - return None - - internal_dict = Template(data_dict) - for path in data_dict[optionality]: - if path.startswith(key): - internal_dict[optionality][path] = None - return internal_dict - - -def remove_from_dict(data_dict: Template, key: str, optionality: str = "optional"): - """Helper function to remove a key from dict""" - if data_dict is not None and key in data_dict[optionality]: - internal_dict = Template(data_dict) - del internal_dict[optionality][key] - return internal_dict - - return None - - -def listify_template(data_dict: Template): - """Helper function to turn most values in the Template into lists""" - listified_template = Template() - for optionality in ("optional", "recommended", "required", "undocumented"): - for path in data_dict[optionality]: - if path[path.rindex("/") + 1 :] in ( - "@units", - "type", - "definition", - "date_value", - ) or isinstance(data_dict[optionality][path], list): - listified_template[optionality][path] = data_dict[optionality][path] - else: - listified_template[optionality][path] = [data_dict[optionality][path]] - return listified_template - - -@pytest.mark.parametrize( - "input_data, expected_output", - [ - ("2.4E-23", 2.4e-23), - ("28", 28), - ("45.98", 45.98), - ("test", "test"), - (["59", "3.00005", "498E-36"], np.array([59.0, 3.00005, 4.98e-34])), - ("23 34 444 5000", np.array([23.0, 34.0, 444.0, 5000.0])), - ("xrd experiment", "xrd experiment"), - (None, None), - ], -) -def test_transform_to_intended_dt(input_data, expected_output): - """Transform to possible numerical method.""" - result = helpers.transform_to_intended_dt(input_data) - - # Use pytest.approx for comparing floating-point numbers - if isinstance(expected_output, np.ndarray): - np.testing.assert_allclose(result, expected_output, rtol=1e-3) - elif isinstance(expected_output, float): - assert result == pytest.approx(expected_output, rel=1e-5) - else: - assert result == expected_output - - @pytest.fixture(name="template") def fixture_template(): """pytest fixture to use the same template in all tests""" @@ -188,634 +99,30 @@ def fixture_filled_test_data(template, tmp_path): return template -TEMPLATE = Template() -TEMPLATE["optional"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" -] = 2 -TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 # pylint: disable=E1126 -TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( - "nm" # pylint: disable=E1126 -) -TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 # pylint: disable=E1126 -TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units"] = "" -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = ( - "eV" -) -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( - [1, 2, 3], # pylint: disable=E1126 - dtype=np.int8, -) # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = ( - "kg" # pylint: disable=E1126 -) -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( - "just chars" # pylint: disable=E1126 -) -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value/@units"] = "" -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value"] = True # pylint: disable=E1126 -TEMPLATE["required"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units" -] = "" -TEMPLATE["required"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/anamethatRENAMES[anamethatichangetothis]" -] = 2 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( - "eV" # pylint: disable=E1126 -) -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value"] = ( - np.array( - [1, 2, 3], # pylint: disable=E1126 - dtype=np.int8, - ) -) # pylint: disable=E1126 -TEMPLATE["required"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units" -] = "kg" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value"] = ( - "just chars" # pylint: disable=E1126 -) -TEMPLATE["required"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units" -] = "" -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type"] = "2nd type" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array"] = [ - 0, - 1, - 2, -] -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value"] = ( - "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 -) -TEMPLATE["required"][ - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value/@units" -] = "" -TEMPLATE["required"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field"] = 1 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/definition"] = "NXtest" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array"] = [0, 1, 2] -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( - "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 -) -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value/@units"] = "" -TEMPLATE["optional"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field"] = 1 -TEMPLATE["optional"]["/ENTRY[my_entry]/required_group/description"] = ( - "An example description" -) -TEMPLATE["optional"]["/ENTRY[my_entry]/required_group2/description"] = ( - "An example description" -) -TEMPLATE["required"][ - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]" -] = 1 -TEMPLATE["lone_groups"] = [ - "/ENTRY[entry]/required_group", - "/ENTRY[entry]/required_group2", - "/ENTRY[entry]/optional_parent/req_group_in_opt_group", -] -TEMPLATE["optional"]["/@default"] = "Some NXroot attribute" - -# "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" -# "t_value should be one of: (, , )," -# " as defined in the NXDL as NX_INT." - - -# pylint: disable=too-many-arguments @pytest.mark.parametrize( - "data_dict,error_message", + "input_data, expected_output", [ - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]", - "not_a_num", - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" - " should be one of the following Python types: (, ), as defined in " - "the NXDL as NX_INT." - ), - id="variadic-field-str-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - "not_a_num", - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" - "t_value should be one of the following Python types: (, ), as defined in " - "the NXDL as NX_INT." - ), - id="string-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "NOT_TRUE_OR_FALSE", - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (, ), as defined in the NXDL as NX_BOOLEAN." - ), - id="string-instead-of-bool", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - ["1", "2", "3"], - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" - " be one of the following Python types: (, ), as defined in the NXDL as NX_INT." - ), - id="list-of-int-str-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - np.array([2.0, 3.0, 4.0], dtype=np.float32), - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" - " one of the following Python types: (, ), as defined in the NXDL as NX_INT." - ), - id="array-of-float-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - [2, 3, 4], - ), - (""), - id="list-of-int-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - np.array([2, 3, 4], dtype=np.int32), - ), - (""), - id="array-of-int32-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", - "2022-01-22T12:14:12.05018-00:00", - ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" - " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" - " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" - "T12:14:12.05018+00:00.", - id="int-instead-of-date", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - 0, - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be one of the following Python types: (, ), as defined in the NXDL as NX_FLOAT." - ), - id="int-instead-of-float", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", - "0", - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." - ), - id="str-instead-of-number", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.array([0.0, 2]), - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" - " of the following Python types: (, ), as" - " defined in the NXDL as NX_CHAR." - ), - id="wrong-type-ndarray-instead-of-char", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.array(["x", "2"]), - ), - (""), - id="valid-ndarray-instead-of-char", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - {"link": "/a-link"}, - ), - (""), - id="link-dict-instead-of-int", - ), - pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", -1 - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " - "should be a positive int, but is -1." - ), - id="negative-posint", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", - [-1, 2], - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " - "should be a positive int, but is [-1, 2]." - ), - id="negative-posint-list", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", - np.array([-1, 2], dtype=np.int8), - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value should" - " be a positive int, but is [-1 2]." - ), - id="negative-posint-array", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", - [1, 2], - ), - (""), - id="positive-posint-list", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", - np.array([1, 2], dtype=np.int8), - ), - (""), - id="positive-posint-array", - ), - pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" - " (, )," - " as defined in the NXDL as NX_CHAR." - ), - id="int-instead-of-chars", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.array(["1", "2", "3"], dtype=np.str_), - ), - (""), - id="array-of-chars", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - np.array(["1", "2", "3"], dtype=np.bytes_), - ), - (""), - id="array-of-bytes-chars", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", - ["list", "of", "chars"], - ), - "", - id="list-of-string-instead-of-chars", - ), - pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", None - ), - "", - id="empty-optional-field", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - np.array([2.0, 3.0, 4.0], dtype=np.float32), - ), - "", - id="array-of-float-instead-of-float", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - np.array(["2.0", "3.0"], dtype=np.str_), - ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of the following Python types: (, ), as defined in the NXDL " - "as NX_FLOAT.", - id="array-of-str-instead-of-float", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - [2], # pylint: disable=E1126 - ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of the following Python types: (, ), as defined in the NXDL " - "as NX_FLOAT.", - id="list-of-int-instead-of-float", - ), - pytest.param( - set_to_none_in_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "required", - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" - "/bool_value is" - " required and hasn't been supplied by the reader." - ), - id="empty-required-field", - ), - pytest.param( - set_to_none_in_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", - "required", - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/" - "NXODD_name[nxodd_two_name]/bool_value is" - " required and hasn't been supplied by the reader." - ), - id="empty-required-field", - ), - pytest.param( - remove_from_dict( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", - "required", - ), - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "required", - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" - "/bool_value is" - " required and hasn't been supplied by the reader." - ), - id="empty-required-field", - ), - pytest.param( - set_whole_group_to_none( - set_whole_group_to_none( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name", - "required", - ), - "/ENTRY[my_entry]/NXODD_name", - "optional", - ), - ("The required group, /ENTRY[my_entry]/NXODD_name, hasn't been supplied."), - id="all-required-fields-set-to-none", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", - "2022-01-22T12:14:12.05018+00:00", - ), - "", - id="UTC-with-+00:00", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", - "2022-01-22T12:14:12.05018Z", - ), - "", - id="UTC-with-Z", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", - "2022-01-22T12:14:12.05018-00:00", - ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" - " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" - " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" - "T12:14:12.05018+00:00.", - id="UTC-with--00:00", - ), - pytest.param(listify_template(TEMPLATE), "", id="lists"), - pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should " - "be one of the following" - ": ['1st type', '2nd type', '3rd type', '4th type']" - ), - id="wrong-enum-choice", - ), - pytest.param( - set_to_none_in_dict( - TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", "optional" - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/optional_parent/" - "required_child is required and hasn't been supplied by the reader." - ), - id="atleast-one-required-child-not-provided-optional-parent", - ), - pytest.param( - set_to_none_in_dict( - TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", - "required", - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/" - "OPTIONAL_group[my_group]/required_field " - "is required and hasn't been supplied by the reader." - ), - id="required-field-not-provided-in-variadic-optional-group", - ), - pytest.param( - set_to_none_in_dict( - TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", - "required", - ), - (""), - id="required-field-provided-in-variadic-optional-group", - ), - pytest.param( - alter_dict( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", None - ), - "/ENTRY[my_entry]/optional_parent/optional_child", - None, - ), - (""), - id="no-child-provided-optional-parent", - ), - pytest.param(TEMPLATE, "", id="valid-data-dict"), - pytest.param( - remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), - "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", - id="missing-empty-yet-required-group", - ), - pytest.param( - remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), - "The required group, /ENTRY[my_entry]/required_group2, hasn't been supplied.", - id="missing-empty-yet-required-group2", - ), - pytest.param( - alter_dict( - remove_from_dict( - TEMPLATE, "/ENTRY[my_entry]/required_group/description" - ), - "/ENTRY[entry]/required_group", - None, - ), - "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", - id="allow-required-and-empty-group", - ), - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]", - "required", - ), - ( - "The required group, /ENTRY[my_entry]/" - "optional_parent/req_group_in_opt_group, " - "hasn't been supplied." - ), - id="req-group-in-opt-parent-removed", - ), - pytest.param( - remove_optional_parent(TEMPLATE), (""), id="opt-group-completely-removed" - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", - ["0", 1, 2], - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" - ), - id="wrong-type-array-in-attribute", - ), - pytest.param( - alter_dict( - TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] - ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" - ), - id="wrong-value-array-in-attribute", - ), + ("2.4E-23", 2.4e-23), + ("28", 28), + ("45.98", 45.98), + ("test", "test"), + (["59", "3.00005", "498E-36"], np.array([59.0, 3.00005, 4.98e-34])), + ("23 34 444 5000", np.array([23.0, 34.0, 444.0, 5000.0])), + ("xrd experiment", "xrd experiment"), + (None, None), ], ) -def test_validate_data_dict(caplog, data_dict, error_message, request): - """Unit test for the data validation routine.""" - - def format_error_message(msg: str) -> str: - return msg[msg.rfind("G: ") + 3 :].rstrip("\n") - - if request.node.callspec.id in ( - "valid-data-dict", - "lists", - "empty-optional-field", - "UTC-with-+00:00", - "UTC-with-Z", - "no-child-provided-optional-parent", - "link-dict-instead-of-int", - "opt-group-completely-removed", - "required-field-provided-in-variadic-optional-group", - "valid-ndarray-instead-of-char", - "list-of-int-instead-of-int", - "list-of-string-instead-of-chars", - "array-of-int32-instead-of-int", - "List-of-int-instead-of-int", - "positive-posint-list", - "positive-posint-array", - "array-of-chars", - "array-of-bytes-chars", - "array-of-float-instead-of-float", - "numpy-chararray", - ): - with caplog.at_level(logging.WARNING): - assert validate_dict_against("NXtest", data_dict)[0] - assert caplog.text == "" - # Missing required fields caught by logger with warning - elif request.node.callspec.id in ( - "empty-required-field", - "allow-required-and-empty-group", - "req-group-in-opt-parent-removed", - "missing-empty-yet-required-group", - "missing-empty-yet-required-group2", - ): - assert "" == caplog.text - captured_logs = caplog.records - assert not validate_dict_against("NXtest", data_dict)[0] - assert any( - error_message == format_error_message(rec.message) for rec in captured_logs - ) +def test_transform_to_intended_dt(input_data, expected_output): + """Transform to possible numerical method.""" + result = helpers.transform_to_intended_dt(input_data) + + # Use pytest.approx for comparing floating-point numbers + if isinstance(expected_output, np.ndarray): + np.testing.assert_allclose(result, expected_output, rtol=1e-3) + elif isinstance(expected_output, float): + assert result == pytest.approx(expected_output, rel=1e-5) else: - with caplog.at_level(logging.WARNING): - assert not validate_dict_against("NXtest", data_dict)[0] - assert any( - error_message == format_error_message(rec.message) for rec in caplog.records - ) + assert result == expected_output @pytest.mark.parametrize( diff --git a/tests/dataconverter/target1-aux b/tests/dataconverter/target1-aux index c59467255..601c3a4d8 100644 --- a/tests/dataconverter/target1-aux +++ b/tests/dataconverter/target1-aux @@ -1,33 +1,4 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Test cases for the helper functions used by the DataConverter.""" - -import logging -import os -import shutil -import xml.etree.ElementTree as ET -from typing import Optional - -import numpy as np -import pytest -from pynxtools.dataconverter import helpers from pynxtools.dataconverter.template import Template -from pynxtools.dataconverter.validation import validate_dict_against def remove_optional_parent(data_dict: Template): @@ -42,16 +13,6 @@ def remove_optional_parent(data_dict: Template): return internal_dict -def alter_dict(data_dict: Template, key: str, value: object): - """Helper function to alter a single entry in dict for parametrize.""" - if data_dict is not None: - internal_dict = Template(data_dict) - internal_dict[key] = value - return internal_dict - - return None - - def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): """Helper function to forcefully set path to 'None'""" if data_dict is None: @@ -103,91 +64,6 @@ def listify_template(data_dict: Template): return listified_template -@pytest.mark.parametrize( - "input_data, expected_output", - [ - ("2.4E-23", 2.4e-23), - ("28", 28), - ("45.98", 45.98), - ("test", "test"), - (["59", "3.00005", "498E-36"], np.array([59.0, 3.00005, 4.98e-34])), - ("23 34 444 5000", np.array([23.0, 34.0, 444.0, 5000.0])), - ("xrd experiment", "xrd experiment"), - (None, None), - ], -) -def test_transform_to_intended_dt(input_data, expected_output): - """Transform to possible numerical method.""" - result = helpers.transform_to_intended_dt(input_data) - - # Use pytest.approx for comparing floating-point numbers - if isinstance(expected_output, np.ndarray): - np.testing.assert_allclose(result, expected_output, rtol=1e-3) - elif isinstance(expected_output, float): - assert result == pytest.approx(expected_output, rel=1e-5) - else: - assert result == expected_output - - -@pytest.fixture(name="template") -def fixture_template(): - """pytest fixture to use the same template in all tests""" - nxdl_root = ET.parse("src/pynxtools/data/NXtest.nxdl.xml").getroot() - - template = Template() - helpers.generate_template_from_nxdl(nxdl_root, template) - return template - - -@pytest.mark.usefixtures("template") -@pytest.fixture(name="filled_test_data") -def fixture_filled_test_data(template, tmp_path): - """pytest fixture to setup a filled in template.""" - - # Copy original measurement file to tmp dir, - # because h5py.ExternalLink is modifying it while - # linking the nxs file. - shutil.copy( - os.path.join( - os.getcwd(), "src", "pynxtools", "data", "xarray_saved_small_calibration.h5" - ), - tmp_path, - ) - - template.clear() - template[ - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" - ] = 2 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = "nm" - template["/ENTRY[my_entry]/optional_parent/required_child"] = 1 - template["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value"] = 2 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units"] = "eV" - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( - [1, 2, 3], dtype=np.int8 - ) - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = "kg" - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = "just chars" - template["/ENTRY[my_entry]/definition"] = "NXtest" - template["/ENTRY[my_entry]/definition/@version"] = "2.4.6" - template["/ENTRY[my_entry]/program_name"] = "Testing program" - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( - "2022-01-22T12:14:12.05018+00:00" - ) - template["/ENTRY[my_entry]/required_group/description"] = "An example description" - template["/ENTRY[my_entry]/required_group2/description"] = "An example description" - template["/ENTRY[my_entry]/does/not/exist"] = "random" - template["/ENTRY[my_entry]/links/ext_link"] = { - "link": f"{tmp_path}/xarray_saved_small_calibration.h5:/axes/ax3" - } - return template - - TEMPLATE = Template() TEMPLATE["optional"][ "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" @@ -816,98 +692,3 @@ def test_validate_data_dict(caplog, data_dict, error_message, request): assert any( error_message == format_error_message(rec.message) for rec in caplog.records ) - - -@pytest.mark.parametrize( - "nxdl_path,expected", - [ - pytest.param( - "/ENTRY/definition/@version", - ["/ENTRY[entry]/definition/@version"], - id="path-exists-in-dict", - ), - pytest.param("/RANDOM/does/not/@exist", [], id="path-does-not-exist-in-dict"), - ], -) -def test_path_in_data_dict(nxdl_path, expected, template): - """Unit test for helper function to check if an NXDL path exists in the reader dictionary.""" - assert helpers.path_in_data_dict(nxdl_path, tuple(template.keys())) == expected - - -def test_atom_type_extractor_and_hill_conversion(): - """ - Test atom type extractor and conversion to hill - """ - - test_chemical_formula = "(C38H54S4)n(NaO2)5(CH4)NH3B" - expected_atom_types = ["C", "H", "B", "N", "Na", "O", "S"] - - atom_list = helpers.extract_atom_types(test_chemical_formula) - - assert expected_atom_types == atom_list - - -def test_writing_of_root_attributes(caplog): - """ - Tests if all root attributes are populated - """ - template = Template() - filename = "my_nexus_file.nxs" - with caplog.at_level(logging.WARNING): - helpers.add_default_root_attributes(template, filename) - helpers.write_nexus_def_to_entry(template, "entry", "NXtest") - helpers.write_nexus_def_to_entry(template, "entry1", "NXtest") - - assert "" == caplog.text - - keys_added = template.keys() - assert "/@NX_class" in keys_added - assert template["/@NX_class"] == "NXroot" - assert "/@file_name" in keys_added - assert template["/@file_name"] == filename - assert "/@file_time" in keys_added - assert "/@file_update_time" in keys_added - assert "/@NeXus_repository" in keys_added - assert "/@NeXus_version" in keys_added - assert "/@HDF5_version" in keys_added - assert "/@h5py_version" in keys_added - assert "/ENTRY[entry]/definition" in keys_added - assert "/ENTRY[entry]/definition/@version" in keys_added - assert "/ENTRY[entry1]/definition" in keys_added - assert "/ENTRY[entry1]/definition/@version" in keys_added - - -def test_warning_on_root_attribute_overwrite(caplog): - """ - A warning is emitted when a root attribute is overwritten - by pynxtools. - """ - template = Template() - template["/@NX_class"] = "NXwrong" - filname = "my_nexus_file.nxs" - with caplog.at_level(logging.WARNING): - helpers.add_default_root_attributes(template, filname) - error_text = ( - "The NXroot entry '/@NX_class' (value: NXwrong) should not be changed by the reader. " - "This is overwritten by the actually used value 'NXroot'" - ) - assert error_text in caplog.text - - assert "/@NX_class" in template.keys() - assert template["/@NX_class"] == "NXroot" - - -def test_warning_on_definition_changed_by_reader(caplog): - template = Template() - template["/ENTRY[entry]/definition"] = "NXwrong" - with caplog.at_level(logging.WARNING): - helpers.write_nexus_def_to_entry(template, "entry", "NXtest") - - error_text = ( - "The entry '/ENTRY[entry]/definition' (value: NXtest) should not be changed by the reader. " - "This is overwritten by the actually used value 'NXwrong'" - ) - assert error_text in caplog.text - - assert "/ENTRY[entry]/definition" in template.keys() - assert template["/ENTRY[entry]/definition"] == "NXtest" From a93300feef807ac3574aed3f8303ac2304c25f1e Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:08:08 +0100 Subject: [PATCH 35/61] revert name of original file --- tests/dataconverter/{source-aux => test_helpers.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/dataconverter/{source-aux => test_helpers.py} (100%) diff --git a/tests/dataconverter/source-aux b/tests/dataconverter/test_helpers.py similarity index 100% rename from tests/dataconverter/source-aux rename to tests/dataconverter/test_helpers.py From 9023d8d92ae272e73f484b8678f3272a28e21497 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:11:06 +0100 Subject: [PATCH 36/61] rename original targets --- tests/dataconverter/{test_validation.py => target1-ren} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/dataconverter/{test_validation.py => target1-ren} (100%) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/target1-ren similarity index 100% rename from tests/dataconverter/test_validation.py rename to tests/dataconverter/target1-ren From 36b6517b3991d9c97936c75d6922b5139881d3ae Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:11:40 +0100 Subject: [PATCH 37/61] rename aux targets --- tests/dataconverter/{target1-aux => target1-ren} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/dataconverter/{target1-aux => target1-ren} (100%) diff --git a/tests/dataconverter/target1-aux b/tests/dataconverter/target1-ren similarity index 100% rename from tests/dataconverter/target1-aux rename to tests/dataconverter/target1-ren From d878bf04cca04f35193781637d428b138af55215 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 16:20:06 +0100 Subject: [PATCH 38/61] restore original target filenames --- tests/dataconverter/{target1-ren => test_validation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/dataconverter/{target1-ren => test_validation.py} (100%) diff --git a/tests/dataconverter/target1-ren b/tests/dataconverter/test_validation.py similarity index 100% rename from tests/dataconverter/target1-ren rename to tests/dataconverter/test_validation.py From 5bba3d30ec03bb0e966ae6ec7a50abe3dfcbbb6e Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 11 Mar 2025 20:44:56 +0100 Subject: [PATCH 39/61] fix converted tests --- tests/dataconverter/test_validation.py | 117 +++++++++---------------- 1 file changed, 40 insertions(+), 77 deletions(-) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 3d159d821..47e1956aa 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -31,18 +31,6 @@ ) -def remove_optional_parent(data_dict: Template): - """Completely removes the optional group from the test Template.""" - internal_dict = Template(data_dict) - del internal_dict["/ENTRY[my_entry]/optional_parent/required_child"] - del internal_dict["/ENTRY[my_entry]/optional_parent/optional_child"] - del internal_dict[ - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]" - ] - - return internal_dict - - def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): """Helper function to forcefully set path to 'None'""" if data_dict is None: @@ -151,6 +139,9 @@ def get_data_dict(): TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( "nm" # pylint: disable=E1126 ) +TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr"] = ( + 2.0, +) TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 @@ -557,6 +548,42 @@ def get_data_dict(): ), id="empty-required-field", ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr", + "optional", + ), + "", + id="removed-optional-value", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + "optional", + ), + "There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value, but the field does not exist.", + id="removed-optional-value-with-attribute-remaining", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + "optional", + ), + "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units will not be written.", + id="removed-optional-value-with-attribute-remaining", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "required", + ), + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is required and hasn't been supplied by the reader.", + id="missing-required-value", + ), pytest.param( set_whole_group_to_none( set_whole_group_to_none( @@ -740,6 +767,7 @@ def format_error_message(msg: str) -> str: "array-of-bytes-chars", "array-of-float-instead-of-float", "numpy-chararray", + "removed-optional-value", ): with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] @@ -764,68 +792,3 @@ def format_error_message(msg: str) -> str: assert any( error_message == format_error_message(rec.message) for rec in caplog.records ) - - -@pytest.mark.parametrize( - "data_dict", - [ - pytest.param(get_data_dict(), id="valid-unaltered-data-dict"), - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr", - "optional", - ), - id="removed-optional-value", - ), - ], -) -def test_valid_data_dict(caplog, data_dict): - with caplog.at_level(logging.WARNING): - assert validate_dict_against("NXtest", data_dict)[0] - assert caplog.text == "" - - -@pytest.mark.parametrize( - "data_dict, error_message_1, error_message_2", - [ - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - "optional", - ), - "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units will not be written.", - "There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value, but the field does not exist.", - id="removed-optional-value-with-attribute-remaining", - ), - ], -) -def test_data_dict_attr_with_no_field( - caplog, data_dict, error_message_1, error_message_2 -): - with caplog.at_level(logging.WARNING): - assert not validate_dict_against("NXtest", data_dict)[0] - assert error_message_1 in caplog.text - assert error_message_2 in caplog.text - - -@pytest.mark.parametrize( - "data_dict, error_message", - [ - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "required", - ), - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is required and hasn't been supplied by the reader.", - id="missing-required-value", - ) - ], -) -def test_validation_shows_warning(caplog, data_dict, error_message): - with caplog.at_level(logging.WARNING): - assert not validate_dict_against("NXtest", data_dict)[0] - - assert error_message in caplog.text From 6520ffa08460bc1fdc47a52d87423ff1ade82367 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 11 Mar 2025 21:39:09 +0100 Subject: [PATCH 40/61] add additional tests for base class elements --- tests/dataconverter/test_validation.py | 134 ++++++++++++++++--------- 1 file changed, 85 insertions(+), 49 deletions(-) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 47e1956aa..052771503 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -82,55 +82,6 @@ def listify_template(data_dict: Template): return listified_template -def get_data_dict(): - return { - "/ENTRY[my_entry]/optional_parent/required_child": 1, - "/ENTRY[my_entry]/optional_parent/optional_child": 1, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]": 2, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr": 2.0, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value": 2.0, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units": "nm", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value": True, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units": "", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value": 2, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units": "eV", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value": np.array( - [1, 2, 3], dtype=np.int8 - ), - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units": "kg", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value": "just chars", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value/@units": "", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type": "2nd type", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array": [0, 1, 2], - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value": "2022-01-22T12:14:12.05018+00:00", - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value/@units": "", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/anamethatRENAMES[anamethatichangetothis]": 2, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value": True, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units": "", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value": 2, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units": "eV", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value": np.array( - [1, 2, 3], dtype=np.int8 - ), - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units": "kg", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value": "just chars", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value/@units": "", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type": "2nd type", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/type/@array": [0, 1, 2], - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value": "2022-01-22T12:14:12.05018+00:00", - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value/@units": "", - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field": 1, - "/ENTRY[my_entry]/definition": "NXtest", - "/ENTRY[my_entry]/definition/@version": "2.4.6", - "/ENTRY[my_entry]/program_name": "Testing program", - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field": 1, - "/ENTRY[my_entry]/required_group/description": "An example description", - "/ENTRY[my_entry]/required_group2/description": "An example description", - "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/data": 1, - "/@default": "Some NXroot attribute", - } - - TEMPLATE = Template() TEMPLATE["optional"][ "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" @@ -227,6 +178,12 @@ def get_data_dict(): "/ENTRY[entry]/optional_parent/req_group_in_opt_group", ] TEMPLATE["optional"]["/@default"] = "Some NXroot attribute" +# keys not registered in appdef +TEMPLATE["required"]["/ENTRY[my_entry]/duration"] = 1 # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/duration/@units"] = "s" # pylint: disable=E1126 +TEMPLATE["required"][ + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type" +] = "Ion Source" # pylint: disable=E1126 # pylint: disable=too-many-arguments @@ -738,6 +695,85 @@ def get_data_dict(): ), id="wrong-value-array-in-attribute", ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units", + "required", + ), + "Field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value requires a unit in the unit category NX_ENERGY.", + id="missing-unit", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/duration", + np.array([2.0, 3.0, 4.0], dtype=np.float32), + ), + ( + "The value at /ENTRY[my_entry]/duration should be" + " one of the following Python types: (, ), as defined in the NXDL as NX_INT." + ), + id="baseclass-wrong-dtype", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/duration/@units", + "required", + ), + "Field /ENTRY[my_entry]/duration requires a unit in the unit category NX_TIME.", + id="baseclass-missing-unit", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/duration", + "required", + ), + ( + "There were attributes set for the field /ENTRY[my_entry]/duration, but the field does not exist." + ), + id="baseclass-attribute-missing-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type", + "Wrong source type", + ), + ( + "The value at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type " + "should be one of the following: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', " + "'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', " + "'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', " + "'Metal Jet X-ray', 'Laser', 'Dye-Laser', 'Broadband Tunable Light Source', 'Halogen lamp', 'LED', " + "'Mercury Cadmium Telluride', 'Deuterium Lamp', 'Xenon Lamp', 'Globar', 'other']" + ), + id="baseclass-wrong-enum", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal_name", + 1, + ), + ( + "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal_name written without documentation." + ), + id="add-undocumented-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/@illegal", + "illegal_attribute", + ), + ( + "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/illegal written without documentation." + ), + id="add-undocumented-attribute", + ), ], ) def test_validate_data_dict(caplog, data_dict, error_message, request): From 67cb8c143c423154e22ff861fb6bb401311b8f77 Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 12 Mar 2025 22:27:50 +0100 Subject: [PATCH 41/61] fix validation issues and add further tests --- src/pynxtools/dataconverter/helpers.py | 7 +- src/pynxtools/dataconverter/validation.py | 59 ++++++++--- tests/dataconverter/test_validation.py | 115 ++++++++++++++++++++-- 3 files changed, 157 insertions(+), 24 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 9813e8898..e6851033d 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -114,7 +114,10 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"Expected a group at {path} but found a field or attribute." ) elif log_type == ValidationProblem.MissingDocumentation: - logger.warning(f"Field {path} written without documentation.") + if "@" in path.rsplit("/")[-1]: + logger.warning(f"Attribute {path} written without documentation.") + else: + logger.warning(f"Field {path} written without documentation.") elif log_type == ValidationProblem.MissingUnit: logger.warning( f"Field {path} requires a unit in the unit category {value}." @@ -122,7 +125,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar elif log_type == ValidationProblem.MissingRequiredAttribute: logger.warning(f'Missing attribute: "{path}"') elif log_type == ValidationProblem.UnitWithoutField: - logger.warning(f"Unit {path} in dataset without its field {value}") + logger.warning(f"Unit {path} in dataset without its field {value}.") elif log_type == ValidationProblem.AttributeForNonExistingField: logger.warning( f"There were attributes set for the field {path}, " diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index ae244e701..543eb3ce6 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -508,19 +508,26 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str # TODO: Raise error or log the issue? pass - def is_documented(key: str, node: NexusNode) -> bool: - if mapping.get(key) is None: - # This value is not really set. Skip checking it's documentation. - return True - + def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: for name in key[1:].replace("@", "").split("/"): children = node.get_all_direct_children_names() best_name = best_namefit_of(name, children) if best_name is None: - return False + return None node = node.search_add_child_for(best_name) + return node + + def is_documented(key: str, node: NexusNode) -> bool: + if mapping.get(key) is None: + # This value is not really set. Skip checking it's documentation. + return True + + node = add_best_matches_for(key, node) + if node is None: + return False + if isinstance(mapping[key], dict) and "link" in mapping[key]: # TODO: Follow link and check consistency with current field return True @@ -584,9 +591,10 @@ def check_attributes_of_nonexisting_field( for key in mapping: last_index = key.rfind("/") - if key[last_index + 1] == "@": + if key[last_index + 1] == "@" and key[last_index + 1 :] != "@units": # key is an attribute. Find a corresponding parent, check all the other # children of this parent + # ignore units here, they are checked separately attribute_parent_checked = False for key_iterating in mapping: # check if key_iterating starts with parent of the key OR any @@ -756,22 +764,46 @@ def startswith_with_variations( not_visited = list(mapping) recurse_tree(tree, nested_keys) + keys_to_remove = check_attributes_of_nonexisting_field(tree) + for not_visited_key in not_visited: if not_visited_key.endswith("/@units"): - if is_documented(not_visited_key.rsplit("/", 1)[0], tree): - continue - if not_visited_key.rsplit("/", 1)[0] not in not_visited: + # check that parent exists + if not_visited_key.rsplit("/", 1)[0] not in mapping.keys(): collector.collect_and_log( not_visited_key, ValidationProblem.UnitWithoutField, not_visited_key.rsplit("/", 1)[0], ) - if not ignore_undocumented: collector.collect_and_log( not_visited_key, - ValidationProblem.UnitWithoutDocumentation, - mapping[not_visited_key], + ValidationProblem.KeyToBeRemoved, + None, ) + keys_to_remove.append(not_visited_key) + + # parent key will be checked on its own if it exists, because it is in the list + continue + + if "@" in not_visited_key.rsplit("/")[-1]: + # check that parent exists + if not_visited_key.rsplit("/", 1)[0] not in mapping.keys(): + # check that parent is not a group + node = add_best_matches_for(not_visited_key.rsplit("/", 1)[0], tree) + if node.type != "group": + collector.collect_and_log( + not_visited_key.rsplit("/", 1)[0], + ValidationProblem.AttributeForNonExistingField, + None, + ) + collector.collect_and_log( + not_visited_key, + ValidationProblem.KeyToBeRemoved, + None, + ) + keys_to_remove.append(not_visited_key) + continue + if is_documented(not_visited_key, tree): continue @@ -780,7 +812,6 @@ def startswith_with_variations( not_visited_key, ValidationProblem.MissingDocumentation, None ) - keys_to_remove = check_attributes_of_nonexisting_field(tree) return (not collector.has_validation_problems(), keys_to_remove) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 052771503..0e7a6055e 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -520,7 +520,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", "optional", ), - "There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value, but the field does not exist.", + "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value.", id="removed-optional-value-with-attribute-remaining", ), pytest.param( @@ -704,6 +704,70 @@ def listify_template(data_dict: Template): "Field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value requires a unit in the unit category NX_ENERGY.", id="missing-unit", ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", + "required", + ), + "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value.", + id="unit-missing-field", + ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", + "required", + ), + "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units will not be written.", + id="unit-missing-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/required_group/illegal_name", + 1, + ), + ( + "Field /ENTRY[my_entry]/required_group/illegal_name written without documentation." + ), + id="add-undocumented-field", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/required_group/author", + "author", + ), + "/ENTRY[my_entry]/required_group/author/@illegal", + "illegal_attribute", + ), + ( + "Attribute /ENTRY[my_entry]/required_group/author/@illegal written without documentation." + ), + id="add-undocumented-attribute", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@default", + "unknown", + ), + "", + id="group-with-only-attributes", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@illegal", + "unknown", + ), + ( + "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@illegal written without documentation." + ), + id="group-with-illegal-attributes", + ), pytest.param( alter_dict( TEMPLATE, @@ -726,13 +790,24 @@ def listify_template(data_dict: Template): id="baseclass-missing-unit", ), pytest.param( - remove_from_dict( + alter_dict( TEMPLATE, - "/ENTRY[my_entry]/duration", - "required", + "/ENTRY[my_entry]/collection_time/@illegal", + "s", + ), + ( + "There were attributes set for the field /ENTRY[my_entry]/collection_time, but the field does not exist." + ), + id="baseclass-attribute-missing-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/collection_time/@illegal", + "s", ), ( - "There were attributes set for the field /ENTRY[my_entry]/duration, but the field does not exist." + "The attribute /ENTRY[my_entry]/collection_time/@illegal will not be written." ), id="baseclass-attribute-missing-field", ), @@ -761,7 +836,7 @@ def listify_template(data_dict: Template): ( "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal_name written without documentation." ), - id="add-undocumented-field", + id="baseclass-add-undocumented-field", ), pytest.param( alter_dict( @@ -770,9 +845,32 @@ def listify_template(data_dict: Template): "illegal_attribute", ), ( - "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/illegal written without documentation." + "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/@illegal written without documentation." ), - id="add-undocumented-attribute", + id="baseclass-add-undocumented-attribute", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units", + "illegal_attribute", + ), + ( + "Unit /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units " + "in dataset without its field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal." + ), + id="baseclass-add-unit-of-missing-undocumented-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units", + "illegal_attribute", + ), + ( + "The attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units will not be written." + ), + id="baseclass-add-unit-of-missing-undocumented-field", ), ], ) @@ -804,6 +902,7 @@ def format_error_message(msg: str) -> str: "array-of-float-instead-of-float", "numpy-chararray", "removed-optional-value", + "group-with-only-attributes", ): with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] From 25fe60edba7c2677f8e94ff1a37b116a04fbd23d Mon Sep 17 00:00:00 2001 From: rettigl Date: Wed, 12 Mar 2025 22:38:46 +0100 Subject: [PATCH 42/61] add case and tests for undocumented units --- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/validation.py | 9 ++++++++ tests/dataconverter/test_validation.py | 26 +++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index e6851033d..3cc5ce652 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -81,7 +81,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if log_type == ValidationProblem.UnitWithoutDocumentation: logger.warning( - f"The unit, {path} = {value}, is being written but has no documentation" + f"The unit, {path} = {value}, is being written but has no documentation." ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 543eb3ce6..4c4d35cc8 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -781,6 +781,15 @@ def startswith_with_variations( None, ) keys_to_remove.append(not_visited_key) + else: + # check that parent has units + node = add_best_matches_for(not_visited_key.rsplit("/", 1)[0], tree) + if node.unit is None: + collector.collect_and_log( + not_visited_key, + ValidationProblem.UnitWithoutDocumentation, + mapping[not_visited_key], + ) # parent key will be checked on its own if it exists, because it is in the list continue diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 0e7a6055e..b5a309131 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -768,6 +768,17 @@ def listify_template(data_dict: Template): ), id="group-with-illegal-attributes", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/optional_parent/required_child/@units", + "s", + ), + ( + "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s, is being written but has no documentation." + ), + id="field-with-illegal-unit", + ), pytest.param( alter_dict( TEMPLATE, @@ -872,6 +883,21 @@ def listify_template(data_dict: Template): ), id="baseclass-add-unit-of-missing-undocumented-field", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/required_group/author", + "author", + ), + "/ENTRY[my_entry]/required_group/author/@units", + "s", + ), + ( + "The unit, /ENTRY[my_entry]/required_group/author/@units = s, is being written but has no documentation." + ), + id="baseclass-field-with-illegal-unit", + ), ], ) def test_validate_data_dict(caplog, data_dict, error_message, request): From 3a4ecd8a11ea0cb7e9489f13abfcc3d287d4597f Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 17:33:34 +0100 Subject: [PATCH 43/61] reset definitions --- src/pynxtools/definitions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 3f66054d6..6e3134567 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 3f66054d6b1651617fdfbb24d4b2bfa33f75de66 +Subproject commit 6e3134567d715198a03499d2ff1811389b2e6462 From 74bb0daad10a5e1956bd733934cbc6c5eff1d766 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 17:43:30 +0100 Subject: [PATCH 44/61] revert tests and reflog --- tests/data/nexus/Ref_nexus_test.log | 1184 ++++++++++++--------------- tests/nexus/test_nexus.py | 6 +- 2 files changed, 509 insertions(+), 681 deletions(-) diff --git a/tests/data/nexus/Ref_nexus_test.log b/tests/data/nexus/Ref_nexus_test.log index 87b245a0a..54541649f 100644 --- a/tests/data/nexus/Ref_nexus_test.log +++ b/tests/data/nexus/Ref_nexus_test.log @@ -32,34 +32,24 @@ DEBUG - classpath: ['NXentry'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY NXentry.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:): DEBUG - - (**required**) :ref:`NXentry` describes the measurement. - - The top-level NeXus group which contains all the data and associated - information that comprise a single measurement. - It is mandatory that there is at least one - group of this type in the NeXus file. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - + (**required**) :ref:`NXentry` describes the measurement. + + The top-level NeXus group which contains all the data and associated + information that comprise a single measurement. + It is mandatory that there is at least one + group of this type in the NeXus file. + DEBUG - ===== ATTRS (//entry@NX_class) DEBUG - value: NXentry DEBUG - classpath: ['NXentry'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY NXentry.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/collection_time): @@ -70,9 +60,9 @@ NXentry.nxdl.xml:/collection_time DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/collection_time): DEBUG - - Time transpired actually collecting data i.e. taking out time when collection was - suspended due to e.g. temperature out of range - + Time transpired actually collecting data i.e. taking out time when collection was + suspended due to e.g. temperature out of range + DEBUG - ===== ATTRS (//entry/collection_time@units) DEBUG - value: s DEBUG - classpath: ['NXentry', 'NX_FLOAT'] @@ -84,247 +74,158 @@ DEBUG - classpath: ['NXentry', 'NXdata'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/DATA NXentry.nxdl.xml:/DATA -NXobject.nxdl.xml:/DATA NXdata.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/DATA): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/DATA): DEBUG - - The data group - - .. note:: Before the NIAC2016 meeting [#]_, at least one - :ref:`NXdata` group was required in each :ref:`NXentry` group. - At the NIAC2016 meeting, it was decided to make :ref:`NXdata` - an optional group in :ref:`NXentry` groups for data files that - do not use an application definition. - It is recommended strongly that all NeXus data files provide - a NXdata group. - It is permissable to omit the NXdata group only when - defining the default plot is not practical or possible - from the available data. - - For example, neutron event data may not have anything that - makes a useful plot without extensive processing. - - Certain application definitions override this decision and - require an :ref:`NXdata` group - in the :ref:`NXentry` group. The ``minOccurs=0`` attribute - in the application definition will indicate the - :ref:`NXdata` group - is optional, otherwise, it is required. - - .. [#] NIAC2016: - https://www.nexusformat.org/NIAC2016.html, - https://github.com/nexusformat/NIAC/issues/16 - - -DEBUG - documentation (NXobject.nxdl.xml:/DATA): -DEBUG - + The data group + + .. note:: Before the NIAC2016 meeting [#]_, at least one + :ref:`NXdata` group was required in each :ref:`NXentry` group. + At the NIAC2016 meeting, it was decided to make :ref:`NXdata` + an optional group in :ref:`NXentry` groups for data files that + do not use an application definition. + It is recommended strongly that all NeXus data files provide + a NXdata group. + It is permissable to omit the NXdata group only when + defining the default plot is not practical or possible + from the available data. + + For example, neutron event data may not have anything that + makes a useful plot without extensive processing. + + Certain application definitions override this decision and + require an :ref:`NXdata` group + in the :ref:`NXentry` group. The ``minOccurs=0`` attribute + in the application definition will indicate the + :ref:`NXdata` group + is optional, otherwise, it is required. + + .. [#] NIAC2016: + https://www.nexusformat.org/NIAC2016.html, + https://github.com/nexusformat/NIAC/issues/16 + DEBUG - documentation (NXdata.nxdl.xml:): DEBUG - - The :ref:`NXdata` class is designed to encapsulate all the information required for a set of data to be plotted. - NXdata groups contain plottable data (sometimes referred to as *signals* or *dependent variables*) and their - associated axis coordinates (sometimes referred to as *axes* or *independent variables*). - - The actual names of the :ref:`DATA ` and :ref:`AXISNAME ` fields - can be chosen :ref:`freely `, as indicated by the upper case (this is a common convention in all NeXus classes). - - .. note:: ``NXdata`` provides data and coordinates to be plotted but - does not describe how the data is to be plotted or even the dimensionality of the plot. - https://www.nexusformat.org/NIAC2018Minutes.html#nxdata-plottype--attribute - - **Signals:** - - .. index:: plotting - - The :ref:`DATA ` fields contain the signal values to be plotted. The name of the field - to be used as the *default plot signal* is provided by the :ref:`signal ` attribute. - The names of the fields to be used as *secondary plot signals* are provided by the - :ref:`auxiliary_signals` attribute. - - An example with three signals, one of which being the default - - .. code-block:: - - data:NXdata - @signal = "data1" - @auxiliary_signals = ["data2", "data3"] - data1: float[10,20,30] --> the default signal - data2: float[10,20,30] - data3: float[10,20,30] - - **Axes:** - - .. index:: axes (attribute) - .. index:: coordinates - - The :ref:`AXISNAME ` fields contain the axis coordinates associated with the data values. - The names of all :ref:`AXISNAME ` fields are listed in the - :ref:`axes ` attribute. - - `Rank` - - :ref:`AXISNAME ` fields are typically one-dimensional arrays, which annotate one of the dimensions. - - An example of this would be - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", "y"] --> the order matters - data: float[10,20] - x: float[10] --> coordinates along the first dimension - y: float[20] --> coordinates along the second dimension - - In this example each data point ``data[i,j]`` has axis coordinates ``[x[i], y[j]]``. - - However, the fields can also have a rank greater than 1, in which case the rank of each - :ref:`AXISNAME ` must be equal to the number of data dimensions it spans. - - An example of this would be - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", "y"] --> the order does NOT matter - @x_indices = [0, 1] - @y_indices = [0, 1] - data: float[10,20] - x: float[10,20] --> coordinates along both dimensions - y: float[10,20] --> coordinates along both dimensions - - In this example each data point ``data[i,j]`` has axis coordinates ``[x[i,j], y[i,j]]``. - - `Dimensions` - - The data dimensions annotated by an :ref:`AXISNAME ` field are defined by the - :ref:`AXISNAME_indices ` attribute. When this attribute is missing, - the position(s) of the :ref:`AXISNAME ` string in the - :ref:`axes ` attribute are used. - - When all :ref:`AXISNAME ` fields are one-dimensional, and none of the data dimensions - have more than one axis, the :ref:`AXISNAME_indices ` attributes - are often omitted. If one of the data dimensions has no :ref:`AXISNAME ` field, - the string “.” can be used in the corresponding index of the axes list. - - An example of this would be - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", ".", "z"] --> the order matters - data: float[10,20,30] - x: float[10] --> coordinates along the first dimension - z: float[30] --> coordinates along the third dimension - - When using :ref:`AXISNAME_indices ` this becomes - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", "z"] --> the order does NOT matter - data: float[10,20,30] - @x_indices = 0 - @z_indices = 2 - x: float[10] --> coordinates along the first dimension - z: float[30] --> coordinates along the third dimension - - When providing :ref:`AXISNAME_indices ` attributes it is recommended - to do it for all axes. - - `Non-trivial axes` - - What follows are two examples where :ref:`AXISNAME_indices ` attributes - cannot be omitted. - - The first is an example where data dimensions have alternative axis coordinates. The NXdata group represents - a stack of images collected at different energies. The ``wavelength`` is an alternative axis of ``energy`` - for the last dimension (or vice versa). - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", "y", "energy", "wavelength"] --> the order does NOT matter - @x_indices = 0 - @y_indices = 1 - @energy_indices = 2 - @wavelength_indices = 2 - data: float[10,20,30] - x: float[10] --> coordinates along the first dimension - y: float[20] --> coordinates along the second dimension - energy: float[30] --> coordinates along the third dimension - wavelength: float[30] --> coordinates along the third dimension - - The second is an example with coordinates that span more than one dimension. The NXdata group represents data - from 2D mesh scans performed at multiple energies. Each data point ``data[i,j,k]`` has axis coordinates - ``[x[i,j,k], y[i,j,k], energy[k]]``. - - .. code-block:: - - data:NXdata - @signal = "data" - @axes = ["x", "y", "energy"] --> the order does NOT matter - @x_indices = [0, 1, 2] - @y_indices = [0, 1, 2] - @energy_indices = 2 - data: float[10,20,30] - x: float[10,20,30] --> coordinates along all dimensions - y: float[10,20,30] --> coordinates along all dimensions - energy: float[30] --> coordinates along the third dimension - - **Uncertainties:** - - Standard deviations on data values as well as coordinates can be provided by - :ref:`FIELDNAME_errors ` fields where ``FIELDNAME`` is the name of a - :ref:`DATA ` field or an :ref:`AXISNAME ` field. - - An example of uncertainties on the signal, auxiliary signals and axis coordinates - - .. code-block:: - - data:NXdata - @signal = "data1" - @auxiliary_signals = ["data2", "data3"] - @axes = ["x", "z"] - @x_indices = 0 - @z_indices = 2 - data1: float[10,20,30] - data2: float[10,20,30] - data3: float[10,20,30] - x: float[10] - z: float[30] - data1_errors: float[10,20,30] - data2_errors: float[10,20,30] - data3_errors: float[10,20,30] - x_errors: float[10] - z_errors: float[30] - - -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - + :ref:`NXdata` describes the plottable data and related dimension scales. + + .. index:: plotting + + It is strongly recommended that there is at least one :ref:`NXdata` + group in each :ref:`NXentry` group. + Note that the fields named ``AXISNAME`` and ``DATA`` + can be defined with different names. + (Upper case is used to indicate that the actual name is left to the user.) + The ``signal`` and ``axes`` attributes of the + ``data`` group define which items + are plottable data and which are *dimension scales*, respectively. + + :ref:`NXdata` is used to implement one of the basic motivations in NeXus, + to provide a default plot for the data of this :ref:`NXentry`. The actual data + might be stored in another group and (hard) linked to the :ref:`NXdata` group. + + * Each :ref:`NXdata` group will define one field as the default + plottable data. The value of the ``signal`` attribute names this field. + Additional fields may be used to describe the dimension scales and + uncertainities. + The ``auxiliary_signals`` attribute is a list of the other fields + to be plotted with the ``signal`` data. + * The plottable data may be of arbitrary rank up to a maximum + of ``NX_MAXRANK=32`` (for compatibility with backend file formats). + * The plottable data will be named as the value of + the group ``signal`` attribute, such as:: + + data:NXdata + @signal = "counts" + @axes = "mr" + @mr_indices = 0 + counts: float[100] --> the default dependent data + mr: float[100] --> the default independent data + + The field named in the ``signal`` attribute **must** exist, either + directly as a NeXus field or defined through a link. + + * The group ``axes`` attribute will name the + *dimension scale* associated with the plottable data. + + If available, the standard deviations of the data are to be + stored in a data set of the same rank and dimensions, with the name ``errors``. + + * For each data dimension, there should be a one-dimensional array + of the same length. + * These one-dimensional arrays are the *dimension scales* of the + data, *i.e*. the values of the independent variables at which the data + is measured, such as scattering angle or energy transfer. + + .. index:: link + .. index:: axes (attribute) + + The preferred method to associate each data dimension with + its respective dimension scale is to specify the field name + of each dimension scale in the group ``axes`` attribute as a string list. + Here is an example for a 2-D data set *data* plotted + against *time*, and *pressure*. (An additional *temperature* data set + is provided and could be selected as an alternate for the *pressure* axis.):: + + data_2d:NXdata + @signal="data" + @axes=["time", "pressure"] + @pressure_indices=1 + @temperature_indices=1 + @time_indices=0 + data: float[1000,20] + pressure: float[20] + temperature: float[20] + time: float[1000] + + .. rubric:: Old methods to identify the plottable data + + There are two older methods of associating + each data dimension to its respective dimension scale. + Both are now out of date and + should not be used when writing new data files. + However, client software should expect to see data files + written with any of these methods. + + * One method uses the ``axes`` + attribute to specify the names of each *dimension scale*. + + * The oldest method uses the ``axis`` attribute on each + *dimension scale* to identify + with an integer the axis whose value is the number of the dimension. + + .. index: !plot; axis label + plot, axis units + units + dimension scale + + Each axis of the plot may be labeled with information from the + dimension scale for that axis. The optional ``@long_name`` attribute + is provided as the axis label default. If ``@long_name`` is not + defined, then use the name of the dimension scale. A ``@units`` attribute, + if available, may be added to the axis label for further description. + See the section :ref:`Design-Units` for more information. + + .. index: !plot; axis title + + The optional ``title`` field, if available, provides a suggested + title for the plot. If no ``title`` field is found in the :ref:`NXdata` + group, look for a ``title`` field in the parent :ref:`NXentry` group, + with a fallback to displaying the path to the :ref:`NXdata` group. + + NeXus is about how to find and annotate the data to be plotted + but not to describe how the data is to be plotted. + (https://www.nexusformat.org/NIAC2018Minutes.html#nxdata-plottype--attribute) + DEBUG - ===== ATTRS (//entry/data@NX_class) DEBUG - value: NXdata DEBUG - classpath: ['NXentry', 'NXdata'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/DATA NXentry.nxdl.xml:/DATA -NXobject.nxdl.xml:/DATA NXdata.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== ATTRS (//entry/data@axes) @@ -333,85 +234,96 @@ DEBUG - classpath: ['NXentry', 'NXdata'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/DATA NXentry.nxdl.xml:/DATA -NXobject.nxdl.xml:/DATA NXdata.nxdl.xml: -NXobject.nxdl.xml: DEBUG - NXdata.nxdl.xml:@axes - [NX_CHAR] DEBUG - <> DEBUG - documentation (NXdata.nxdl.xml:/axes): DEBUG - - .. index:: plotting - - The ``axes`` attribute is a list of strings which are the names of the :ref:`AXISNAME ` fields - that contain the values of the coordinates along the :ref:`data ` dimensions. - - .. note:: When ``axes`` contains multiple strings, it must be saved as an actual array - of strings and not a single comma separated string. - + .. index:: plotting + + Array of strings holding the :ref:`names ` of + the independent data fields used in the default plot for all of + the dimensions of the :ref:`signal ` + as well as any :ref:`auxiliary signals `. + + One name is provided for every dimension in the *signal* or *auxiliary signal* fields. + + The *axes* values are the names of fields or links that *must* exist and be direct + children of this NXdata group. + + An axis slice is specified using a field named ``AXISNAME_indices`` + as described below (where the text shown here as ``AXISNAME`` is to be + replaced by the actual field name). + + When no default axis is available for a particular dimension + of the plottable data, use a "." in that position. + Such as:: + + @axes=["time", ".", "."] + + Since there are three items in the list, the *signal* field + must be a three-dimensional array (rank=3). The first dimension + is described by the values of a one-dimensional array named ``time`` + while the other two dimensions have no fields to be used as dimension scales. + + See examples provided on the NeXus wiki: + https://www.nexusformat.org/2014_axes_and_uncertainties.html + + If there are no axes at all (such as with a stack of images), + the axes attribute can be omitted. + DEBUG - ===== ATTRS (//entry/data@signal) DEBUG - value: data DEBUG - classpath: ['NXentry', 'NXdata'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/DATA NXentry.nxdl.xml:/DATA -NXobject.nxdl.xml:/DATA NXdata.nxdl.xml: -NXobject.nxdl.xml: DEBUG - NXdata.nxdl.xml:@signal - [NX_CHAR] DEBUG - <> DEBUG - documentation (NXdata.nxdl.xml:/signal): DEBUG - - .. index:: find the default plottable data - .. index:: plotting - .. index:: signal attribute value - - The value is the :ref:`name ` of the signal that contains - the default plottable data. This field or link *must* exist and be a direct child - of this NXdata group. - - It is recommended (as of NIAC2014) to use this attribute - rather than adding a signal attribute to the field. - See https://www.nexusformat.org/2014_How_to_find_default_data.html - for a summary of the discussion. - + .. index:: find the default plottable data + .. index:: plotting + .. index:: signal attribute value + + Declares which NeXus field is the default. + The value is the :ref:`name ` of the data field to be plotted. + This field or link *must* exist and be a direct child of this NXdata group. + + It is recommended (as of NIAC2014) to use this attribute + rather than adding a signal attribute to the field. + See https://www.nexusformat.org/2014_How_to_find_default_data.html + for a summary of the discussion. + DEBUG - ===== FIELD (//entry/data/angles): DEBUG - value: [-1.96735314 -1.91500657 -1.86266001 -1.81031344 -1.75796688 -1.70562031 ... -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - <> DEBUG - Dataset referenced as NXdata AXIS #0 DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME): DEBUG - - Coordinate values along one or more :ref:`data ` dimensions. The rank must be equal - to the number of dimensions it spans. - - As the upper case ``AXISNAME`` indicates, the names of the ``AXISNAME`` fields can be chosen :ref:`freely `. - The :ref:`axes ` attribute can be used to find all datasets in the - ``NXdata`` that contain coordinate values. - - Most AXISNAME fields will be sequences of numbers but if an axis is better represented using names, such as channel names, - an array of NX_CHAR can be provided. - + Dimension scale defining an axis of the data. + Client is responsible for defining the dimensions of the data. + The name of this field may be changed to fit the circumstances. + Standard NeXus client tools will use the attributes to determine + how to use this field. + DEBUG - ===== ATTRS (//entry/data/angles@target) DEBUG - value: /entry/instrument/analyser/angles -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - @target - IS NOT IN SCHEMA DEBUG - DEBUG - ===== ATTRS (//entry/data/angles@units) DEBUG - value: 1/Å -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME -DEBUG - NXdata.nxdl.xml:/AXISNAME@units - [NX_CHAR] -DEBUG - Dataset referenced as NXdata AXIS #0 -DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME/units): -DEBUG - - Unit in which the coordinate values are expressed. - See the section :ref:`Design-Units` for more information. - +DEBUG - NXdata.nxdl.xml:/AXISNAME@units - REQUIRED, but undefined unit category DEBUG - ===== FIELD (//entry/data/data): DEBUG - value: [[0. 0. 0. ... 0. 0. 0.] ... DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] @@ -421,15 +333,15 @@ DEBUG - <> DEBUG - Dataset referenced as NXdata SIGNAL DEBUG - documentation (NXdata.nxdl.xml:/DATA): DEBUG - - .. index:: plotting - - Data values to be used as the NeXus *plottable data*. As the upper case ``DATA`` - indicates, the names of the ``DATA`` fields can be chosen :ref:`freely `. The :ref:`signal attribute ` - and :ref:`auxiliary_signals attribute` can be used to find all datasets in the ``NXdata`` - that contain data values. - - The maximum rank is ``32`` for compatibility with backend file formats. - + .. index:: plotting + + This field contains the data values to be used as the + NeXus *plottable data*. + Client is responsible for defining the dimensions of the data. + The name of this field may be changed to fit the circumstances. + Standard NeXus client tools will use the attributes to determine + how to use this field. + DEBUG - ===== ATTRS (//entry/data/data@target) DEBUG - value: /entry/instrument/analyser/data DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] @@ -445,80 +357,60 @@ NXdata.nxdl.xml:/DATA DEBUG - NXdata.nxdl.xml:/DATA@units - REQUIRED, but undefined unit category DEBUG - ===== FIELD (//entry/data/delays): DEBUG - value: [-1.1 -1.08041237 -1.06082474 -1.04123711 -1.02164948 -1.00206186 ... -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - <> DEBUG - Dataset referenced as NXdata AXIS #2 DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME): DEBUG - - Coordinate values along one or more :ref:`data ` dimensions. The rank must be equal - to the number of dimensions it spans. - - As the upper case ``AXISNAME`` indicates, the names of the ``AXISNAME`` fields can be chosen :ref:`freely `. - The :ref:`axes ` attribute can be used to find all datasets in the - ``NXdata`` that contain coordinate values. - - Most AXISNAME fields will be sequences of numbers but if an axis is better represented using names, such as channel names, - an array of NX_CHAR can be provided. - + Dimension scale defining an axis of the data. + Client is responsible for defining the dimensions of the data. + The name of this field may be changed to fit the circumstances. + Standard NeXus client tools will use the attributes to determine + how to use this field. + DEBUG - ===== ATTRS (//entry/data/delays@target) DEBUG - value: /entry/instrument/analyser/delays -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - @target - IS NOT IN SCHEMA DEBUG - DEBUG - ===== ATTRS (//entry/data/delays@units) DEBUG - value: fs -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME -DEBUG - NXdata.nxdl.xml:/AXISNAME@units - [NX_CHAR] -DEBUG - Dataset referenced as NXdata AXIS #2 -DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME/units): -DEBUG - - Unit in which the coordinate values are expressed. - See the section :ref:`Design-Units` for more information. - +DEBUG - NXdata.nxdl.xml:/AXISNAME@units - REQUIRED, but undefined unit category DEBUG - ===== FIELD (//entry/data/energies): DEBUG - value: [ 2.5 2.46917808 2.43835616 2.40753425 2.37671233 2.34589041 ... -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - <> DEBUG - Dataset referenced as NXdata AXIS #1 DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME): DEBUG - - Coordinate values along one or more :ref:`data ` dimensions. The rank must be equal - to the number of dimensions it spans. - - As the upper case ``AXISNAME`` indicates, the names of the ``AXISNAME`` fields can be chosen :ref:`freely `. - The :ref:`axes ` attribute can be used to find all datasets in the - ``NXdata`` that contain coordinate values. - - Most AXISNAME fields will be sequences of numbers but if an axis is better represented using names, such as channel names, - an array of NX_CHAR can be provided. - + Dimension scale defining an axis of the data. + Client is responsible for defining the dimensions of the data. + The name of this field may be changed to fit the circumstances. + Standard NeXus client tools will use the attributes to determine + how to use this field. + DEBUG - ===== ATTRS (//entry/data/energies@target) DEBUG - value: /entry/instrument/analyser/energies -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME DEBUG - @target - IS NOT IN SCHEMA DEBUG - DEBUG - ===== ATTRS (//entry/data/energies@units) DEBUG - value: eV -DEBUG - classpath: ['NXentry', 'NXdata', 'NX_CHAR_OR_NUMBER'] +DEBUG - classpath: ['NXentry', 'NXdata', 'NX_NUMBER'] DEBUG - classes: NXdata.nxdl.xml:/AXISNAME -DEBUG - NXdata.nxdl.xml:/AXISNAME@units - [NX_CHAR] -DEBUG - Dataset referenced as NXdata AXIS #1 -DEBUG - documentation (NXdata.nxdl.xml:/AXISNAME/units): -DEBUG - - Unit in which the coordinate values are expressed. - See the section :ref:`Design-Units` for more information. - +DEBUG - NXdata.nxdl.xml:/AXISNAME@units - REQUIRED, but undefined unit category DEBUG - ===== FIELD (//entry/definition): DEBUG - value: NXarpes DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -529,24 +421,26 @@ DEBUG - <> DEBUG - enumeration (NXarpes.nxdl.xml:/ENTRY/definition): DEBUG - -> NXarpes DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/definition): -DEBUG - Official NeXus NXDL schema to which this file conforms. +DEBUG - + Official NeXus NXDL schema to which this file conforms. + DEBUG - documentation (NXentry.nxdl.xml:/definition): DEBUG - - (alternate use: see same field in :ref:`NXsubentry` for preferred) - - Official NeXus NXDL schema to which this entry conforms which must be - the name of the NXDL file (case sensitive without the file extension) - that the NXDL schema is defined in. - - For example the ``definition`` field for a file that conformed to the - *NXarpes.nxdl.xml* definition must contain the string **NXarpes**. - - This field is provided so that :ref:`NXentry` can be the overlay position - in a NeXus data file for an application definition and its - set of groups, fields, and attributes. - - *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position. - + (alternate use: see same field in :ref:`NXsubentry` for preferred) + + Official NeXus NXDL schema to which this entry conforms which must be + the name of the NXDL file (case sensitive without the file extension) + that the NXDL schema is defined in. + + For example the ``definition`` field for a file that conformed to the + *NXarpes.nxdl.xml* definition must contain the string **NXarpes**. + + This field is provided so that :ref:`NXentry` can be the overlay position + in a NeXus data file for an application definition and its + set of groups, fields, and attributes. + + *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position. + DEBUG - ===== FIELD (//entry/duration): DEBUG - value: 7200 DEBUG - classpath: ['NXentry', 'NX_INT'] @@ -554,7 +448,9 @@ DEBUG - classes: NXentry.nxdl.xml:/duration DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/duration): -DEBUG - Duration of measurement +DEBUG - + Duration of measurement + DEBUG - ===== ATTRS (//entry/duration@units) DEBUG - value: s DEBUG - classpath: ['NXentry', 'NX_INT'] @@ -568,36 +464,25 @@ DEBUG - classes: NXentry.nxdl.xml:/end_time DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/end_time): -DEBUG - Ending time of measurement +DEBUG - + Ending time of measurement + DEBUG - ===== FIELD (//entry/entry_identifier): DEBUG - value: Run 22118 -DEBUG - classpath: ['NXentry', 'NX_CHAR'] -DEBUG - classes: -NXentry.nxdl.xml:/entry_identifier -DEBUG - <> -DEBUG - DEPRECATED - Use the field :ref:`identifier_entry ` instead. -DEBUG - documentation (NXentry.nxdl.xml:/entry_identifier): -DEBUG - unique identifier for the measurement, defined by the facility. +DEBUG - classpath: ['NXentry'] +DEBUG - NOT IN SCHEMA +DEBUG - DEBUG - ===== FIELD (//entry/experiment_identifier): DEBUG - value: F-20170538 -DEBUG - classpath: ['NXentry', 'NX_CHAR'] -DEBUG - classes: -NXentry.nxdl.xml:/experiment_identifier -DEBUG - <> -DEBUG - DEPRECATED - Use the field :ref:`identifier_experiment ` instead. -DEBUG - documentation (NXentry.nxdl.xml:/experiment_identifier): +DEBUG - classpath: ['NXentry'] +DEBUG - NOT IN SCHEMA DEBUG - - Unique identifier for the experiment, - defined by the facility, - possibly linked to the proposals - DEBUG - ===== GROUP (//entry/instrument [NXarpes::/NXentry/NXinstrument]): DEBUG - classpath: ['NXentry', 'NXinstrument'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT NXentry.nxdl.xml:/INSTRUMENT NXinstrument.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT): DEBUG - @@ -605,24 +490,15 @@ DEBUG - documentation (NXentry.nxdl.xml:/INSTRUMENT): DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:): DEBUG - - Collection of the components of the instrument or beamline. - - Template of instrument descriptions comprising various beamline components. - Each component will also be a NeXus group defined by its distance from the - sample. Negative distances represent beamline components that are before the - sample while positive distances represent components that are after the sample. - This device allows the unique identification of beamline components in a way - that is valid for both reactor and pulsed instrumentation. - -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - + Collection of the components of the instrument or beamline. + + Template of instrument descriptions comprising various beamline components. + Each component will also be a NeXus group defined by its distance from the + sample. Negative distances represent beamline components that are before the + sample while positive distances represent components that are after the sample. + This device allows the unique identification of beamline components in a way + that is valid for both reactor and pulsed instrumentation. + DEBUG - ===== ATTRS (//entry/instrument@NX_class) DEBUG - value: NXinstrument DEBUG - classpath: ['NXentry', 'NXinstrument'] @@ -630,7 +506,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT NXentry.nxdl.xml:/INSTRUMENT NXinstrument.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== GROUP (//entry/instrument/analyser [NXarpes::/NXentry/NXinstrument/NXdetector]): @@ -639,8 +514,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser NXinstrument.nxdl.xml:/DETECTOR NXdetector.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser): DEBUG - @@ -648,21 +521,8 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/DETECTOR): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:): DEBUG - - A detector, detector bank, or multidetector. - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + A detector, detector bank, or multidetector. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/instrument/analyser@NX_class) DEBUG - value: NXdetector DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] @@ -670,8 +530,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser NXinstrument.nxdl.xml:/DETECTOR NXdetector.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/analyser/acquisition_mode): @@ -695,12 +553,19 @@ DEBUG - -> pulse counting DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/acquisition_mode): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:/acquisition_mode): -DEBUG - The acquisition mode of the detector. +DEBUG - + The acquisition mode of the detector. + DEBUG - ===== FIELD (//entry/instrument/analyser/amplifier_type): DEBUG - value: MCP -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_CHAR'] +DEBUG - classes: +NXdetector.nxdl.xml:/amplifier_type +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/amplifier_type): DEBUG - + Type of electron amplifier, MCP, channeltron, etc. + DEBUG - ===== FIELD (//entry/instrument/analyser/angles): DEBUG - value: [-1.96735314 -1.91500657 -1.86266001 -1.81031344 -1.75796688 -1.70562031 ... DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -709,10 +574,10 @@ NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/angles DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/angles): DEBUG - - Angular axis of the analyser data - which dimension the axis applies to is defined - using the normal NXdata methods. - + Angular axis of the analyser data + which dimension the axis applies to is defined + using the normal NXdata methods. + DEBUG - ===== ATTRS (//entry/instrument/analyser/angles@target) DEBUG - value: /entry/instrument/analyser/angles DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -742,29 +607,29 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/data): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:/data): DEBUG - - Data values from the detector. The rank and dimension ordering should follow a principle of - slowest to fastest measurement axes and may be explicitly specified in application definitions. - - Mechanical scanning of objects (e.g. sample position/angle, incident beam energy, etc) tends to be - the slowest part of an experiment and so any such scan axes should be allocated to the first dimensions - of the array. Note that in some cases it may be useful to represent a 2D set of scan points as a single - scan-axis in the data array, especially if the scan pattern doesn't fit a rectangular array nicely. - Repetition of an experiment in a time series tends to be used similar to a slow scan axis - and so will often be in the first dimension of the data array. - - The next fastest axes are typically the readout of the detector. A point detector will not add any dimensions - (as it is just a single value per scan point) to the data array, a strip detector will add one dimension, an - imaging detector will add two dimensions (e.g. X, Y axes) and detectors outputting higher dimensional data - will add the corresponding number of dimensions. Note that the detector dimensions don't necessarily have to - be written in order of the actual readout speeds - the slowest to fastest rule principle is only a guide. - - Finally, detectors that operate in a time-of-flight mode, such as a neutron spectrometer or a silicon drift - detector (used for X-ray fluorescence) tend to have their dimension(s) added to the last dimensions in the data array. - - The type of each dimension should should follow the order of scan points, detector pixels, - then time-of-flight (i.e. spectroscopy, spectrometry). The rank and dimension sizes (see symbol list) - shown here are merely illustrative of coordination between related datasets. - + Data values from the detector. The rank and dimension ordering should follow a principle of + slowest to fastest measurement axes and may be explicitly specified in application definitions. + + Mechanical scanning of objects (e.g. sample position/angle, incident beam energy, etc) tends to be + the slowest part of an experiment and so any such scan axes should be allocated to the first dimensions + of the array. Note that in some cases it may be useful to represent a 2D set of scan points as a single + scan-axis in the data array, especially if the scan pattern doesn't fit a rectangular array nicely. + Repetition of an experiment in a time series tends to be used similar to a slow scan axis + and so will often be in the first dimension of the data array. + + The next fastest axes are typically the readout of the detector. A point detector will not add any dimensions + (as it is just a single value per scan point) to the data array, a strip detector will add one dimension, an + imaging detector will add two dimensions (e.g. X, Y axes) and detectors outputting higher dimensional data + will add the corresponding number of dimensions. Note that the detector dimensions don't necessarily have to + be written in order of the actual readout speeds - the slowest to fastest rule principle is only a guide. + + Finally, detectors that operate in a time-of-flight mode, such as a neutron spectrometer or a silicon drift + detector (used for X-ray fluorescence) tend to have their dimension(s) added to the last dimensions in the data array. + + The type of each dimension should should follow the order of scan points, detector pixels, + then time-of-flight (i.e. spectroscopy, spectrometry). The rank and dimension sizes (see symbol list) + shown here are merely illustrative of coordination between related datasets. + DEBUG - ===== ATTRS (//entry/instrument/analyser/data@target) DEBUG - value: /entry/instrument/analyser/data DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -798,9 +663,14 @@ DEBUG - NOT IN SCHEMA DEBUG - DEBUG - ===== FIELD (//entry/instrument/analyser/detector_type): DEBUG - value: DLD -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_CHAR'] +DEBUG - classes: +NXdetector.nxdl.xml:/detector_type +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/detector_type): DEBUG - + Description of the detector type, DLD, Phosphor+CCD, CMOS. + DEBUG - ===== FIELD (//entry/instrument/analyser/dispersion_scheme): DEBUG - value: Time of flight DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] @@ -814,10 +684,10 @@ NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/energies DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/energies): DEBUG - - Energy axis of the analyser data - which dimension the axis applies to is defined - using the normal NXdata methods. - + Energy axis of the analyser data + which dimension the axis applies to is defined + using the normal NXdata methods. + DEBUG - ===== ATTRS (//entry/instrument/analyser/energies@target) DEBUG - value: /entry/instrument/analyser/energies DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -838,7 +708,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/entrance_slit_setting DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/entrance_slit_setting): -DEBUG - dial setting of the entrance slit +DEBUG - + dial setting of the entrance slit + DEBUG - ===== FIELD (//entry/instrument/analyser/entrance_slit_shape): DEBUG - value: straight DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_CHAR'] @@ -857,7 +729,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/entrance_slit_size DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/entrance_slit_size): -DEBUG - size of the entrance slit +DEBUG - + size of the entrance slit + DEBUG - ===== ATTRS (//entry/instrument/analyser/entrance_slit_size@units) DEBUG - value: um DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -901,7 +775,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/lens_mode DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/lens_mode): -DEBUG - setting for the electron analyser lens +DEBUG - + setting for the electron analyser lens + DEBUG - ===== FIELD (//entry/instrument/analyser/magnification): DEBUG - value: -1.5 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] @@ -914,7 +790,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/pass_energy DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/pass_energy): -DEBUG - energy of the electrons on the mean path of the analyser +DEBUG - + energy of the electrons on the mean path of the analyser + DEBUG - ===== ATTRS (//entry/instrument/analyser/pass_energy@units) DEBUG - value: eV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -933,7 +811,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/region_origin DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/region_origin): -DEBUG - origin of rectangular region selected for readout +DEBUG - + origin of rectangular region selected for readout + DEBUG - ===== FIELD (//entry/instrument/analyser/region_size): DEBUG - value: [ 80 146] DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_INT'] @@ -941,12 +821,19 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/region_size DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/region_size): -DEBUG - size of rectangular region selected for readout +DEBUG - + size of rectangular region selected for readout + DEBUG - ===== FIELD (//entry/instrument/analyser/sensor_count): DEBUG - value: 4 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_INT'] +DEBUG - classes: +NXdetector.nxdl.xml:/sensor_count +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/sensor_count): DEBUG - + Number of imaging sensor chips on the detector. + DEBUG - ===== FIELD (//entry/instrument/analyser/sensor_size): DEBUG - value: [ 80 146] DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_INT'] @@ -954,7 +841,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/sensor_size DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/sensor_size): -DEBUG - number of raw active elements in each dimension +DEBUG - + number of raw active elements in each dimension + DEBUG - ===== FIELD (//entry/instrument/analyser/time_per_channel): DEBUG - value: 7200 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -962,7 +851,9 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/time_per_channel DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/time_per_channel): -DEBUG - todo: define more clearly +DEBUG - + todo: define more clearly + DEBUG - ===== ATTRS (//entry/instrument/analyser/time_per_channel@units) DEBUG - value: s DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -984,44 +875,33 @@ DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] DEBUG - classes: NXinstrument.nxdl.xml:/BEAM NXbeam.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM): DEBUG - DEBUG - documentation (NXbeam.nxdl.xml:): DEBUG - - Properties of the neutron or X-ray beam at a given location. - - This group is intended to be referenced - by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is - especially valuable in storing the results of instrument simulations in which it is useful - to specify the beam profile, time distribution etc. at each beamline component. Otherwise, - its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron - scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is - considered as a beamline component and this group may be defined as a subgroup directly inside - :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an - :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). - - Note that ``incident_wavelength``, ``incident_energy``, and related fields can be a scalar values or arrays, depending on the use case. - To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presence of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. - -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - + Properties of the neutron or X-ray beam at a given location. + + This group is intended to be referenced + by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is + especially valuable in storing the results of instrument simulations in which it is useful + to specify the beam profile, time distribution etc. at each beamline component. Otherwise, + its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron + scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is + considered as a beamline component and this group may be defined as a subgroup directly inside + :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an + :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). + + Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. + To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0@NX_class) DEBUG - value: NXbeam DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] DEBUG - classes: NXinstrument.nxdl.xml:/BEAM NXbeam.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/beam_probe_0/distance): @@ -1031,7 +911,9 @@ DEBUG - classes: NXbeam.nxdl.xml:/distance DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/distance): -DEBUG - Distance from sample. Note, it is recommended to use NXtransformations instead. +DEBUG - + Distance from sample. Note, it is recommended to use NXtransformations instead. + DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0/distance@units) DEBUG - value: cm DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] @@ -1071,7 +953,7 @@ NXbeam.nxdl.xml:/pulse_duration DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration): DEBUG - - FWHM duration of the pulses at the given location. + FWHM duration of the pulses at the diagnostic point DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0/pulse_duration@units) DEBUG - value: fs @@ -1104,44 +986,33 @@ DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] DEBUG - classes: NXinstrument.nxdl.xml:/BEAM NXbeam.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM): DEBUG - DEBUG - documentation (NXbeam.nxdl.xml:): DEBUG - - Properties of the neutron or X-ray beam at a given location. - - This group is intended to be referenced - by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is - especially valuable in storing the results of instrument simulations in which it is useful - to specify the beam profile, time distribution etc. at each beamline component. Otherwise, - its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron - scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is - considered as a beamline component and this group may be defined as a subgroup directly inside - :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an - :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). - - Note that ``incident_wavelength``, ``incident_energy``, and related fields can be a scalar values or arrays, depending on the use case. - To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presence of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. - -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - + Properties of the neutron or X-ray beam at a given location. + + This group is intended to be referenced + by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is + especially valuable in storing the results of instrument simulations in which it is useful + to specify the beam profile, time distribution etc. at each beamline component. Otherwise, + its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron + scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is + considered as a beamline component and this group may be defined as a subgroup directly inside + :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an + :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). + + Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. + To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0@NX_class) DEBUG - value: NXbeam DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] DEBUG - classes: NXinstrument.nxdl.xml:/BEAM NXbeam.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/average_power): @@ -1152,7 +1023,7 @@ NXbeam.nxdl.xml:/average_power DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/average_power): DEBUG - - Average power at the at the given location. + Average power at the diagnostic point DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/average_power@units) DEBUG - value: uW @@ -1177,7 +1048,9 @@ DEBUG - classes: NXbeam.nxdl.xml:/distance DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/distance): -DEBUG - Distance from sample. Note, it is recommended to use NXtransformations instead. +DEBUG - + Distance from sample. Note, it is recommended to use NXtransformations instead. + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/distance@units) DEBUG - value: cm DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] @@ -1192,14 +1065,14 @@ NXbeam.nxdl.xml:/fluence DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/fluence): DEBUG - - Incident energy fluence at the given location. + Incident fluence at the diagnostic point DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/fluence@units) DEBUG - value: mJ/cm^2 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] DEBUG - classes: NXbeam.nxdl.xml:/fluence -DEBUG - NXbeam.nxdl.xml:/fluence@units [mJ/cm^2] +DEBUG - NXbeam.nxdl.xml:/fluence@units [NX_ANY] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/photon_energy): DEBUG - value: 1.55 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -1233,7 +1106,7 @@ NXbeam.nxdl.xml:/pulse_duration DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration): DEBUG - - FWHM duration of the pulses at the given location. + FWHM duration of the pulses at the diagnostic point DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/pulse_duration@units) DEBUG - value: fs @@ -1249,7 +1122,7 @@ NXbeam.nxdl.xml:/pulse_energy DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/pulse_energy): DEBUG - - Energy of a single pulse at the given location. + Energy of a single pulse at the diagnostic point DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/pulse_energy@units) DEBUG - value: nJ @@ -1292,36 +1165,19 @@ DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXpositioner'] DEBUG - classes: NXinstrument.nxdl.xml:/POSITIONER NXpositioner.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/POSITIONER): DEBUG - DEBUG - documentation (NXpositioner.nxdl.xml:): DEBUG - - A generic positioner such as a motor or piezo-electric transducer. - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + A generic positioner such as a motor or piezo-electric transducer. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/instrument/manipulator@NX_class) DEBUG - value: NXpositioner DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXpositioner'] DEBUG - classes: NXinstrument.nxdl.xml:/POSITIONER NXpositioner.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/manipulator/pos_x1): @@ -1425,8 +1281,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/monochromator NXinstrument.nxdl.xml:/MONOCHROMATOR NXmonochromator.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/monochromator): DEBUG - @@ -1434,32 +1288,18 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/MONOCHROMATOR): DEBUG - DEBUG - documentation (NXmonochromator.nxdl.xml:): DEBUG - - A wavelength defining device. - - This is a base class for everything which - selects a wavelength or energy, be it a - monochromator crystal, a velocity selector, - an undulator or whatever. - - The expected units are: - - * wavelength: angstrom - * energy: eV - - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + A wavelength defining device. + + This is a base class for everything which + selects a wavelength or energy, be it a + monochromator crystal, a velocity selector, + an undulator or whatever. + + The expected units are: + + * wavelength: angstrom + * energy: eV -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/instrument/monochromator@NX_class) DEBUG - value: NXmonochromator DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXmonochromator'] @@ -1467,8 +1307,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/monochromator NXinstrument.nxdl.xml:/MONOCHROMATOR NXmonochromator.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/monochromator/energy): @@ -1493,7 +1331,9 @@ NXmonochromator.nxdl.xml:/energy_error DEBUG - <> DEBUG - DEPRECATED - see https://github.com/nexusformat/definitions/issues/820 DEBUG - documentation (NXmonochromator.nxdl.xml:/energy_error): -DEBUG - energy standard deviation +DEBUG - + energy standard deviation + DEBUG - ===== ATTRS (//entry/instrument/monochromator/energy_error@units) DEBUG - value: eV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXmonochromator', 'NX_FLOAT'] @@ -1526,15 +1366,15 @@ DEBUG - classes: NXinstrument.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/name): -DEBUG - Name of instrument +DEBUG - + Name of instrument + DEBUG - ===== GROUP (//entry/instrument/source [NXarpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE NXinstrument.nxdl.xml:/SOURCE NXsource.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE): DEBUG - @@ -1542,24 +1382,11 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): DEBUG - - Radiation source emitting a beam. - - Examples include particle sources (electrons, neutrons, protons) or sources for electromagnetic radiation (photons). - This base class can also be used to describe neutron or x-ray storage ring/facilities. - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + Radiation source emitting a beam. + + Examples include particle sources (electrons, neutrons, protons) or sources for electromagnetic radiation (photons). + This base class can also be used to describe neutron or x-ray storage ring/facilities. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/instrument/source@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1567,8 +1394,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE NXinstrument.nxdl.xml:/SOURCE NXsource.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/source/bunch_distance): @@ -1578,7 +1403,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_distance DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_distance): -DEBUG - For storage rings, time between bunches +DEBUG - + For storage rings, time between bunches + DEBUG - ===== ATTRS (//entry/instrument/source/bunch_distance@units) DEBUG - value: us DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1592,7 +1419,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_length DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_length): -DEBUG - For storage rings, temporal length of the bunch +DEBUG - + For storage rings, temporal length of the bunch + DEBUG - ===== ATTRS (//entry/instrument/source/bunch_length@units) DEBUG - value: fs DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1636,7 +1465,9 @@ DEBUG - classes: NXsource.nxdl.xml:/current DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/current): -DEBUG - Accelerator, X-ray tube, or storage ring current +DEBUG - + Accelerator, X-ray tube, or storage ring current + DEBUG - ===== ATTRS (//entry/instrument/source/current@units) DEBUG - value: uA DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1651,10 +1482,10 @@ NXsource.nxdl.xml:/energy DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/energy): DEBUG - - Source energy. Typically, this would be the energy of - the emitted beam. For storage rings, this would be - the particle beam energy. - + Source energy. Typically, this would be the energy of + the emitted beam. For storage rings, this would be + the particle beam energy. + DEBUG - ===== ATTRS (//entry/instrument/source/energy@units) DEBUG - value: MeV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1668,7 +1499,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source/frequency@units) DEBUG - value: Hz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1685,22 +1518,21 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source/name): DEBUG - value: FLASH DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name NXsource.nxdl.xml:/name -NXcomponent.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source -DEBUG - documentation (NXcomponent.nxdl.xml:/name): DEBUG - - Name of the component. + Name of source DEBUG - ===== FIELD (//entry/instrument/source/number_of_bunches): DEBUG - value: 500 @@ -1709,7 +1541,9 @@ DEBUG - classes: NXsource.nxdl.xml:/number_of_bunches DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/number_of_bunches): -DEBUG - For storage rings, the number of bunches in use. +DEBUG - + For storage rings, the number of bunches in use. + DEBUG - ===== FIELD (//entry/instrument/source/number_of_bursts): DEBUG - value: 1 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1737,7 +1571,9 @@ DEBUG - -> proton DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/probe): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source/top_up): DEBUG - value: True DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_BOOLEAN'] @@ -1745,7 +1581,9 @@ DEBUG - classes: NXsource.nxdl.xml:/top_up DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/top_up): -DEBUG - Is the synchrotron operating in top_up mode? +DEBUG - + Is the synchrotron operating in top_up mode? + DEBUG - ===== FIELD (//entry/instrument/source/type): DEBUG - value: Free Electron Laser DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -1768,26 +1606,27 @@ DEBUG - -> Ion Source DEBUG - -> UV Plasma Source DEBUG - -> Metal Jet X-ray DEBUG - -> Laser -DEBUG - -> Dye Laser +DEBUG - -> Dye-Laser DEBUG - -> Broadband Tunable Light Source -DEBUG - -> Halogen Lamp +DEBUG - -> Halogen lamp DEBUG - -> LED -DEBUG - -> Mercury Cadmium Telluride Lamp +DEBUG - -> Mercury Cadmium Telluride DEBUG - -> Deuterium Lamp DEBUG - -> Xenon Lamp DEBUG - -> Globar +DEBUG - -> other DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== GROUP (//entry/instrument/source_pump [NXarpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE NXinstrument.nxdl.xml:/SOURCE NXsource.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE): DEBUG - @@ -1795,24 +1634,11 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): DEBUG - - Radiation source emitting a beam. - - Examples include particle sources (electrons, neutrons, protons) or sources for electromagnetic radiation (photons). - This base class can also be used to describe neutron or x-ray storage ring/facilities. - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + Radiation source emitting a beam. + + Examples include particle sources (electrons, neutrons, protons) or sources for electromagnetic radiation (photons). + This base class can also be used to describe neutron or x-ray storage ring/facilities. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/instrument/source_pump@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1820,8 +1646,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE NXinstrument.nxdl.xml:/SOURCE NXsource.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/source_pump/bunch_distance): @@ -1831,7 +1655,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_distance DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_distance): -DEBUG - For storage rings, time between bunches +DEBUG - + For storage rings, time between bunches + DEBUG - ===== ATTRS (//entry/instrument/source_pump/bunch_distance@units) DEBUG - value: us DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1845,7 +1671,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_length DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_length): -DEBUG - For storage rings, temporal length of the bunch +DEBUG - + For storage rings, temporal length of the bunch + DEBUG - ===== ATTRS (//entry/instrument/source_pump/bunch_length@units) DEBUG - value: fs DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1879,7 +1707,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source_pump/frequency@units) DEBUG - value: Hz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1896,22 +1726,21 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source_pump/name): DEBUG - value: User Laser @ FLASH DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name NXsource.nxdl.xml:/name -NXcomponent.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source -DEBUG - documentation (NXcomponent.nxdl.xml:/name): DEBUG - - Name of the component. + Name of source DEBUG - ===== FIELD (//entry/instrument/source_pump/number_of_bunches): DEBUG - value: 400 @@ -1920,7 +1749,9 @@ DEBUG - classes: NXsource.nxdl.xml:/number_of_bunches DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/number_of_bunches): -DEBUG - For storage rings, the number of bunches in use. +DEBUG - + For storage rings, the number of bunches in use. + DEBUG - ===== FIELD (//entry/instrument/source_pump/number_of_bursts): DEBUG - value: 1 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1948,7 +1779,9 @@ DEBUG - -> proton DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/probe): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source_pump/rms_jitter): DEBUG - value: 204.68816194453154 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1981,18 +1814,21 @@ DEBUG - -> Ion Source DEBUG - -> UV Plasma Source DEBUG - -> Metal Jet X-ray DEBUG - -> Laser -DEBUG - -> Dye Laser +DEBUG - -> Dye-Laser DEBUG - -> Broadband Tunable Light Source -DEBUG - -> Halogen Lamp +DEBUG - -> Halogen lamp DEBUG - -> LED -DEBUG - -> Mercury Cadmium Telluride Lamp +DEBUG - -> Mercury Cadmium Telluride DEBUG - -> Deuterium Lamp DEBUG - -> Xenon Lamp DEBUG - -> Globar +DEBUG - -> other DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/spatial_resolution): DEBUG - value: 500 DEBUG - classpath: ['NXentry', 'NXinstrument'] @@ -2020,15 +1856,15 @@ DEBUG - classes: NXentry.nxdl.xml:/run_cycle DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/run_cycle): -DEBUG - Such as "2007-3". Some user facilities organize their beam time into run cycles. +DEBUG - + Such as "2007-3". Some user facilities organize their beam time into run cycles. + DEBUG - ===== GROUP (//entry/sample [NXarpes::/NXentry/NXsample]): DEBUG - classpath: ['NXentry', 'NXsample'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/SAMPLE NXentry.nxdl.xml:/SAMPLE NXsample.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/SAMPLE): DEBUG - @@ -2036,25 +1872,12 @@ DEBUG - documentation (NXentry.nxdl.xml:/SAMPLE): DEBUG - DEBUG - documentation (NXsample.nxdl.xml:): DEBUG - - Any information on the sample. - - This could include scanned variables that - are associated with one of the data dimensions, e.g. the magnetic field, or - logged data, e.g. monitored temperature vs elapsed time. - -DEBUG - documentation (NXcomponent.nxdl.xml:): -DEBUG - - Base class for components of an instrument - real ones or simulated ones. + Any information on the sample. + + This could include scanned variables that + are associated with one of the data dimensions, e.g. the magnetic field, or + logged data, e.g. monitored temperature vs elapsed time. -DEBUG - documentation (NXobject.nxdl.xml:): -DEBUG - - This is the base object of NeXus. The groups and fields contained - within this file are allowed to be present in any derived base class. - - If nameType="partial", the placeholders (e.g., FIELDNAME or GROUPNAME) - can be replaced by the name of any object (field or group, - respectively) that exists within the same group. - DEBUG - ===== ATTRS (//entry/sample@NX_class) DEBUG - value: NXsample DEBUG - classpath: ['NXentry', 'NXsample'] @@ -2062,8 +1885,6 @@ DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/SAMPLE NXentry.nxdl.xml:/SAMPLE NXsample.nxdl.xml: -NXcomponent.nxdl.xml: -NXobject.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/sample/bias): @@ -2107,15 +1928,14 @@ DEBUG - classpath: ['NXentry', 'NXsample', 'NX_CHAR'] DEBUG - classes: NXarpes.nxdl.xml:/ENTRY/SAMPLE/name NXsample.nxdl.xml:/name -NXcomponent.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/SAMPLE/name): -DEBUG - Descriptive name of sample +DEBUG - + Descriptive name of sample + DEBUG - documentation (NXsample.nxdl.xml:/name): -DEBUG - Descriptive name of sample -DEBUG - documentation (NXcomponent.nxdl.xml:/name): DEBUG - - Name of the component. + Descriptive name of sample DEBUG - ===== FIELD (//entry/sample/preparation_method): DEBUG - value: in-vacuum cleave @@ -2129,7 +1949,9 @@ DEBUG - classes: NXsample.nxdl.xml:/pressure DEBUG - <> DEBUG - documentation (NXsample.nxdl.xml:/pressure): -DEBUG - Applied pressure +DEBUG - + Applied pressure + DEBUG - ===== ATTRS (//entry/sample/pressure@units) DEBUG - value: mbar DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT'] @@ -2179,7 +2001,9 @@ DEBUG - classes: NXsample.nxdl.xml:/thickness DEBUG - <> DEBUG - documentation (NXsample.nxdl.xml:/thickness): -DEBUG - sample thickness +DEBUG - + sample thickness + DEBUG - ===== ATTRS (//entry/sample/thickness@units) DEBUG - value: mm DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT'] @@ -2201,7 +2025,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/start_time): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/start_time): -DEBUG - Starting time of measurement +DEBUG - + Starting time of measurement + DEBUG - ===== FIELD (//entry/title): DEBUG - value: Excited-state dynamics of WSe2 in the Valence Band and Core-Levels DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -2212,7 +2038,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/title): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/title): -DEBUG - Extended title for entry +DEBUG - + Extended title for entry + DEBUG - ======================== DEBUG - === Default Plotable === DEBUG - ======================== diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index f2e341a1c..2069f4dd1 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -297,18 +297,18 @@ def test_get_inherited_nodes(): (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT", elem=elem ) - assert len(elist) == 4 + assert len(elist) == 3 (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem ) - assert len(elist) == 6 + assert len(elist) == 4 (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", nx_name="NXiv_temp", ) - assert len(elist) == 6 + assert len(elist) == 4 def test_c_option(tmp_path): From 88451d7234a8abe1f0cfcee77de06fa35b19bc6b Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 17:49:56 +0100 Subject: [PATCH 45/61] update definitions to ref --- src/pynxtools/definitions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 6e3134567..f7ba53f4f 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 6e3134567d715198a03499d2ff1811389b2e6462 +Subproject commit f7ba53f4fb409b03fde6af6ccf29146392a2c142 From 3cc7a01cc4819fb1b7a703ea03bea642083443e9 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 13 Mar 2025 18:26:18 +0100 Subject: [PATCH 46/61] update and fix merge errors, add test --- src/pynxtools/dataconverter/validation.py | 2 +- tests/dataconverter/test_validation.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index f2f592033..d8b937933 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -521,7 +521,7 @@ def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: node = node.search_add_child_for(best_name) if not good_name_fit: - return False + return None return node diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index b5a309131..e15777609 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -90,9 +90,9 @@ def listify_template(data_dict: Template): TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( "nm" # pylint: disable=E1126 ) -TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr"] = ( - 2.0, -) +TEMPLATE["optional"][ + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/DATA[float_value_no_attr]" +] = (2.0,) TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 @@ -508,7 +508,7 @@ def listify_template(data_dict: Template): pytest.param( remove_from_dict( TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr", + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/DATA[float_value_no_attr]", "optional", ), "", @@ -775,10 +775,21 @@ def listify_template(data_dict: Template): "s", ), ( - "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s, is being written but has no documentation." + "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s written without documentation." ), id="field-with-illegal-unit", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type", + 1, + ), + ( + "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type written without documentation." + ), + id="bad-namefitting", + ), pytest.param( alter_dict( TEMPLATE, @@ -894,7 +905,7 @@ def listify_template(data_dict: Template): "s", ), ( - "The unit, /ENTRY[my_entry]/required_group/author/@units = s, is being written but has no documentation." + "The unit, /ENTRY[my_entry]/required_group/author/@units = s written without documentation." ), id="baseclass-field-with-illegal-unit", ), From 7de7d5aaa1d529d92993733c9fe003b9069b022b Mon Sep 17 00:00:00 2001 From: Laurenz Rettig Date: Thu, 13 Mar 2025 22:36:20 +0000 Subject: [PATCH 47/61] revert nomad example test - open enums not working yet --- tests/nomad/test_parsing.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 8db054bcd..e050448a7 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -57,25 +57,24 @@ def test_nexus_example(): ) # good ENUM - x-ray assert instrument.SOURCE[0].probe__field == "x-ray" - # wrong inherited ENUM - Burst (accepted for open enum) - assert instrument.SOURCE[0].mode__field == "Burst" - # wrong inherited ENUM for extended field - 'Free Electron Laser' (accepted for open enum) - assert instrument.SOURCE[0].type__field == "Free Electron Laser" + # wrong inherited ENUM - Burst + assert instrument.SOURCE[0].mode__field is None + # wrong inherited ENUM for extended field - 'Free Electron Laser' + assert instrument.SOURCE[0].type__field is None data = arpes_obj.ENTRY[0].DATA[0] assert len(data.AXISNAME__field) == 3 # there is still a bug in the variadic name resolution, so skip these # assert data.delays__field is not None # assert data.angles__field.check("1/Å") # assert data.delays__field.check("fs") - # assert data.energies__field is not None - # assert data.energies__field.check("eV") + # but the following still works + assert data.energies__field is not None + assert data.energies__field.check("eV") # manual name resolution assert data.AXISNAME__field["angles__field"] is not None assert data.AXISNAME__max["angles__max"].value == 2.168025463513032 assert (1 * data.AXISNAME__field["angles__field"].unit).check("1/Å") assert (1 * data.AXISNAME__field["delays__field"].unit).check("fs") - assert (1 * data.AXISNAME__field["energies__field"].unit).check("eV") - assert (1 * data.DATA__field["data__field"].unit).check("1") assert data.___axes == "['angles', 'energies', 'delays']" From 3f256a41911db0acc4f2ac898fa76b0c64e7e57f Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 00:54:14 +0100 Subject: [PATCH 48/61] don't check for units if no good namefit or a group --- src/pynxtools/dataconverter/validation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index d8b937933..1d06a8acb 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -790,12 +790,13 @@ def startswith_with_variations( else: # check that parent has units node = add_best_matches_for(not_visited_key.rsplit("/", 1)[0], tree) - if node.unit is None: - collector.collect_and_log( - not_visited_key, - ValidationProblem.UnitWithoutDocumentation, - mapping[not_visited_key], - ) + if node is not None: + if node.type != "field" or node.unit is None: + collector.collect_and_log( + not_visited_key, + ValidationProblem.UnitWithoutDocumentation, + mapping[not_visited_key], + ) # parent key will be checked on its own if it exists, because it is in the list continue From bdf8997438226c76c59cba3ee96325776d60788a Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 09:42:39 +0100 Subject: [PATCH 49/61] allow multiple error messages and test for additional error messages --- tests/dataconverter/test_validation.py | 372 ++++++++++--------------- 1 file changed, 152 insertions(+), 220 deletions(-) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index e15777609..8e1467cb5 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -188,7 +188,7 @@ def listify_template(data_dict: Template): # pylint: disable=too-many-arguments @pytest.mark.parametrize( - "data_dict,error_message", + "data_dict,error_messages", [ pytest.param( alter_dict( @@ -196,11 +196,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]", "not_a_num", ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/anamethatRENAMES[anamethatichangetothis]" " should be one of the following Python types: (, ), as defined in " "the NXDL as NX_INT." - ), + ], id="variadic-field-str-instead-of-int", ), pytest.param( @@ -209,11 +209,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", "not_a_num", ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/in" "t_value should be one of the following Python types: (, ), as defined in " "the NXDL as NX_INT." - ), + ], id="string-instead-of-int", ), pytest.param( @@ -222,9 +222,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", "NOT_TRUE_OR_FALSE", ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (, ), as defined in the NXDL as NX_BOOLEAN." - ), + ], id="string-instead-of-bool", ), pytest.param( @@ -233,10 +233,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", ["1", "2", "3"], ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should" " be one of the following Python types: (, ), as defined in the NXDL as NX_INT." - ), + ], id="list-of-int-str-instead-of-int", ), pytest.param( @@ -245,10 +245,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", np.array([2.0, 3.0, 4.0], dtype=np.float32), ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value should be" " one of the following Python types: (, ), as defined in the NXDL as NX_INT." - ), + ], id="array-of-float-instead-of-int", ), pytest.param( @@ -257,7 +257,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", [2, 3, 4], ), - (""), + [], id="list-of-int-instead-of-int", ), pytest.param( @@ -266,7 +266,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", np.array([2, 3, 4], dtype=np.int32), ), - (""), + [], id="array-of-int32-instead-of-int", ), pytest.param( @@ -275,10 +275,12 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", "2022-01-22T12:14:12.05018-00:00", ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" - " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" - " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" - "T12:14:12.05018+00:00.", + [ + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" + " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" + "T12:14:12.05018+00:00." + ], id="int-instead-of-date", ), pytest.param( @@ -287,9 +289,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", 0, ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be one of the following Python types: (, ), as defined in the NXDL as NX_FLOAT." - ), + ], id="int-instead-of-float", ), pytest.param( @@ -298,9 +300,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", "0", ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (, , , ), as defined in the NXDL as NX_NUMBER." - ), + ], id="str-instead-of-number", ), pytest.param( @@ -309,11 +311,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", np.array([0.0, 2]), ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one" " of the following Python types: (, ), as" " defined in the NXDL as NX_CHAR." - ), + ], id="wrong-type-ndarray-instead-of-char", ), pytest.param( @@ -322,7 +324,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", np.array(["x", "2"]), ), - (""), + [], id="valid-ndarray-instead-of-char", ), pytest.param( @@ -331,17 +333,17 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", {"link": "/a-link"}, ), - (""), + [], id="link-dict-instead-of-int", ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", -1 ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " "should be a positive int, but is -1." - ), + ], id="negative-posint", ), pytest.param( @@ -350,10 +352,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", [-1, 2], ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value " "should be a positive int, but is [-1, 2]." - ), + ], id="negative-posint-list", ), pytest.param( @@ -362,10 +364,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", np.array([-1, 2], dtype=np.int8), ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value should" " be a positive int, but is [-1 2]." - ), + ], id="negative-posint-array", ), pytest.param( @@ -374,7 +376,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", [1, 2], ), - (""), + [], id="positive-posint-list", ), pytest.param( @@ -383,18 +385,18 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value", np.array([1, 2], dtype=np.int8), ), - (""), + [], id="positive-posint-array", ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", 3 ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:" " (, )," " as defined in the NXDL as NX_CHAR." - ), + ], id="int-instead-of-chars", ), pytest.param( @@ -403,7 +405,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", np.array(["1", "2", "3"], dtype=np.str_), ), - (""), + [], id="array-of-chars", ), pytest.param( @@ -412,7 +414,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", np.array(["1", "2", "3"], dtype=np.bytes_), ), - (""), + [], id="array-of-bytes-chars", ), pytest.param( @@ -421,14 +423,14 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value", ["list", "of", "chars"], ), - "", + [], id="list-of-string-instead-of-chars", ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", None ), - "", + [], id="empty-optional-field", ), pytest.param( @@ -437,7 +439,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", np.array([2.0, 3.0, 4.0], dtype=np.float32), ), - "", + [], id="array-of-float-instead-of-float", ), pytest.param( @@ -446,9 +448,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", np.array(["2.0", "3.0"], dtype=np.str_), ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of the following Python types: (, ), as defined in the NXDL " - "as NX_FLOAT.", + [ + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT." + ], id="array-of-str-instead-of-float", ), pytest.param( @@ -457,9 +461,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", [2], # pylint: disable=E1126 ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of the following Python types: (, ), as defined in the NXDL " - "as NX_FLOAT.", + [ + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " + "one of the following Python types: (, ), as defined in the NXDL " + "as NX_FLOAT." + ], id="list-of-int-instead-of-float", ), pytest.param( @@ -468,11 +474,12 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", "required", ), - ( + [ "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" "/bool_value is" - " required and hasn't been supplied by the reader." - ), + " required and hasn't been supplied by the reader.", + "There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value, but the field does not exist.", + ], id="empty-required-field", ), pytest.param( @@ -481,64 +488,39 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", "required", ), - ( + [ "The data entry corresponding to /ENTRY[my_entry]/" "NXODD_name[nxodd_two_name]/bool_value is" - " required and hasn't been supplied by the reader." - ), - id="empty-required-field", - ), - pytest.param( - remove_from_dict( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value", - "required", - ), - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", - "required", - ), - ( - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]" - "/bool_value is" - " required and hasn't been supplied by the reader." - ), + " required and hasn't been supplied by the reader.", + "There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value, but the field does not exist.", + ], id="empty-required-field", ), - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/DATA[float_value_no_attr]", - "optional", - ), - "", - id="removed-optional-value", - ), pytest.param( remove_from_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", "optional", ), - "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value.", - id="removed-optional-value-with-attribute-remaining", + [ + "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value.", + "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units will not be written.", + ], + id="removed-optional-value-with-unit-remaining", ), pytest.param( remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", - "optional", - ), - "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units will not be written.", - id="removed-optional-value-with-attribute-remaining", - ), - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value", + "required", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units", "required", ), - "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is required and hasn't been supplied by the reader.", + [ + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value is required and hasn't been supplied by the reader." + ], id="missing-required-value", ), pytest.param( @@ -551,7 +533,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name", "optional", ), - ("The required group, /ENTRY[my_entry]/NXODD_name, hasn't been supplied."), + ["The required group, /ENTRY[my_entry]/NXODD_name, hasn't been supplied."], id="all-required-fields-set-to-none", ), pytest.param( @@ -560,7 +542,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", "2022-01-22T12:14:12.05018+00:00", ), - "", + [], id="UTC-with-+00:00", ), pytest.param( @@ -569,7 +551,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", "2022-01-22T12:14:12.05018Z", ), - "", + [], id="UTC-with-Z", ), pytest.param( @@ -578,10 +560,12 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value", "2022-01-22T12:14:12.05018-00:00", ), - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" - " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" - " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" - "T12:14:12.05018+00:00.", + [ + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value" + " = 2022-01-22T12:14:12.05018-00:00 should be a timezone aware" + " ISO8601 formatted str. For example, 2022-01-22T12:14:12.05018Z or 2022-01-22" + "T12:14:12.05018+00:00." + ], id="UTC-with--00:00", ), pytest.param(listify_template(TEMPLATE), "", id="lists"), @@ -589,21 +573,21 @@ def listify_template(data_dict: Template): alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should " "be one of the following" ": ['1st type', '2nd type', '3rd type', '4th type']" - ), + ], id="wrong-enum-choice", ), pytest.param( set_to_none_in_dict( TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", "optional" ), - ( + [ "The data entry corresponding to /ENTRY[my_entry]/optional_parent/" "required_child is required and hasn't been supplied by the reader." - ), + ], id="atleast-one-required-child-not-provided-optional-parent", ), pytest.param( @@ -612,11 +596,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field", "required", ), - ( + [ "The data entry corresponding to /ENTRY[my_entry]/" "OPTIONAL_group[my_group]/required_field " "is required and hasn't been supplied by the reader." - ), + ], id="required-field-not-provided-in-variadic-optional-group", ), pytest.param( @@ -625,7 +609,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/OPTIONAL_group[my_group]/optional_field", "required", ), - (""), + [], id="required-field-provided-in-variadic-optional-group", ), pytest.param( @@ -636,18 +620,22 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/optional_parent/optional_child", None, ), - (""), + [], id="no-child-provided-optional-parent", ), pytest.param(TEMPLATE, "", id="valid-data-dict"), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), - "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", + [ + "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied." + ], id="missing-empty-yet-required-group", ), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), - "The required group, /ENTRY[my_entry]/required_group2, hasn't been supplied.", + [ + "The required group, /ENTRY[my_entry]/required_group2, hasn't been supplied." + ], id="missing-empty-yet-required-group2", ), pytest.param( @@ -658,7 +646,9 @@ def listify_template(data_dict: Template): "/ENTRY[entry]/required_group", None, ), - "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied.", + [ + "The required group, /ENTRY[my_entry]/required_group, hasn't been supplied." + ], id="allow-required-and-empty-group", ), pytest.param( @@ -667,11 +657,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]", "required", ), - ( + [ "The required group, /ENTRY[my_entry]/" "optional_parent/req_group_in_opt_group, " "hasn't been supplied." - ), + ], id="req-group-in-opt-parent-removed", ), pytest.param((TEMPLATE), (""), id="opt-group-completely-removed"), @@ -681,18 +671,19 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", ["0", 1, 2], ), - ( - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" - ), + [ + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following Python types: (, ), as defined in the NXDL as NX_INT.", + "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]", + ], id="wrong-type-array-in-attribute", ), pytest.param( alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] ), - ( + [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]" - ), + ], id="wrong-value-array-in-attribute", ), pytest.param( @@ -701,7 +692,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units", "required", ), - "Field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value requires a unit in the unit category NX_ENERGY.", + [ + "Field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value requires a unit in the unit category NX_ENERGY." + ], id="missing-unit", ), pytest.param( @@ -710,16 +703,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", "required", ), - "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value.", - id="unit-missing-field", - ), - pytest.param( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value", - "required", - ), - "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units will not be written.", + [ + "Unit /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units in dataset without its field /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value.", + "The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value/@units will not be written.", + ], id="unit-missing-field", ), pytest.param( @@ -728,9 +715,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/required_group/illegal_name", 1, ), - ( + [ "Field /ENTRY[my_entry]/required_group/illegal_name written without documentation." - ), + ], id="add-undocumented-field", ), pytest.param( @@ -743,9 +730,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/required_group/author/@illegal", "illegal_attribute", ), - ( + [ "Attribute /ENTRY[my_entry]/required_group/author/@illegal written without documentation." - ), + ], id="add-undocumented-attribute", ), pytest.param( @@ -754,7 +741,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@default", "unknown", ), - "", + [], id="group-with-only-attributes", ), pytest.param( @@ -763,9 +750,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@illegal", "unknown", ), - ( + [ "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/BEAM[my_beam]/@illegal written without documentation." - ), + ], id="group-with-illegal-attributes", ), pytest.param( @@ -774,9 +761,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/optional_parent/required_child/@units", "s", ), - ( + [ "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s written without documentation." - ), + ], id="field-with-illegal-unit", ), pytest.param( @@ -785,9 +772,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type", 1, ), - ( + [ "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type written without documentation." - ), + ], id="bad-namefitting", ), pytest.param( @@ -796,10 +783,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/duration", np.array([2.0, 3.0, 4.0], dtype=np.float32), ), - ( + [ "The value at /ENTRY[my_entry]/duration should be" " one of the following Python types: (, ), as defined in the NXDL as NX_INT." - ), + ], id="baseclass-wrong-dtype", ), pytest.param( @@ -808,7 +795,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/duration/@units", "required", ), - "Field /ENTRY[my_entry]/duration requires a unit in the unit category NX_TIME.", + [ + "Field /ENTRY[my_entry]/duration requires a unit in the unit category NX_TIME." + ], id="baseclass-missing-unit", ), pytest.param( @@ -817,20 +806,10 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/collection_time/@illegal", "s", ), - ( - "There were attributes set for the field /ENTRY[my_entry]/collection_time, but the field does not exist." - ), - id="baseclass-attribute-missing-field", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/collection_time/@illegal", - "s", - ), - ( - "The attribute /ENTRY[my_entry]/collection_time/@illegal will not be written." - ), + [ + "There were attributes set for the field /ENTRY[my_entry]/collection_time, but the field does not exist.", + "The attribute /ENTRY[my_entry]/collection_time/@illegal will not be written.", + ], id="baseclass-attribute-missing-field", ), pytest.param( @@ -839,14 +818,14 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type", "Wrong source type", ), - ( + [ "The value at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type " "should be one of the following: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', " "'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', " "'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', " "'Metal Jet X-ray', 'Laser', 'Dye-Laser', 'Broadband Tunable Light Source', 'Halogen lamp', 'LED', " "'Mercury Cadmium Telluride', 'Deuterium Lamp', 'Xenon Lamp', 'Globar', 'other']" - ), + ], id="baseclass-wrong-enum", ), pytest.param( @@ -855,9 +834,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal_name", 1, ), - ( + [ "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal_name written without documentation." - ), + ], id="baseclass-add-undocumented-field", ), pytest.param( @@ -866,9 +845,9 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/@illegal", "illegal_attribute", ), - ( + [ "Attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/@illegal written without documentation." - ), + ], id="baseclass-add-undocumented-attribute", ), pytest.param( @@ -877,21 +856,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units", "illegal_attribute", ), - ( + [ "Unit /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units " - "in dataset without its field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal." - ), - id="baseclass-add-unit-of-missing-undocumented-field", - ), - pytest.param( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units", - "illegal_attribute", - ), - ( - "The attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units will not be written." - ), + "in dataset without its field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal.", + "The attribute /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/illegal/@units will not be written.", + ], id="baseclass-add-unit-of-missing-undocumented-field", ), pytest.param( @@ -904,63 +873,26 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/required_group/author/@units", "s", ), - ( + [ "The unit, /ENTRY[my_entry]/required_group/author/@units = s written without documentation." - ), + ], id="baseclass-field-with-illegal-unit", ), ], ) -def test_validate_data_dict(caplog, data_dict, error_message, request): +def test_validate_data_dict(caplog, data_dict, error_messages, request): """Unit test for the data validation routine.""" def format_error_message(msg: str) -> str: return msg[msg.rfind("G: ") + 3 :].rstrip("\n") - if request.node.callspec.id in ( - "valid-data-dict", - "lists", - "empty-optional-field", - "UTC-with-+00:00", - "UTC-with-Z", - "no-child-provided-optional-parent", - "link-dict-instead-of-int", - "opt-group-completely-removed", - "required-field-provided-in-variadic-optional-group", - "valid-ndarray-instead-of-char", - "list-of-int-instead-of-int", - "list-of-string-instead-of-chars", - "array-of-int32-instead-of-int", - "List-of-int-instead-of-int", - "positive-posint-list", - "positive-posint-array", - "array-of-chars", - "array-of-bytes-chars", - "array-of-float-instead-of-float", - "numpy-chararray", - "removed-optional-value", - "group-with-only-attributes", - ): + if not error_messages: with caplog.at_level(logging.WARNING): assert validate_dict_against("NXtest", data_dict)[0] assert caplog.text == "" - # Missing required fields caught by logger with warning - elif request.node.callspec.id in ( - "empty-required-field", - "allow-required-and-empty-group", - "req-group-in-opt-parent-removed", - "missing-empty-yet-required-group", - "missing-empty-yet-required-group2", - ): - assert "" == caplog.text - captured_logs = caplog.records - assert not validate_dict_against("NXtest", data_dict)[0] - assert any( - error_message == format_error_message(rec.message) for rec in captured_logs - ) else: with caplog.at_level(logging.WARNING): assert not validate_dict_against("NXtest", data_dict)[0] - assert any( - error_message == format_error_message(rec.message) for rec in caplog.records - ) + assert len(caplog.records) == len(error_messages) + for expected_message, rec in zip(error_messages, caplog.records): + assert expected_message == format_error_message(rec.message) From eb5f8f61d73a0d418ef7b24f92e01ce9fa728878 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 09:52:05 +0100 Subject: [PATCH 50/61] add test for wrong namefitting --- src/pynxtools/data/NXtest.nxdl.xml | 5 +++++ tests/dataconverter/test_validation.py | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 2d6547698..7e392ca8e 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -82,5 +82,10 @@ This is a required group in an optional group. + + + A required NXuser entry. + + diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 8e1467cb5..03be7f7ce 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -777,6 +777,15 @@ def listify_template(data_dict: Template): ], id="bad-namefitting", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/data/test", + 1, + ), + ["Field /ENTRY[my_entry]/data/test written without documentation."], + id="namefitting-of-illegal-named-group", + ), pytest.param( alter_dict( TEMPLATE, From 25656bd7ac13eae77c00ae9a937e8822bfacff33 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 09:55:57 +0100 Subject: [PATCH 51/61] trigger workflow --- tests/dataconverter/test_validation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 03be7f7ce..f245829e9 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -905,3 +905,6 @@ def format_error_message(msg: str) -> str: assert len(caplog.records) == len(error_messages) for expected_message, rec in zip(error_messages, caplog.records): assert expected_message == format_error_message(rec.message) + + +# test From 8829f06410bf87d4e0115bd0d6c11b74daceed4f Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 11:32:39 +0100 Subject: [PATCH 52/61] fix reader tests --- tests/data/dataconverter/readers/example/testdata.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/data/dataconverter/readers/example/testdata.json b/tests/data/dataconverter/readers/example/testdata.json index e66af9962..1f4d98427 100644 --- a/tests/data/dataconverter/readers/example/testdata.json +++ b/tests/data/dataconverter/readers/example/testdata.json @@ -20,5 +20,6 @@ "required_child": 1, "optional_child": 1, "@version": "1.0", - "@array": [0, 1, 2] + "@array": [0, 1, 2], + "name": "Test" } \ No newline at end of file From 501537d00803b1a6216f71c2cc823a4376137bf8 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 12:00:59 +0100 Subject: [PATCH 53/61] catch failing namefitting --- src/pynxtools/dataconverter/validation.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 1d06a8acb..1ce2dc181 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -790,13 +790,12 @@ def startswith_with_variations( else: # check that parent has units node = add_best_matches_for(not_visited_key.rsplit("/", 1)[0], tree) - if node is not None: - if node.type != "field" or node.unit is None: - collector.collect_and_log( - not_visited_key, - ValidationProblem.UnitWithoutDocumentation, - mapping[not_visited_key], - ) + if node is None or node.type != "field" or node.unit is None: + collector.collect_and_log( + not_visited_key, + ValidationProblem.UnitWithoutDocumentation, + mapping[not_visited_key], + ) # parent key will be checked on its own if it exists, because it is in the list continue @@ -806,7 +805,7 @@ def startswith_with_variations( if not_visited_key.rsplit("/", 1)[0] not in mapping.keys(): # check that parent is not a group node = add_best_matches_for(not_visited_key.rsplit("/", 1)[0], tree) - if node.type != "group": + if node is None or node.type != "group": collector.collect_and_log( not_visited_key.rsplit("/", 1)[0], ValidationProblem.AttributeForNonExistingField, From 55397dfc21b64bd2a955136d30328e4c0e234d2e Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 15:21:15 +0100 Subject: [PATCH 54/61] tread undocumented units as warning --- src/pynxtools/dataconverter/helpers.py | 8 ++++++-- tests/dataconverter/test_validation.py | 22 +++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 820124207..290b4a634 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -80,7 +80,9 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar value = "" if log_type == ValidationProblem.UnitWithoutDocumentation: - logger.warning(f"The unit, {path} = {value} written without documentation.") + logger.info( + f"The unit, {path} = {value}, is being written but has no documentation." + ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( f"The value at {path} should be one of the following: {value}" @@ -161,7 +163,9 @@ def collect_and_log( return if self.logging and path + str(log_type) + str(value) not in self.data: self._log(path, log_type, value, *args, **kwargs) - self.data.add(path + str(log_type) + str(value)) + # info messages should not fail validation + if log_type not in (ValidationProblem.UnitWithoutDocumentation,): + self.data.add(path + str(log_type) + str(value)) def has_validation_problems(self): """Returns True if there were any validation problems.""" diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index f245829e9..adadc4c58 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -762,7 +762,7 @@ def listify_template(data_dict: Template): "s", ), [ - "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s written without documentation." + "The unit, /ENTRY[my_entry]/optional_parent/required_child/@units = s, is being written but has no documentation." ], id="field-with-illegal-unit", ), @@ -883,7 +883,7 @@ def listify_template(data_dict: Template): "s", ), [ - "The unit, /ENTRY[my_entry]/required_group/author/@units = s written without documentation." + "The unit, /ENTRY[my_entry]/required_group/author/@units = s, is being written but has no documentation." ], id="baseclass-field-with-illegal-unit", ), @@ -900,11 +900,19 @@ def format_error_message(msg: str) -> str: assert validate_dict_against("NXtest", data_dict)[0] assert caplog.text == "" else: - with caplog.at_level(logging.WARNING): - assert not validate_dict_against("NXtest", data_dict)[0] - assert len(caplog.records) == len(error_messages) - for expected_message, rec in zip(error_messages, caplog.records): - assert expected_message == format_error_message(rec.message) + if request.node.callspec.id in ( + "field-with-illegal-unit", + "baseclass-field-with-illegal-unit", + ): + with caplog.at_level(logging.INFO): + assert validate_dict_against("NXtest", data_dict)[0] + assert error_messages[0] in caplog.text + else: + with caplog.at_level(logging.WARNING): + assert not validate_dict_against("NXtest", data_dict)[0] + assert len(caplog.records) == len(error_messages) + for expected_message, rec in zip(error_messages, caplog.records): + assert expected_message == format_error_message(rec.message) # test From 6828e033b56af2b8c5dc20cc21b1bd1e5094dc0c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 14 Mar 2025 16:23:38 +0100 Subject: [PATCH 55/61] require concept_name fornamefitting --- src/pynxtools/dataconverter/validation.py | 73 +++++++++++++---------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 1ce2dc181..13bbbfb09 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -134,35 +134,38 @@ def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: ), f"{name_match.group(2)}{'' if prefix is None else prefix}" -def best_namefit_of(name: str, keys: Iterable[str]) -> Tuple[Optional[str], bool]: +def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode]: """ Get the best namefit of `name` in `keys`. Args: name (str): The name to fit against the keys. - keys (Iterable[str]): The keys to fit `name` against. + nodes (Iterable[NexusNode]): The nodes to fit `name` against. Returns: - Tuple[Optional[str], bool]: A tuple where the first element is the best fitting key (or None if no fit was found), - and the second element is a boolean indicating if the match was exact. + Optional[NexusNode]: The best fitting node. None if no fit was found. """ + if not nodes: + return None - if not keys: - return None, True + concept_name, instance_name = split_class_and_name_of(name) - nx_name, name2fit = split_class_and_name_of(name) + for node in nodes: + if not node.variadic: + if instance_name == node.name: + return node + else: + if not concept_name or concept_name != node.name: + continue + if instance_name == node.name: + return node - if name2fit in keys: - return name2fit, True - if nx_name is not None and nx_name in keys: - return nx_name, True - best_match, score = max( - map(lambda x: (x, get_nx_namefit(name2fit, x)), keys), key=lambda x: x[1] - ) - if score < 0: - return None, False + score = get_nx_namefit(instance_name, node.name) - return best_match, False + if score > -1: + return node + + return None def validate_dict_against( @@ -199,21 +202,28 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: variations = [] for key in keys: - nx_name, name2fit = split_class_and_name_of(key) + concept_name, instance_name = split_class_and_name_of(key) + if node.type == "attribute": # Remove the starting @ from attributes - name2fit = name2fit[1:] if name2fit.startswith("@") else name2fit - if nx_name is not None and nx_name != node.name: + instance_name = ( + instance_name[1:] + if instance_name.startswith("@") + else instance_name + ) + + if not concept_name or concept_name != node.name: continue + if ( - get_nx_namefit(name2fit, node.name) >= 0 + get_nx_namefit(instance_name, node.name) >= 0 and key not in node.parent.get_all_direct_children_names() ): variations.append(key) - if nx_name is not None and not variations: + if not variations: collector.collect_and_log( - nx_name, ValidationProblem.FailedNamefitting, keys + concept_name, ValidationProblem.FailedNamefitting, keys ) return variations @@ -513,24 +523,23 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: for name in key[1:].replace("@", "").split("/"): - children = node.get_all_direct_children_names() - best_name, good_name_fit = best_namefit_of(name, children) - if best_name is None: - return None - - node = node.search_add_child_for(best_name) + children_to_check = [ + node.search_add_child_for(child) + for child in node.get_all_direct_children_names() + ] + node = best_namefit_of(name, children_to_check) - if not good_name_fit: + if node is None: return None return node - def is_documented(key: str, node: NexusNode) -> bool: + def is_documented(key: str, tree: NexusNode) -> bool: if mapping.get(key) is None: # This value is not really set. Skip checking it's documentation. return True - node = add_best_matches_for(key, node) + node = add_best_matches_for(key, tree) if node is None: return False From a493e1a43af920bb02c47d4aba318a79107d0244 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 14 Mar 2025 16:26:47 +0100 Subject: [PATCH 56/61] add test for namefitting with typo in concept_name --- tests/dataconverter/test_validation.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index adadc4c58..5bda1e973 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -786,6 +786,24 @@ def listify_template(data_dict: Template): ["Field /ENTRY[my_entry]/data/test written without documentation."], id="namefitting-of-illegal-named-group", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/USE[user]/name", + "Some name", + ), + ["Field /ENTRY[my_entry]/USE[user]/name written without documentation."], + id="namefitting-of-group-with-typo", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/USE[user]/test", + "Some name", + ), + ["Field /ENTRY[my_entry]/USE[user]/test written without documentation."], + id="namefitting-of-group-with-typo-and-new-field", + ), pytest.param( alter_dict( TEMPLATE, From 33718254fd14e04c7c08c2ae31225528a650d091 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 14 Mar 2025 16:37:12 +0100 Subject: [PATCH 57/61] remove test comment --- tests/dataconverter/test_validation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 5bda1e973..c49b1969f 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -931,6 +931,3 @@ def format_error_message(msg: str) -> str: assert len(caplog.records) == len(error_messages) for expected_message, rec in zip(error_messages, caplog.records): assert expected_message == format_error_message(rec.message) - - -# test From d317a7e9ae63a43db18bd1f8d46e1584e350e40b Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 21:42:54 +0100 Subject: [PATCH 58/61] add proper handling of group attributes --- src/pynxtools/data/NXtest.nxdl.xml | 7 ++++++ src/pynxtools/dataconverter/validation.py | 6 +++-- .../readers/example/testdata.json | 4 +++- tests/dataconverter/test_validation.py | 22 +++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 7e392ca8e..fa2917205 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -64,6 +64,13 @@ + + + + + + + This is a required yet empty group. diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 13bbbfb09..043e6e02d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -192,7 +192,7 @@ def validate_dict_against( def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: if not node.variadic: - if node.name in keys: + if f"{'@' if node.type == 'attribute' else ''}{node.name}" in keys: return [node.name] elif ( hasattr(node, "nx_class") @@ -228,7 +228,9 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: return variations def get_field_attributes(name: str, keys: Mapping[str, Any]) -> Mapping[str, Any]: - return {k.split("@")[1]: keys[k] for k in keys if k.startswith(f"{name}@")} + return { + f"@{k.split('@')[1]}": keys[k] for k in keys if k.startswith(f"{name}@") + } def handle_nxdata(node: NexusGroup, keys: Mapping[str, Any], prev_path: str): def check_nxdata(): diff --git a/tests/data/dataconverter/readers/example/testdata.json b/tests/data/dataconverter/readers/example/testdata.json index 1f4d98427..dbcf035a3 100644 --- a/tests/data/dataconverter/readers/example/testdata.json +++ b/tests/data/dataconverter/readers/example/testdata.json @@ -21,5 +21,7 @@ "optional_child": 1, "@version": "1.0", "@array": [0, 1, 2], - "name": "Test" + "name": "Test", + "@group_attribute": "data", + "@signal": "data" } \ No newline at end of file diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index c49b1969f..6f32d64c8 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -75,6 +75,7 @@ def listify_template(data_dict: Template): "type", "definition", "date_value", + "@signal", ) or isinstance(data_dict[optionality][path], list): listified_template[optionality][path] = data_dict[optionality][path] else: @@ -114,6 +115,11 @@ def listify_template(data_dict: Template): "just chars" # pylint: disable=E1126 ) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value/@units"] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/@group_attribute"] = ( + "data" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/@signal"] = "data" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/DATA[data]"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value"] = True # pylint: disable=E1126 TEMPLATE["required"][ "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value/@units" @@ -152,6 +158,11 @@ def listify_template(data_dict: Template): TEMPLATE["required"][ "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/date_value/@units" ] = "" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/@group_attribute"] = ( + "data" # pylint: disable=E1126 +) +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/@signal"] = "data" +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/DATA[data]"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition"] = "NXtest" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" # pylint: disable=E1126 @@ -523,6 +534,17 @@ def listify_template(data_dict: Template): ], id="missing-required-value", ), + pytest.param( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/@group_attribute", + "required", + ), + [ + 'Missing attribute: "/ENTRY[my_entry]/NXODD_name[nxodd_name]/@group_attribute"' + ], + id="missing-required-group-attribute", + ), pytest.param( set_whole_group_to_none( set_whole_group_to_none( From a018c9b56f0bad41481852e34884a5243df02375 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 22:08:16 +0100 Subject: [PATCH 59/61] add exception for @URL, and run tests on update branch --- .github/workflows/plugin_test.yaml | 4 ++-- src/pynxtools/dataconverter/validation.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 32fd9bbfd..5b7b43b7d 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -30,10 +30,10 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-igor - branch: main + branch: update-definitions tests_to_run: tests/. - plugin: pynxtools-mpes - branch: main + branch: update-definitions tests_to_run: tests/. - plugin: pynxtools-raman branch: main diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 043e6e02d..8b4ffdb61 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -541,6 +541,10 @@ def is_documented(key: str, tree: NexusNode) -> bool: # This value is not really set. Skip checking it's documentation. return True + # TODO remove when nameType is implemented + if key.endswith("/@URL"): + return True + node = add_best_matches_for(key, tree) if node is None: return False From 25e00134b01aa836608930e4272fd89e781003c1 Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 14 Mar 2025 22:31:30 +0100 Subject: [PATCH 60/61] prefer direct name match over concept match reliably --- src/pynxtools/dataconverter/validation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 8b4ffdb61..9ecaed24e 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -150,11 +150,15 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode concept_name, instance_name = split_class_and_name_of(name) + # prefer direct name match over concept match for node in nodes: if not node.variadic: if instance_name == node.name: return node - else: + + # if no direct name match is found, look for concept match + for node in nodes: + if node.variadic: if not concept_name or concept_name != node.name: continue if instance_name == node.name: From f24e9e0e56597a0ae205159fa683c12aaf0f3fd2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 17 Mar 2025 10:02:36 +0100 Subject: [PATCH 61/61] streamline best_namefit_of function --- src/pynxtools/dataconverter/validation.py | 24 ++++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 9ecaed24e..250a87bf4 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -150,26 +150,22 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode concept_name, instance_name = split_class_and_name_of(name) - # prefer direct name match over concept match - for node in nodes: - if not node.variadic: - if instance_name == node.name: - return node + best_match = None - # if no direct name match is found, look for concept match for node in nodes: - if node.variadic: - if not concept_name or concept_name != node.name: - continue + if not node.variadic: if instance_name == node.name: return node + else: + if concept_name and concept_name == node.name: + if instance_name == node.name: + return node - score = get_nx_namefit(instance_name, node.name) - - if score > -1: - return node + score = get_nx_namefit(instance_name, node.name) + if score > -1: + best_match = node - return None + return best_match def validate_dict_against(