diff --git a/cassis/__init__.py b/cassis/__init__.py index 9a5239a..e20cbb4 100644 --- a/cassis/__init__.py +++ b/cassis/__init__.py @@ -2,15 +2,24 @@ from .cas import Cas, Sofa, View from .json import load_cas_from_json -from .typesystem import TypeSystem, load_dkpro_core_typesystem, load_typesystem, merge_typesystems +from .typesystem import ( + Annotation, + TypeSystem, + is_annotation, + load_dkpro_core_typesystem, + load_typesystem, + merge_typesystems, +) from .util import cas_to_comparable_text from .xmi import load_cas_from_xmi __all__ = [ + "Annotation", "Cas", "Sofa", "View", "TypeSystem", + "is_annotation", "load_typesystem", "load_dkpro_core_typesystem", "merge_typesystems", diff --git a/cassis/cas.py b/cassis/cas.py index 5bbdafd..213aa52 100644 --- a/cassis/cas.py +++ b/cassis/cas.py @@ -14,16 +14,18 @@ from cassis.typesystem import ( FEATURE_BASE_NAME_HEAD, FEATURE_BASE_NAME_LANGUAGE, - TYPE_NAME_DOCUMENT_ANNOTATION, TYPE_NAME_ANNOTATION, + TYPE_NAME_DOCUMENT_ANNOTATION, TYPE_NAME_FS_ARRAY, TYPE_NAME_FS_LIST, TYPE_NAME_SOFA, FeatureStructure, + Annotation, Type, TypeCheckError, TypeSystem, TypeSystemMode, + is_annotation, ) _validator_optional_string = validators.optional(validators.instance_of(str)) @@ -171,29 +173,59 @@ def type_index(self) -> Dict[str, SortedKeyList]: """ return self._indices + def add_fs_to_indexes(self, fs: FeatureStructure): + """Adds a feature structure to the indexes of this view.""" + self._indices[fs.type.name].add(fs) + + @deprecation.deprecated(details="Use add_fs_to_indexes()") def add_annotation_to_index(self, annotation: FeatureStructure): - self._indices[annotation.type.name].add(annotation) + """Adds a feature structure to the indexes of this view. - def get_all_annotations(self) -> List[FeatureStructure]: - """Gets all the annotations in this view. + .. deprecated:: + Use :meth:`add_fs_to_indexes`. 
+ """ + self.add_fs_to_indexes(annotation) + + def get_all_fs(self) -> List[FeatureStructure]: + """Gets all indexed feature structures in this view. Returns: - A list of all annotations in this view. + A list of all indexed feature structures (annotations and non-annotations) in this view. """ result = [] - for annotations_by_type in self._indices.values(): - result.extend(annotations_by_type) + for fs_by_type in self._indices.values(): + result.extend(fs_by_type) return result - def remove_annotation_from_index(self, annotation: FeatureStructure): - """Removes an annotation from an index. This throws if the - annotation was not present. + @deprecation.deprecated(details="Use get_all_fs() for all indexed feature structures or filter with cassis.typesystem.is_annotation") + def get_all_annotations(self) -> List[FeatureStructure]: + """Gets all indexed annotations in this view. + + .. deprecated:: + Use :meth:`get_all_fs` for all indexed feature structures, or filter the result + with :func:`cassis.typesystem.is_annotation`. + """ + return [fs for fs in self.get_all_fs() if is_annotation(fs)] + + def remove_fs_from_indexes(self, fs: FeatureStructure): + """Removes a feature structure from the indexes of this view. Throws if the + feature structure was not present. Args: - annotation: The annotation to remove. + fs: The feature structure to remove. """ - self._indices[annotation.type.name].remove(annotation) + self._indices[fs.type.name].remove(fs) + + @deprecation.deprecated(details="Use remove_fs_from_indexes()") + def remove_annotation_from_index(self, annotation: FeatureStructure): + """Removes a feature structure from the indexes of this view. Throws if the + feature structure was not present. + + .. deprecated:: + Use :meth:`remove_fs_from_indexes`. 
+ """ + self.remove_fs_from_indexes(annotation) class Index: @@ -313,29 +345,29 @@ def views(self) -> List[View]: """ return list(self._views.values()) - def add(self, annotation: FeatureStructure, keep_id: Optional[bool] = True): - """Adds an annotation to this Cas. + def add(self, fs: FeatureStructure, keep_id: Optional[bool] = True): + """Adds a feature structure to this Cas. Args: - annotation: The annotation to add. - keep_id: Keep the XMI id of `annotation` if true, else generate a new one. + fs: The feature structure to add. + keep_id: Keep the XMI id of `fs` if true, else generate a new one. """ - if not self._lenient and not self._typesystem.contains_type(annotation.type.name): - msg = f"Typesystem of CAS does not contain type [{annotation.type.name}]. " + if not self._lenient and not self._typesystem.contains_type(fs.type.name): + msg = f"Typesystem of CAS does not contain type [{fs.type.name}]. " msg += "Either add the type to the type system or specify `lenient=True` when creating the CAS." raise RuntimeError(msg) - if keep_id and annotation.xmiID is not None: - next_id = annotation.xmiID + if keep_id and fs.xmiID is not None: + next_id = fs.xmiID else: next_id = self._get_next_xmi_id() - annotation.xmiID = next_id - if hasattr(annotation, "sofa"): - annotation.sofa = self.get_sofa() + fs.xmiID = next_id + if hasattr(fs, "sofa"): + fs.sofa = self.get_sofa() - self._current_view.add_annotation_to_index(annotation) + self._current_view.add_fs_to_indexes(fs) @deprecation.deprecated(details="Use add()") def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True): @@ -404,7 +436,7 @@ def crop_sofa_string(self, sofa_begin: int, sofa_end: int, overlap: bool = True) self.sofa_string = self.sofa_string[sofa_begin:sofa_end] # Make an explicit snapshot of the current annotations to avoid # issues when removing/modifying elements during iteration. 
- for annotation in list(self.select_all()): + for annotation in list(self.select_all_annotations()): # Determine whether the annotation will be kept and how its # offsets need to be adjusted. If offsets are adjusted we must # reindex the annotation (remove then add) so that the @@ -412,28 +444,28 @@ def crop_sofa_string(self, sofa_begin: int, sofa_end: int, overlap: bool = True) # updated begin/end values. if sofa_begin <= annotation.begin and annotation.end <= sofa_end: # fully contained - self._current_view.remove_annotation_from_index(annotation) + self._current_view.remove_fs_from_indexes(annotation) annotation.begin = annotation.begin - sofa_begin annotation.end = annotation.end - sofa_begin - self._current_view.add_annotation_to_index(annotation) + self._current_view.add_fs_to_indexes(annotation) elif overlap and sofa_begin < annotation.end <= sofa_end: # left overlap (annotation starts before cut) - self._current_view.remove_annotation_from_index(annotation) + self._current_view.remove_fs_from_indexes(annotation) annotation.begin = 0 annotation.end = annotation.end - sofa_begin - self._current_view.add_annotation_to_index(annotation) + self._current_view.add_fs_to_indexes(annotation) elif overlap and sofa_begin <= annotation.begin < sofa_end: # right overlap (annotation ends after cut) - self._current_view.remove_annotation_from_index(annotation) + self._current_view.remove_fs_from_indexes(annotation) annotation.begin = annotation.begin - sofa_begin annotation.end = len(self.sofa_string) - self._current_view.add_annotation_to_index(annotation) + self._current_view.add_fs_to_indexes(annotation) elif overlap and annotation.begin <= sofa_begin and sofa_end <= annotation.end: # annotation fully covers the cut - self._current_view.remove_annotation_from_index(annotation) + self._current_view.remove_fs_from_indexes(annotation) annotation.begin = 0 annotation.end = len(self.sofa_string) - self._current_view.add_annotation_to_index(annotation) + 
self._current_view.add_fs_to_indexes(annotation) else: # annotation falls completely outside the cut; remove it self.remove(annotation) @@ -447,7 +479,7 @@ def remove(self, annotation: FeatureStructure): Args: annotation: The annotation to remove. """ - self._current_view.remove_annotation_from_index(annotation) + self._current_view.remove_fs_from_indexes(annotation) @deprecation.deprecated(details="Use remove()") def remove_annotation(self, annotation: FeatureStructure): @@ -468,17 +500,13 @@ def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Unio type_: The type or name of the type name whose annotation instances are to be found Raises: ValueError: If range indices are invalid. + TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``. """ - # If no type is provided, operate on annotation-like feature - # structures only (those that have `begin` and `end`) to avoid - # AttributeError for arbitrary FS (e.g., instances of uima.cas.TOP). if type_ is None: - # Only operate on annotation-like feature structures to avoid - # AttributeError for non-annotation FS present in the view. - annotations = [a for a in self.select_all() if self.typesystem.is_instance_of(a.type, TYPE_NAME_ANNOTATION)] + annotations = self.select_all_annotations() else: - annotations = self.select(type_) + annotations = self.select(self._require_annotation_type(type_, "remove_annotations_in_range")) if self.sofa_string is None: raise ValueError("Cannot remove annotations by range: CAS has no sofa string for the current view") @@ -492,7 +520,7 @@ def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Unio raise ValueError(f"Invalid indices for begin {begin} and end {end}") @deprecation.deprecated(details="Use annotation.get_covered_text()") - def get_covered_text(self, annotation: FeatureStructure) -> str: + def get_covered_text(self, annotation: Annotation) -> str: """Gets the text that is covered by `annotation`. 
Args: @@ -518,7 +546,7 @@ def select(self, type_: Union[Type, str]) -> List[FeatureStructure]: t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_) return self._get_feature_structures(t) - def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureStructure) -> List[FeatureStructure]: + def select_covered(self, type_: Union[Type, str], covering_annotation: Annotation) -> List[Annotation]: """Returns a list of covered annotations. Return all annotations that are covered @@ -533,8 +561,11 @@ def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureSt Returns: A list of covered annotations + Raises: + TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``. + """ - t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_) + t = self._require_annotation_type(type_, "select_covered") c_begin = covering_annotation.begin c_end = covering_annotation.end @@ -544,7 +575,7 @@ def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureSt result.append(annotation) return result - def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureStructure) -> List[FeatureStructure]: + def select_covering(self, type_: Union[Type, str], covered_annotation: Annotation) -> List[Annotation]: """Returns a list of annotations that cover the given annotation. Return all annotations that are covering. This can be potentially be slow. @@ -559,28 +590,65 @@ def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureSt Returns: A list of covering annotations + Raises: + TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``. 
+ """ - t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_) + t = self._require_annotation_type(type_, "select_covering") c_begin = covered_annotation.begin c_end = covered_annotation.end - # We iterate over all annotations and check whether the provided annotation - # is covered in the current annotation + result = [] for annotation in self._get_feature_structures(t): if c_begin >= annotation.begin and c_end <= annotation.end: - yield annotation + result.append(annotation) + return result - def select_all(self) -> List[FeatureStructure]: - """Finds all feature structures in this Cas + def select_all_fs(self) -> List[FeatureStructure]: + """Returns all indexed feature structures (annotations and non-annotations) in the current view. Returns: - A list of all annotations in this Cas + A list of all indexed feature structures in the current view. + """ + return self._current_view.get_all_fs() + + def select_all_annotations(self) -> List[Annotation]: + """Returns all indexed annotations in the current view. + + Non-annotation feature structures present in the view are filtered out, so it is safe + to access ``begin``/``end`` on the returned items. + Returns: + A list of all indexed annotations in the current view. """ - return self._current_view.get_all_annotations() + return [fs for fs in self._current_view.get_all_fs() if is_annotation(fs)] + + @deprecation.deprecated(details="Use select_all_annotations() for annotations only or select_all_fs() for all indexed feature structures") + def select_all(self) -> List[Annotation]: + """Finds all annotations in this Cas. + + .. deprecated:: + Use :meth:`select_all_annotations` for annotations only, or + :meth:`select_all_fs` for all indexed feature structures. + """ + return self.select_all_annotations() # FS handling + def _require_annotation_type(self, type_: Union[Type, str], operation: str) -> Type: + """Resolves ``type_`` and validates it is a subtype of ``uima.tcas.Annotation``. 
+ + Raises: + TypeError: If the resolved type is not an annotation type. + """ + t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_) + if not self.typesystem.is_instance_of(t, TYPE_NAME_ANNOTATION): + raise TypeError( + f"Type [{t.name}] is not a subtype of [{TYPE_NAME_ANNOTATION}]; " + f"{operation} only operates on annotation types" + ) + return t + def _get_feature_structures(self, type_: Type) -> List[FeatureStructure]: """Returns a list of all feature structures of type `type_name` and child types.""" types = {c.name for c in type_.descendants} @@ -838,7 +906,7 @@ def _find_all_fs( else: for sofa in self.sofas: view = self.get_view(sofa.sofaID) - openlist.extend(view.select_all()) + openlist.extend(view.select_all_fs()) ts = self.typesystem while openlist: @@ -939,8 +1007,9 @@ def _copy(self) -> "Cas": def _sort_func(a: FeatureStructure) -> Tuple[int, int, int]: - d = a.__slots__ - if "begin" in d and "end" in d: - return a.begin, a.end, id(a) - else: - return sys.maxsize, sys.maxsize, id(a) + xmi_id = getattr(a, "xmiID", None) + tiebreaker = xmi_id if xmi_id is not None else id(a) + if is_annotation(a): + return a.begin, a.end, tiebreaker + # Non-annotation feature structures sort after annotations. 
+ return sys.maxsize, sys.maxsize, tiebreaker diff --git a/cassis/json.py b/cassis/json.py index 6c39384..2a20090 100644 --- a/cassis/json.py +++ b/cassis/json.py @@ -565,7 +565,7 @@ def _serialize_ref(self, fs) -> int: def _serialize_view(self, view: View): return { VIEW_SOFA_FIELD: view.sofa.xmiID, - VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_annotations()), + VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_fs()), } def _to_external_type_name(self, type_name: str): diff --git a/cassis/typesystem.py b/cassis/typesystem.py index 274b819..104f9b8 100644 --- a/cassis/typesystem.py +++ b/cassis/typesystem.py @@ -6,7 +6,7 @@ from io import BytesIO from itertools import chain, filterfalse from pathlib import Path -from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, Union +from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, TypeGuard, Union import attr from deprecation import deprecated @@ -500,6 +500,23 @@ def __repr__(self): return str(self) +@attr.s(slots=True, eq=False, order=False, repr=False) +class Annotation(FeatureStructure): + """Concrete base class for annotation instances. + + Generated types that represent (subtypes of) `uima.tcas.Annotation` will + inherit from this class so that static typing can rely on a nominal base + providing `begin` and `end`. 
+ """ + + begin: int = attr.ib(default=0) + end: int = attr.ib(default=0) + + +def is_annotation(fs: FeatureStructure) -> TypeGuard[Annotation]: + return isinstance(fs, Annotation) + + @attr.s(slots=True, eq=False, order=False, repr=False) class Feature: """A feature defines one attribute of a feature structure""" @@ -572,15 +589,46 @@ class Type: def __attrs_post_init__(self): """Build the constructor that can create feature structures of this type""" name = _string_to_valid_classname(self.name) - fields = {feature.name: attr.ib(default=None, repr=(feature.name != "sofa")) for feature in self.all_features} + + # Determine whether this type is (transitively) a subtype of uima.tcas.Annotation + def _is_annotation_type(t: "Type") -> bool: + cur = t + while cur is not None: + if cur.name == TYPE_NAME_ANNOTATION: + return True + cur = cur.supertype + return False + + is_annotation_type = _is_annotation_type(self) + + # When inheriting from our concrete Annotation base, do not redeclare + # the 'begin' and 'end' features as fields; they are already present. + fields = {} + for feature in self.all_features: + if feature.name in {"begin", "end"} and is_annotation_type: + # skip - Annotation base provides these + continue + fields[feature.name] = attr.ib(default=None, repr=(feature.name != "sofa")) fields["type"] = attr.ib(default=self) # We assign this to a lambda to make it lazy # When creating large type systems, almost no types are used so # creating them on the fly is on average better - self._constructor_fn = lambda: attr.make_class( - name, fields, bases=(FeatureStructure,), slots=True, eq=False, order=False - ) + bases = (Annotation,) if is_annotation_type else (FeatureStructure,) + + def _make_fs_class(): + cls = attr.make_class(name, fields, bases=bases, slots=True, eq=False, order=False) + # Ensure generated FS classes are hashable. When a class defines an + # __eq__ (inherited or generated) but no __hash__, Python makes + # instances unhashable. 
We want FeatureStructure-based instances to + # be usable as dict/set keys (they are keyed by xmiID), so assign the + # base FeatureStructure.__hash__ implementation to the generated + # class if it doesn't already provide one. + if getattr(cls, "__hash__", None) is None: + cls.__hash__ = FeatureStructure.__hash__ + return cls + + self._constructor_fn = _make_fs_class def __call__(self, **kwargs) -> FeatureStructure: """Creates an feature structure of this type diff --git a/cassis/util.py b/cassis/util.py index 1d50755..6a6b632 100644 --- a/cassis/util.py +++ b/cassis/util.py @@ -26,6 +26,7 @@ TYPE_NAME_STRING_ARRAY, FeatureStructure, Type, + is_annotation, is_array, is_list, ) @@ -205,7 +206,7 @@ def _render_feature_structure( if indexed_column: row_data.append(_bool_to_java_string(id(fs) in indexed_feature_structure_ids)) - if max_covered_text > 0 and _is_annotation_fs(fs): + if max_covered_text > 0 and is_annotation(fs): covered_text_value = _abbreviate_middle(fs.get_covered_text(), "...", max_covered_text) row_data.append(_escape(_render_string_value(covered_text_value, treat_empty_strings_as_null, null_value))) @@ -280,7 +281,7 @@ def _get_indexed_feature_structures(cas: Cas) -> Iterable[FeatureStructure]: feature_structures = [] for sofa in cas.sofas: view = cas.get_view(sofa.sofaID) - feature_structures.extend(view.select_all()) + feature_structures.extend(view.select_all_fs()) return feature_structures @@ -354,7 +355,7 @@ def _generate_anchor( ) -> str: anchor = fs.type.name.rsplit(".", 2)[-1] # Get the short type name (no package) - if include_offsets and _is_annotation_fs(fs): + if include_offsets and is_annotation(fs): anchor += f"[{fs.begin}-{fs.end}]" if add_index_mark: @@ -381,10 +382,6 @@ def _is_multi_valued_feature_structure(fs: Any) -> bool: return isinstance(fs, FeatureStructure) and (is_array(fs.type) or is_list(fs.type)) -def _is_annotation_fs(fs: FeatureStructure) -> bool: - return hasattr(fs, "begin") and isinstance(fs.begin, int) and 
hasattr(fs, "end") and isinstance(fs.end, int) - - def _compare_fs( type_: Type, a: FeatureStructure, @@ -395,11 +392,11 @@ def _compare_fs( if a is b: return 0 - # duck-typing check if something is a annotation - if yes, try sorting by offets - fs_a_is_annotation = _is_annotation_fs(a) - fs_b_is_annotation = _is_annotation_fs(b) + # nominal check (isinstance of Annotation): annotations sort before non-annotations; two annotations are ordered by offsets + fs_a_is_annotation = is_annotation(a) + fs_b_is_annotation = is_annotation(b) if fs_a_is_annotation != fs_b_is_annotation: - return -1 + return -1 if fs_a_is_annotation else 1 if fs_a_is_annotation and fs_b_is_annotation: begin_cmp = a.begin - b.begin if begin_cmp != 0: @@ -536,7 +533,9 @@ def _escape(value: str) -> str: return value.translate(_ESCAPE_TRANSLATION) -def _abbreviate_middle(value: str, middle: str, max_length: int) -> str: +def _abbreviate_middle(value: Optional[str], middle: str, max_length: int) -> Optional[str]: + if value is None: + return None if len(value) <= max_length: return value @@ -584,7 +583,7 @@ def _render_multi_valued_feature_structure( if values is None: return null_value - if sort_annotations_in_multi_valued_features and all(_is_annotation_fs(value) for value in values): + if sort_annotations_in_multi_valued_features and all(is_annotation(value) for value in values): values = sorted(values, key=lambda value: (value.begin, -value.end, value.type.name)) return _render_sequence( diff --git a/cassis/xmi.py b/cassis/xmi.py index 2e1bfe2..eadfadf 100644 --- a/cassis/xmi.py +++ b/cassis/xmi.py @@ -619,13 +619,19 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur continue # Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets - if ( - ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION) - and feature_name == FEATURE_BASE_NAME_BEGIN - or feature_name == FEATURE_BASE_NAME_END + # Ensure we only convert begin/end for annotation instances. 
Parentheses are + # required because `and` has higher precedence than `or` and we must not + # attempt conversion for the END feature on non-annotations. + if ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION) and ( + feature_name == FEATURE_BASE_NAME_BEGIN or feature_name == FEATURE_BASE_NAME_END ): - sofa: Sofa = fs.sofa - value = sofa._offset_converter.python_to_external(value) + # Be defensive: only perform offset conversion if the sofa and its + # offset converter have been initialized. In some workflows (e.g. a + # freshly constructed CAS without sofa strings) the converter may + # not exist yet and conversion is not possible. + sofa = getattr(fs, "sofa", None) + if sofa is not None and getattr(sofa, "_offset_converter", None) is not None: + value = sofa._offset_converter.python_to_external(value) if ts.is_instance_of(feature.rangeType, TYPE_NAME_STRING_ARRAY) and not feature.multipleReferencesAllowed: if value.elements is not None: # Compare to none as not to skip if elements is empty! 
@@ -683,7 +689,7 @@ def _serialize_view(self, root: etree.Element, view: View): elem = etree.SubElement(root, name) elem.attrib["sofa"] = str(view.sofa.xmiID) - elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_annotations()), key=int)) + elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_fs()), key=int)) def _collect_list_elements(self, type_name: str, value) -> List[str]: if type_name not in _LIST_TYPES: diff --git a/tests/test_cas.py b/tests/test_cas.py index 3e0dba2..79342c7 100644 --- a/tests/test_cas.py +++ b/tests/test_cas.py @@ -336,8 +336,8 @@ def test_select_only_returns_annotations_of_current_view( view = cas.create_view("testView") view.add_all(sentences) - actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all()) - actual_annotations_in_test_view = list(cas.get_view("testView").select_all()) + actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all_annotations()) + actual_annotations_in_test_view = list(cas.get_view("testView").select_all_annotations()) assert tokens == actual_annotations_in_initial_view assert sentences == actual_annotations_in_test_view @@ -600,6 +600,25 @@ def test_covered_text_on_non_annotation(): top.get_covered_text() +def test_add_non_annotation_and_select(): + """Create a non-annotation type, add an instance and verify select returns it.""" + cas = Cas() + + # Create a type that is not an annotation (override the default uima.tcas.Annotation supertype) + NonAnnotation = cas.typesystem.create_type("test.NonAnnotation", supertypeName=TYPE_NAME_TOP) + + # Instantiate and add to CAS + fs = NonAnnotation() + cas.add(fs) + + # Should be retrievable by select using the type name + selected = list(cas.select("test.NonAnnotation")) + assert selected == [fs] + + # And visible via select_all_fs + assert fs in cas.select_all_fs() + + def test_covered_text_on_annotation_without_sofa(): cas = Cas() Annotation = 
cas.typesystem.get_type(TYPE_NAME_ANNOTATION) @@ -609,6 +628,20 @@ def test_covered_text_on_annotation_without_sofa(): ann.get_covered_text() +def test_runtime_generated_annotation_is_detected_and_shown_in_anchor(): + ts = TypeSystem() + # Create a new annotation subtype (should inherit from Annotation base) + MyAnno = ts.create_type("my.pkg.MyAnnotation", supertypeName="uima.tcas.Annotation") + + cas = Cas(ts) + # Create an instance of the runtime-generated type; ensure we can set begin/end + a = MyAnno(begin=5, end=10) + cas.add(a) + + text = cas_to_comparable_text(cas) + assert "MyAnnotation[5-10]" in text + + def test_remove_annotations_in_range(small_typesystem_xml, small_xmi): typesystem = load_typesystem(small_typesystem_xml) cas = load_cas_from_xmi(small_xmi, typesystem) @@ -617,12 +650,12 @@ def test_remove_annotations_in_range(small_typesystem_xml, small_xmi): end = 20 expected_leftover_annotations = [ - annotation for annotation in cas.select_all() if not (begin <= annotation.begin < annotation.end <= end) + annotation for annotation in cas.select_all_annotations() if not (begin <= annotation.begin < annotation.end <= end) ] cas.remove_annotations_in_range(begin, end) - result_leftover_annotations = cas.select_all() + result_leftover_annotations = cas.select_all_annotations() assert len(result_leftover_annotations) == len(expected_leftover_annotations) @@ -639,13 +672,13 @@ def test_remove_annotations_in_range_with_type(small_typesystem_xml, small_xmi): type_ = "cassis.Token" expected_leftover_annotations = [ annotation - for annotation in cas.select_all() + for annotation in cas.select_all_annotations() if not (begin <= annotation.begin < annotation.end <= end and annotation.type.name == type_) ] cas.remove_annotations_in_range(begin, end, type_) - result_leftover_annotations = cas.select_all() + result_leftover_annotations = cas.select_all_annotations() assert len(result_leftover_annotations) == len(expected_leftover_annotations) @@ -665,7 +698,7 @@ 
def test_crop_sofa_string(small_typesystem_xml, small_xmi): # Snapshot annotations' original offsets so we can compute expected adjusted offsets expected_leftover_annotations = [ (annotation, annotation.begin, annotation.end) - for annotation in cas.select_all() + for annotation in cas.select_all_annotations() if overlapping(begin, end, annotation.begin, annotation.end) ] @@ -674,7 +707,7 @@ def test_crop_sofa_string(small_typesystem_xml, small_xmi): cas.crop_sofa_string(begin, end) assert cas.sofa_string == original_sofa[begin:end] - assert len(cas.select_all()) == len(expected_leftover_annotations) + assert len(cas.select_all_annotations()) == len(expected_leftover_annotations) # Verify offsets were adjusted as expected for the remaining annotations for annotation, orig_begin, orig_end in expected_leftover_annotations: @@ -709,7 +742,7 @@ def test_crop_sofa_string_no_overlap(small_typesystem_xml, small_xmi): # Snapshot annotations' original offsets so we can compute expected adjusted offsets expected_leftover_annotations = [ (annotation, annotation.begin, annotation.end) - for annotation in cas.select_all() + for annotation in cas.select_all_annotations() if begin <= annotation.begin < annotation.end <= end ] @@ -718,7 +751,7 @@ def test_crop_sofa_string_no_overlap(small_typesystem_xml, small_xmi): cas.crop_sofa_string(begin, end, overlap=False) assert cas.sofa_string == original_sofa[begin:end] - assert len(cas.select_all()) == len(expected_leftover_annotations) + assert len(cas.select_all_annotations()) == len(expected_leftover_annotations) # Verify offsets were adjusted as expected for the remaining annotations for annotation, orig_begin, orig_end in expected_leftover_annotations: @@ -819,11 +852,11 @@ def test_crop_sofa_string_various_overlap_cases( assert cas.sofa_string == original_sofa[begin:end] if expect_kept: - assert ann in cas.select_all() + assert ann in cas.select_all_annotations() assert ann.begin == expect_begin assert ann.end == expect_end else: 
- assert ann not in cas.select_all() + assert ann not in cas.select_all_annotations() def test_crop_sofa_string_transitive_references_remain(small_typesystem_xml): @@ -853,7 +886,7 @@ def test_crop_sofa_string_transitive_references_remain(small_typesystem_xml): cas.crop_sofa_string(begin, end) # Child was outside the cut and therefore removed from the view index - assert child not in cas.select_all() + assert child not in cas.select_all_annotations() # But child is still reachable via parent and will be discovered by traversal all_fs = list(cas._find_all_fs()) diff --git a/tests/test_files/xmi/cas_with_collections.xmi b/tests/test_files/xmi/cas_with_collections.xmi index b3846f7..4e47463 100644 --- a/tests/test_files/xmi/cas_with_collections.xmi +++ b/tests/test_files/xmi/cas_with_collections.xmi @@ -20,7 +20,7 @@ C - + A B C @@ -48,9 +48,9 @@ - + - + diff --git a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi index 41577b0..5a96065 100644 --- a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi +++ b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi @@ -8,7 +8,7 @@ - + LNC diff --git a/tests/test_files/xmi/cas_with_reserved_names.xmi b/tests/test_files/xmi/cas_with_reserved_names.xmi index a089da0..2634639 100644 --- a/tests/test_files/xmi/cas_with_reserved_names.xmi +++ b/tests/test_files/xmi/cas_with_reserved_names.xmi @@ -3,7 +3,7 @@ xmlns:test="http:///test.ecore" xmi:version="2.0"> - + diff --git a/tests/test_json.py b/tests/test_json.py index 810e421..d29a4ec 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -129,7 +129,7 @@ def test_multi_type_random_serialization_deserialization(): generator.type_count = i + 1 typesystem = generator.generate_type_system() randomized_cas = generator.generate_cas(typesystem) - print(f"CAS size: {sum(len(view.get_all_annotations()) for view in 
randomized_cas.views)}") + print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}") expected_json = randomized_cas.to_json() loaded_cas = load_cas_from_json(expected_json) @@ -144,7 +144,7 @@ def test_multi_feature_random_serialization_deserialization(): generator.size = (i + 1) * 10 typesystem = generator.generate_type_system() randomized_cas = generator.generate_cas(typesystem) - print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}") + print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}") expected_json = randomized_cas.to_json() loaded_cas = load_cas_from_json(expected_json) diff --git a/tests/test_util.py b/tests/test_util.py index ad555b1..b86cdfa 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -55,7 +55,7 @@ def test_cas_to_comparable_text_on_multi_feature_random(): generator.size = (i + 1) * 10 typesystem = generator.generate_type_system() randomized_cas = generator.generate_cas(typesystem) - print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}") + print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}") cas_to_comparable_text(randomized_cas) # At this point, we are just testing if there is no exception during rendering @@ -66,7 +66,7 @@ def test_cas_to_comparable_text_on_multi_type_random(): generator.size = (i + 1) * 10 typesystem = generator.generate_type_system() randomized_cas = generator.generate_cas(typesystem) - print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}") + print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}") cas_to_comparable_text(randomized_cas) # At this point, we are just testing if there is no exception during rendering diff --git a/tests/test_xmi.py b/tests/test_xmi.py index 03d59b7..f1be393 100644 --- a/tests/test_xmi.py +++ b/tests/test_xmi.py @@ -109,8 +109,8 @@ def test_views_are_parsed(small_xmi, 
small_typesystem_xml): view1 = cas.get_view("sofa1") view2 = cas.get_view("sofa2") - assert 2 == len(list(view1.select_all())) - assert 1 == len(list(view2.select_all())) + assert 2 == len(list(view1.select_all_annotations())) + assert 1 == len(list(view2.select_all_annotations())) def test_deserializing_and_then_adding_annotations_works(small_xmi, small_typesystem_xml):