diff --git a/cassis/__init__.py b/cassis/__init__.py
index 9a5239a..e20cbb4 100644
--- a/cassis/__init__.py
+++ b/cassis/__init__.py
@@ -2,15 +2,24 @@
from .cas import Cas, Sofa, View
from .json import load_cas_from_json
-from .typesystem import TypeSystem, load_dkpro_core_typesystem, load_typesystem, merge_typesystems
+from .typesystem import (
+ Annotation,
+ TypeSystem,
+ is_annotation,
+ load_dkpro_core_typesystem,
+ load_typesystem,
+ merge_typesystems,
+)
from .util import cas_to_comparable_text
from .xmi import load_cas_from_xmi
__all__ = [
+ "Annotation",
"Cas",
"Sofa",
"View",
"TypeSystem",
+ "is_annotation",
"load_typesystem",
"load_dkpro_core_typesystem",
"merge_typesystems",
diff --git a/cassis/cas.py b/cassis/cas.py
index 5bbdafd..213aa52 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -14,16 +14,18 @@
from cassis.typesystem import (
FEATURE_BASE_NAME_HEAD,
FEATURE_BASE_NAME_LANGUAGE,
- TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_ANNOTATION,
+ TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_FS_LIST,
TYPE_NAME_SOFA,
FeatureStructure,
+ Annotation,
Type,
TypeCheckError,
TypeSystem,
TypeSystemMode,
+ is_annotation,
)
_validator_optional_string = validators.optional(validators.instance_of(str))
@@ -171,29 +173,59 @@ def type_index(self) -> Dict[str, SortedKeyList]:
"""
return self._indices
+ def add_fs_to_indexes(self, fs: FeatureStructure):
+ """Adds a feature structure to the indexes of this view."""
+ self._indices[fs.type.name].add(fs)
+
+ @deprecation.deprecated(details="Use add_fs_to_indexes()")
def add_annotation_to_index(self, annotation: FeatureStructure):
- self._indices[annotation.type.name].add(annotation)
+ """Adds a feature structure to the indexes of this view.
- def get_all_annotations(self) -> List[FeatureStructure]:
- """Gets all the annotations in this view.
+ .. deprecated::
+ Use :meth:`add_fs_to_indexes`.
+ """
+ self.add_fs_to_indexes(annotation)
+
+ def get_all_fs(self) -> List[FeatureStructure]:
+ """Gets all indexed feature structures in this view.
Returns:
- A list of all annotations in this view.
+ A list of all indexed feature structures (annotations and non-annotations) in this view.
"""
result = []
- for annotations_by_type in self._indices.values():
- result.extend(annotations_by_type)
+ for fs_by_type in self._indices.values():
+ result.extend(fs_by_type)
return result
- def remove_annotation_from_index(self, annotation: FeatureStructure):
- """Removes an annotation from an index. This throws if the
- annotation was not present.
+ @deprecation.deprecated(details="Use get_all_fs() for all indexed feature structures or filter with cassis.typesystem.is_annotation")
+ def get_all_annotations(self) -> List[FeatureStructure]:
+ """Gets all indexed annotations in this view.
+
+ .. deprecated::
+ Use :meth:`get_all_fs` for all indexed feature structures, or filter the result
+ with :func:`cassis.typesystem.is_annotation`.
+ """
+ return [fs for fs in self.get_all_fs() if is_annotation(fs)]
+
+ def remove_fs_from_indexes(self, fs: FeatureStructure):
+ """Removes a feature structure from the indexes of this view. Throws if the
+ feature structure was not present.
Args:
- annotation: The annotation to remove.
+ fs: The feature structure to remove.
"""
- self._indices[annotation.type.name].remove(annotation)
+ self._indices[fs.type.name].remove(fs)
+
+ @deprecation.deprecated(details="Use remove_fs_from_indexes()")
+ def remove_annotation_from_index(self, annotation: FeatureStructure):
+ """Removes a feature structure from the indexes of this view. Throws if the
+ feature structure was not present.
+
+ .. deprecated::
+ Use :meth:`remove_fs_from_indexes`.
+ """
+ self.remove_fs_from_indexes(annotation)
class Index:
@@ -313,29 +345,29 @@ def views(self) -> List[View]:
"""
return list(self._views.values())
- def add(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
- """Adds an annotation to this Cas.
+ def add(self, fs: FeatureStructure, keep_id: Optional[bool] = True):
+ """Adds a feature structure to this Cas.
Args:
- annotation: The annotation to add.
- keep_id: Keep the XMI id of `annotation` if true, else generate a new one.
+ fs: The feature structure to add.
+ keep_id: Keep the XMI id of `fs` if true, else generate a new one.
"""
- if not self._lenient and not self._typesystem.contains_type(annotation.type.name):
- msg = f"Typesystem of CAS does not contain type [{annotation.type.name}]. "
+ if not self._lenient and not self._typesystem.contains_type(fs.type.name):
+ msg = f"Typesystem of CAS does not contain type [{fs.type.name}]. "
msg += "Either add the type to the type system or specify `lenient=True` when creating the CAS."
raise RuntimeError(msg)
- if keep_id and annotation.xmiID is not None:
- next_id = annotation.xmiID
+ if keep_id and fs.xmiID is not None:
+ next_id = fs.xmiID
else:
next_id = self._get_next_xmi_id()
- annotation.xmiID = next_id
- if hasattr(annotation, "sofa"):
- annotation.sofa = self.get_sofa()
+ fs.xmiID = next_id
+ if hasattr(fs, "sofa"):
+ fs.sofa = self.get_sofa()
- self._current_view.add_annotation_to_index(annotation)
+ self._current_view.add_fs_to_indexes(fs)
@deprecation.deprecated(details="Use add()")
def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
@@ -404,7 +436,7 @@ def crop_sofa_string(self, sofa_begin: int, sofa_end: int, overlap: bool = True)
self.sofa_string = self.sofa_string[sofa_begin:sofa_end]
# Make an explicit snapshot of the current annotations to avoid
# issues when removing/modifying elements during iteration.
- for annotation in list(self.select_all()):
+ for annotation in list(self.select_all_annotations()):
# Determine whether the annotation will be kept and how its
# offsets need to be adjusted. If offsets are adjusted we must
# reindex the annotation (remove then add) so that the
@@ -412,28 +444,28 @@ def crop_sofa_string(self, sofa_begin: int, sofa_end: int, overlap: bool = True)
# updated begin/end values.
if sofa_begin <= annotation.begin and annotation.end <= sofa_end:
# fully contained
- self._current_view.remove_annotation_from_index(annotation)
+ self._current_view.remove_fs_from_indexes(annotation)
annotation.begin = annotation.begin - sofa_begin
annotation.end = annotation.end - sofa_begin
- self._current_view.add_annotation_to_index(annotation)
+ self._current_view.add_fs_to_indexes(annotation)
elif overlap and sofa_begin < annotation.end <= sofa_end:
# left overlap (annotation starts before cut)
- self._current_view.remove_annotation_from_index(annotation)
+ self._current_view.remove_fs_from_indexes(annotation)
annotation.begin = 0
annotation.end = annotation.end - sofa_begin
- self._current_view.add_annotation_to_index(annotation)
+ self._current_view.add_fs_to_indexes(annotation)
elif overlap and sofa_begin <= annotation.begin < sofa_end:
# right overlap (annotation ends after cut)
- self._current_view.remove_annotation_from_index(annotation)
+ self._current_view.remove_fs_from_indexes(annotation)
annotation.begin = annotation.begin - sofa_begin
annotation.end = len(self.sofa_string)
- self._current_view.add_annotation_to_index(annotation)
+ self._current_view.add_fs_to_indexes(annotation)
elif overlap and annotation.begin <= sofa_begin and sofa_end <= annotation.end:
# annotation fully covers the cut
- self._current_view.remove_annotation_from_index(annotation)
+ self._current_view.remove_fs_from_indexes(annotation)
annotation.begin = 0
annotation.end = len(self.sofa_string)
- self._current_view.add_annotation_to_index(annotation)
+ self._current_view.add_fs_to_indexes(annotation)
else:
# annotation falls completely outside the cut; remove it
self.remove(annotation)
@@ -447,7 +479,7 @@ def remove(self, annotation: FeatureStructure):
Args:
annotation: The annotation to remove.
"""
- self._current_view.remove_annotation_from_index(annotation)
+ self._current_view.remove_fs_from_indexes(annotation)
@deprecation.deprecated(details="Use remove()")
def remove_annotation(self, annotation: FeatureStructure):
@@ -468,17 +500,13 @@ def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Unio
type_: The type or name of the type name whose annotation instances are to be found
Raises:
ValueError: If range indices are invalid.
+ TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``.
"""
- # If no type is provided, operate on annotation-like feature
- # structures only (those that have `begin` and `end`) to avoid
- # AttributeError for arbitrary FS (e.g., instances of uima.cas.TOP).
if type_ is None:
- # Only operate on annotation-like feature structures to avoid
- # AttributeError for non-annotation FS present in the view.
- annotations = [a for a in self.select_all() if self.typesystem.is_instance_of(a.type, TYPE_NAME_ANNOTATION)]
+ annotations = self.select_all_annotations()
else:
- annotations = self.select(type_)
+ annotations = self.select(self._require_annotation_type(type_, "remove_annotations_in_range"))
if self.sofa_string is None:
raise ValueError("Cannot remove annotations by range: CAS has no sofa string for the current view")
@@ -492,7 +520,7 @@ def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Unio
raise ValueError(f"Invalid indices for begin {begin} and end {end}")
@deprecation.deprecated(details="Use annotation.get_covered_text()")
- def get_covered_text(self, annotation: FeatureStructure) -> str:
+ def get_covered_text(self, annotation: Annotation) -> str:
"""Gets the text that is covered by `annotation`.
Args:
@@ -518,7 +546,7 @@ def select(self, type_: Union[Type, str]) -> List[FeatureStructure]:
t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
return self._get_feature_structures(t)
- def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureStructure) -> List[FeatureStructure]:
+ def select_covered(self, type_: Union[Type, str], covering_annotation: Annotation) -> List[Annotation]:
"""Returns a list of covered annotations.
Return all annotations that are covered
@@ -533,8 +561,11 @@ def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureSt
Returns:
A list of covered annotations
+ Raises:
+ TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``.
+
"""
- t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
+ t = self._require_annotation_type(type_, "select_covered")
c_begin = covering_annotation.begin
c_end = covering_annotation.end
@@ -544,7 +575,7 @@ def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureSt
result.append(annotation)
return result
- def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureStructure) -> List[FeatureStructure]:
+ def select_covering(self, type_: Union[Type, str], covered_annotation: Annotation) -> List[Annotation]:
"""Returns a list of annotations that cover the given annotation.
Return all annotations that are covering. This can be potentially be slow.
@@ -559,28 +590,65 @@ def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureSt
Returns:
A list of covering annotations
+ Raises:
+ TypeError: If ``type_`` is not a subtype of ``uima.tcas.Annotation``.
+
"""
- t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
+ t = self._require_annotation_type(type_, "select_covering")
c_begin = covered_annotation.begin
c_end = covered_annotation.end
- # We iterate over all annotations and check whether the provided annotation
- # is covered in the current annotation
+ result = []
for annotation in self._get_feature_structures(t):
if c_begin >= annotation.begin and c_end <= annotation.end:
- yield annotation
+ result.append(annotation)
+ return result
- def select_all(self) -> List[FeatureStructure]:
- """Finds all feature structures in this Cas
+ def select_all_fs(self) -> List[FeatureStructure]:
+ """Returns all indexed feature structures (annotations and non-annotations) in the current view.
Returns:
- A list of all annotations in this Cas
+ A list of all indexed feature structures in the current view.
+ """
+ return self._current_view.get_all_fs()
+
+ def select_all_annotations(self) -> List[Annotation]:
+ """Returns all indexed annotations in the current view.
+
+ Non-annotation feature structures present in the view are filtered out, so it is safe
+ to access ``begin``/``end`` on the returned items.
+ Returns:
+ A list of all indexed annotations in the current view.
"""
- return self._current_view.get_all_annotations()
+ return [fs for fs in self._current_view.get_all_fs() if is_annotation(fs)]
+
+ @deprecation.deprecated(details="Use select_all_annotations() for annotations only or select_all_fs() for all indexed feature structures")
+ def select_all(self) -> List[Annotation]:
+ """Finds all indexed annotations in the current view.
+
+ .. deprecated::
+ Use :meth:`select_all_annotations` for annotations only, or
+ :meth:`select_all_fs` for all indexed feature structures.
+ """
+ return self.select_all_annotations()
# FS handling
+ def _require_annotation_type(self, type_: Union[Type, str], operation: str) -> Type:
+ """Resolves ``type_`` and validates it is a subtype of ``uima.tcas.Annotation``.
+
+ Raises:
+ TypeError: If the resolved type is not an annotation type.
+ """
+ t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
+ if not self.typesystem.is_instance_of(t, TYPE_NAME_ANNOTATION):
+ raise TypeError(
+ f"Type [{t.name}] is not a subtype of [{TYPE_NAME_ANNOTATION}]; "
+ f"{operation} only operates on annotation types"
+ )
+ return t
+
def _get_feature_structures(self, type_: Type) -> List[FeatureStructure]:
"""Returns a list of all feature structures of type `type_name` and child types."""
types = {c.name for c in type_.descendants}
@@ -838,7 +906,7 @@ def _find_all_fs(
else:
for sofa in self.sofas:
view = self.get_view(sofa.sofaID)
- openlist.extend(view.select_all())
+ openlist.extend(view.select_all_fs())
ts = self.typesystem
while openlist:
@@ -939,8 +1007,9 @@ def _copy(self) -> "Cas":
def _sort_func(a: FeatureStructure) -> Tuple[int, int, int]:
- d = a.__slots__
- if "begin" in d and "end" in d:
- return a.begin, a.end, id(a)
- else:
- return sys.maxsize, sys.maxsize, id(a)
+ xmi_id = getattr(a, "xmiID", None)
+ tiebreaker = xmi_id if xmi_id is not None else id(a)
+ if is_annotation(a):
+ return a.begin, a.end, tiebreaker
+ # Non-annotation feature structures sort after annotations.
+ return sys.maxsize, sys.maxsize, tiebreaker
diff --git a/cassis/json.py b/cassis/json.py
index 6c39384..2a20090 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -565,7 +565,7 @@ def _serialize_ref(self, fs) -> int:
def _serialize_view(self, view: View):
return {
VIEW_SOFA_FIELD: view.sofa.xmiID,
- VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_annotations()),
+ VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_fs()),
}
def _to_external_type_name(self, type_name: str):
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index 274b819..104f9b8 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -6,7 +6,7 @@
from io import BytesIO
from itertools import chain, filterfalse
from pathlib import Path
-from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, Union
+from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, TypeGuard, Union
import attr
from deprecation import deprecated
@@ -500,6 +500,23 @@ def __repr__(self):
return str(self)
+@attr.s(slots=True, eq=False, order=False, repr=False)
+class Annotation(FeatureStructure):
+ """Concrete base class for annotation instances.
+
+ Generated types that represent (subtypes of) `uima.tcas.Annotation` will
+ inherit from this class so that static typing can rely on a nominal base
+ providing `begin` and `end`.
+ """
+
+ begin: int = attr.ib(default=0)
+ end: int = attr.ib(default=0)
+
+
+def is_annotation(fs: FeatureStructure) -> TypeGuard[Annotation]:
+ return isinstance(fs, Annotation)
+
+
@attr.s(slots=True, eq=False, order=False, repr=False)
class Feature:
"""A feature defines one attribute of a feature structure"""
@@ -572,15 +589,46 @@ class Type:
def __attrs_post_init__(self):
"""Build the constructor that can create feature structures of this type"""
name = _string_to_valid_classname(self.name)
- fields = {feature.name: attr.ib(default=None, repr=(feature.name != "sofa")) for feature in self.all_features}
+
+ # Determine whether this type is (transitively) a subtype of uima.tcas.Annotation
+ def _is_annotation_type(t: "Type") -> bool:
+ cur = t
+ while cur is not None:
+ if cur.name == TYPE_NAME_ANNOTATION:
+ return True
+ cur = cur.supertype
+ return False
+
+ is_annotation_type = _is_annotation_type(self)
+
+ # When inheriting from our concrete Annotation base, do not redeclare
+ # the 'begin' and 'end' features as fields; they are already present.
+ fields = {}
+ for feature in self.all_features:
+ if feature.name in {"begin", "end"} and is_annotation_type:
+ # skip - Annotation base provides these
+ continue
+ fields[feature.name] = attr.ib(default=None, repr=(feature.name != "sofa"))
fields["type"] = attr.ib(default=self)
# We assign this to a lambda to make it lazy
# When creating large type systems, almost no types are used so
# creating them on the fly is on average better
- self._constructor_fn = lambda: attr.make_class(
- name, fields, bases=(FeatureStructure,), slots=True, eq=False, order=False
- )
+ bases = (Annotation,) if is_annotation_type else (FeatureStructure,)
+
+ def _make_fs_class():
+ cls = attr.make_class(name, fields, bases=bases, slots=True, eq=False, order=False)
+ # Ensure generated FS classes are hashable. Python implicitly sets
+ # __hash__ to None (making instances unhashable) when a class body
+ # defines __eq__ without also defining __hash__. We want
+ # FeatureStructure-based instances to be usable as dict/set keys (they
+ # are keyed by xmiID), so fall back to the base FeatureStructure.__hash__
+ # implementation if the generated class ended up without one.
+ if getattr(cls, "__hash__", None) is None:
+ cls.__hash__ = FeatureStructure.__hash__
+ return cls
+
+ self._constructor_fn = _make_fs_class
def __call__(self, **kwargs) -> FeatureStructure:
"""Creates an feature structure of this type
diff --git a/cassis/util.py b/cassis/util.py
index 1d50755..6a6b632 100644
--- a/cassis/util.py
+++ b/cassis/util.py
@@ -26,6 +26,7 @@
TYPE_NAME_STRING_ARRAY,
FeatureStructure,
Type,
+ is_annotation,
is_array,
is_list,
)
@@ -205,7 +206,7 @@ def _render_feature_structure(
if indexed_column:
row_data.append(_bool_to_java_string(id(fs) in indexed_feature_structure_ids))
- if max_covered_text > 0 and _is_annotation_fs(fs):
+ if max_covered_text > 0 and is_annotation(fs):
covered_text_value = _abbreviate_middle(fs.get_covered_text(), "...", max_covered_text)
row_data.append(_escape(_render_string_value(covered_text_value, treat_empty_strings_as_null, null_value)))
@@ -280,7 +281,7 @@ def _get_indexed_feature_structures(cas: Cas) -> Iterable[FeatureStructure]:
feature_structures = []
for sofa in cas.sofas:
view = cas.get_view(sofa.sofaID)
- feature_structures.extend(view.select_all())
+ feature_structures.extend(view.select_all_fs())
return feature_structures
@@ -354,7 +355,7 @@ def _generate_anchor(
) -> str:
anchor = fs.type.name.rsplit(".", 2)[-1] # Get the short type name (no package)
- if include_offsets and _is_annotation_fs(fs):
+ if include_offsets and is_annotation(fs):
anchor += f"[{fs.begin}-{fs.end}]"
if add_index_mark:
@@ -381,10 +382,6 @@ def _is_multi_valued_feature_structure(fs: Any) -> bool:
return isinstance(fs, FeatureStructure) and (is_array(fs.type) or is_list(fs.type))
-def _is_annotation_fs(fs: FeatureStructure) -> bool:
- return hasattr(fs, "begin") and isinstance(fs.begin, int) and hasattr(fs, "end") and isinstance(fs.end, int)
-
-
def _compare_fs(
type_: Type,
a: FeatureStructure,
@@ -395,11 +392,11 @@ def _compare_fs(
if a is b:
return 0
- # duck-typing check if something is a annotation - if yes, try sorting by offets
- fs_a_is_annotation = _is_annotation_fs(a)
- fs_b_is_annotation = _is_annotation_fs(b)
+ # nominal (isinstance-based) check whether each side is an annotation - if yes, try sorting by offsets
+ fs_a_is_annotation = is_annotation(a)
+ fs_b_is_annotation = is_annotation(b)
if fs_a_is_annotation != fs_b_is_annotation:
- return -1
+ return -1 if fs_a_is_annotation else 1
if fs_a_is_annotation and fs_b_is_annotation:
begin_cmp = a.begin - b.begin
if begin_cmp != 0:
@@ -536,7 +533,9 @@ def _escape(value: str) -> str:
return value.translate(_ESCAPE_TRANSLATION)
-def _abbreviate_middle(value: str, middle: str, max_length: int) -> str:
+def _abbreviate_middle(value: Optional[str], middle: str, max_length: int) -> Optional[str]:
+ if value is None:
+ return None
if len(value) <= max_length:
return value
@@ -584,7 +583,7 @@ def _render_multi_valued_feature_structure(
if values is None:
return null_value
- if sort_annotations_in_multi_valued_features and all(_is_annotation_fs(value) for value in values):
+ if sort_annotations_in_multi_valued_features and all(is_annotation(value) for value in values):
values = sorted(values, key=lambda value: (value.begin, -value.end, value.type.name))
return _render_sequence(
diff --git a/cassis/xmi.py b/cassis/xmi.py
index 2e1bfe2..eadfadf 100644
--- a/cassis/xmi.py
+++ b/cassis/xmi.py
@@ -619,13 +619,19 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur
continue
# Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets
- if (
- ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION)
- and feature_name == FEATURE_BASE_NAME_BEGIN
- or feature_name == FEATURE_BASE_NAME_END
+ # Ensure we only convert begin/end for annotation instances. Parentheses are
+ # required because `and` has higher precedence than `or` and we must not
+ # attempt conversion for the END feature on non-annotations.
+ if ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION) and (
+ feature_name == FEATURE_BASE_NAME_BEGIN or feature_name == FEATURE_BASE_NAME_END
):
- sofa: Sofa = fs.sofa
- value = sofa._offset_converter.python_to_external(value)
+ # Be defensive: only perform offset conversion if the sofa and its
+ # offset converter have been initialized. In some workflows (e.g. a
+ # freshly constructed CAS without sofa strings) the converter may
+ # not exist yet and conversion is not possible.
+ sofa = getattr(fs, "sofa", None)
+ if sofa is not None and getattr(sofa, "_offset_converter", None) is not None:
+ value = sofa._offset_converter.python_to_external(value)
if ts.is_instance_of(feature.rangeType, TYPE_NAME_STRING_ARRAY) and not feature.multipleReferencesAllowed:
if value.elements is not None: # Compare to none as not to skip if elements is empty!
@@ -683,7 +689,7 @@ def _serialize_view(self, root: etree.Element, view: View):
elem = etree.SubElement(root, name)
elem.attrib["sofa"] = str(view.sofa.xmiID)
- elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_annotations()), key=int))
+ elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_fs()), key=int))
def _collect_list_elements(self, type_name: str, value) -> List[str]:
if type_name not in _LIST_TYPES:
diff --git a/tests/test_cas.py b/tests/test_cas.py
index 3e0dba2..79342c7 100644
--- a/tests/test_cas.py
+++ b/tests/test_cas.py
@@ -336,8 +336,8 @@ def test_select_only_returns_annotations_of_current_view(
view = cas.create_view("testView")
view.add_all(sentences)
- actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all())
- actual_annotations_in_test_view = list(cas.get_view("testView").select_all())
+ actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all_annotations())
+ actual_annotations_in_test_view = list(cas.get_view("testView").select_all_annotations())
assert tokens == actual_annotations_in_initial_view
assert sentences == actual_annotations_in_test_view
@@ -600,6 +600,25 @@ def test_covered_text_on_non_annotation():
top.get_covered_text()
+def test_add_non_annotation_and_select():
+ """Create a non-annotation type, add an instance and verify select returns it."""
+ cas = Cas()
+
+ # Create a type that is not an annotation (override the default uima.tcas.Annotation supertype)
+ NonAnnotation = cas.typesystem.create_type("test.NonAnnotation", supertypeName=TYPE_NAME_TOP)
+
+ # Instantiate and add to CAS
+ fs = NonAnnotation()
+ cas.add(fs)
+
+ # Should be retrievable by select using the type name
+ selected = list(cas.select("test.NonAnnotation"))
+ assert selected == [fs]
+
+ # And visible via select_all_fs
+ assert fs in cas.select_all_fs()
+
+
def test_covered_text_on_annotation_without_sofa():
cas = Cas()
Annotation = cas.typesystem.get_type(TYPE_NAME_ANNOTATION)
@@ -609,6 +628,20 @@ def test_covered_text_on_annotation_without_sofa():
ann.get_covered_text()
+def test_runtime_generated_annotation_is_detected_and_shown_in_anchor():
+ ts = TypeSystem()
+ # Create a new annotation subtype (should inherit from Annotation base)
+ MyAnno = ts.create_type("my.pkg.MyAnnotation", supertypeName="uima.tcas.Annotation")
+
+ cas = Cas(ts)
+ # Create an instance of the runtime-generated type; ensure we can set begin/end
+ a = MyAnno(begin=5, end=10)
+ cas.add(a)
+
+ text = cas_to_comparable_text(cas)
+ assert "MyAnnotation[5-10]" in text
+
+
def test_remove_annotations_in_range(small_typesystem_xml, small_xmi):
typesystem = load_typesystem(small_typesystem_xml)
cas = load_cas_from_xmi(small_xmi, typesystem)
@@ -617,12 +650,12 @@ def test_remove_annotations_in_range(small_typesystem_xml, small_xmi):
end = 20
expected_leftover_annotations = [
- annotation for annotation in cas.select_all() if not (begin <= annotation.begin < annotation.end <= end)
+ annotation for annotation in cas.select_all_annotations() if not (begin <= annotation.begin < annotation.end <= end)
]
cas.remove_annotations_in_range(begin, end)
- result_leftover_annotations = cas.select_all()
+ result_leftover_annotations = cas.select_all_annotations()
assert len(result_leftover_annotations) == len(expected_leftover_annotations)
@@ -639,13 +672,13 @@ def test_remove_annotations_in_range_with_type(small_typesystem_xml, small_xmi):
type_ = "cassis.Token"
expected_leftover_annotations = [
annotation
- for annotation in cas.select_all()
+ for annotation in cas.select_all_annotations()
if not (begin <= annotation.begin < annotation.end <= end and annotation.type.name == type_)
]
cas.remove_annotations_in_range(begin, end, type_)
- result_leftover_annotations = cas.select_all()
+ result_leftover_annotations = cas.select_all_annotations()
assert len(result_leftover_annotations) == len(expected_leftover_annotations)
@@ -665,7 +698,7 @@ def test_crop_sofa_string(small_typesystem_xml, small_xmi):
# Snapshot annotations' original offsets so we can compute expected adjusted offsets
expected_leftover_annotations = [
(annotation, annotation.begin, annotation.end)
- for annotation in cas.select_all()
+ for annotation in cas.select_all_annotations()
if overlapping(begin, end, annotation.begin, annotation.end)
]
@@ -674,7 +707,7 @@ def test_crop_sofa_string(small_typesystem_xml, small_xmi):
cas.crop_sofa_string(begin, end)
assert cas.sofa_string == original_sofa[begin:end]
- assert len(cas.select_all()) == len(expected_leftover_annotations)
+ assert len(cas.select_all_annotations()) == len(expected_leftover_annotations)
# Verify offsets were adjusted as expected for the remaining annotations
for annotation, orig_begin, orig_end in expected_leftover_annotations:
@@ -709,7 +742,7 @@ def test_crop_sofa_string_no_overlap(small_typesystem_xml, small_xmi):
# Snapshot annotations' original offsets so we can compute expected adjusted offsets
expected_leftover_annotations = [
(annotation, annotation.begin, annotation.end)
- for annotation in cas.select_all()
+ for annotation in cas.select_all_annotations()
if begin <= annotation.begin < annotation.end <= end
]
@@ -718,7 +751,7 @@ def test_crop_sofa_string_no_overlap(small_typesystem_xml, small_xmi):
cas.crop_sofa_string(begin, end, overlap=False)
assert cas.sofa_string == original_sofa[begin:end]
- assert len(cas.select_all()) == len(expected_leftover_annotations)
+ assert len(cas.select_all_annotations()) == len(expected_leftover_annotations)
# Verify offsets were adjusted as expected for the remaining annotations
for annotation, orig_begin, orig_end in expected_leftover_annotations:
@@ -819,11 +852,11 @@ def test_crop_sofa_string_various_overlap_cases(
assert cas.sofa_string == original_sofa[begin:end]
if expect_kept:
- assert ann in cas.select_all()
+ assert ann in cas.select_all_annotations()
assert ann.begin == expect_begin
assert ann.end == expect_end
else:
- assert ann not in cas.select_all()
+ assert ann not in cas.select_all_annotations()
def test_crop_sofa_string_transitive_references_remain(small_typesystem_xml):
@@ -853,7 +886,7 @@ def test_crop_sofa_string_transitive_references_remain(small_typesystem_xml):
cas.crop_sofa_string(begin, end)
# Child was outside the cut and therefore removed from the view index
- assert child not in cas.select_all()
+ assert child not in cas.select_all_annotations()
# But child is still reachable via parent and will be discovered by traversal
all_fs = list(cas._find_all_fs())
diff --git a/tests/test_files/xmi/cas_with_collections.xmi b/tests/test_files/xmi/cas_with_collections.xmi
index b3846f7..4e47463 100644
--- a/tests/test_files/xmi/cas_with_collections.xmi
+++ b/tests/test_files/xmi/cas_with_collections.xmi
@@ -20,7 +20,7 @@
C
-
+
A
B
C
@@ -48,9 +48,9 @@
-
+
-
+
diff --git a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
index 41577b0..5a96065 100644
--- a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
+++ b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
@@ -8,7 +8,7 @@
-
+
LNC
diff --git a/tests/test_files/xmi/cas_with_reserved_names.xmi b/tests/test_files/xmi/cas_with_reserved_names.xmi
index a089da0..2634639 100644
--- a/tests/test_files/xmi/cas_with_reserved_names.xmi
+++ b/tests/test_files/xmi/cas_with_reserved_names.xmi
@@ -3,7 +3,7 @@
xmlns:test="http:///test.ecore" xmi:version="2.0">
-
+
diff --git a/tests/test_json.py b/tests/test_json.py
index 810e421..d29a4ec 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -129,7 +129,7 @@ def test_multi_type_random_serialization_deserialization():
generator.type_count = i + 1
typesystem = generator.generate_type_system()
randomized_cas = generator.generate_cas(typesystem)
- print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+ print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}")
expected_json = randomized_cas.to_json()
loaded_cas = load_cas_from_json(expected_json)
@@ -144,7 +144,7 @@ def test_multi_feature_random_serialization_deserialization():
generator.size = (i + 1) * 10
typesystem = generator.generate_type_system()
randomized_cas = generator.generate_cas(typesystem)
- print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+ print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}")
expected_json = randomized_cas.to_json()
loaded_cas = load_cas_from_json(expected_json)
diff --git a/tests/test_util.py b/tests/test_util.py
index ad555b1..b86cdfa 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -55,7 +55,7 @@ def test_cas_to_comparable_text_on_multi_feature_random():
generator.size = (i + 1) * 10
typesystem = generator.generate_type_system()
randomized_cas = generator.generate_cas(typesystem)
- print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+ print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}")
cas_to_comparable_text(randomized_cas)
# At this point, we are just testing if there is no exception during rendering
@@ -66,7 +66,7 @@ def test_cas_to_comparable_text_on_multi_type_random():
generator.size = (i + 1) * 10
typesystem = generator.generate_type_system()
randomized_cas = generator.generate_cas(typesystem)
- print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+ print(f"CAS size: {sum(len(view.get_all_fs()) for view in randomized_cas.views)}")
cas_to_comparable_text(randomized_cas)
# At this point, we are just testing if there is no exception during rendering
diff --git a/tests/test_xmi.py b/tests/test_xmi.py
index 03d59b7..f1be393 100644
--- a/tests/test_xmi.py
+++ b/tests/test_xmi.py
@@ -109,8 +109,8 @@ def test_views_are_parsed(small_xmi, small_typesystem_xml):
view1 = cas.get_view("sofa1")
view2 = cas.get_view("sofa2")
- assert 2 == len(list(view1.select_all()))
- assert 1 == len(list(view2.select_all()))
+ assert 2 == len(list(view1.select_all_annotations()))
+ assert 1 == len(list(view2.select_all_annotations()))
def test_deserializing_and_then_adding_annotations_works(small_xmi, small_typesystem_xml):