dkpro · reckart · May 6, 2026 · Nov 10, 2025 · May 6, 2026 · May 6, 2026
diff --git a/cassis/cas.py b/cassis/cas.py
@@ -20,10 +20,12 @@
     TYPE_NAME_FS_LIST,
     TYPE_NAME_SOFA,
     FeatureStructure,
+    Annotation,
     Type,
     TypeCheckError,
     TypeSystem,
     TypeSystemMode,
+    is_annotation,
 )
 
 _validator_optional_string = validators.optional(validators.instance_of(str))
@@ -172,13 +174,14 @@ def type_index(self) -> Dict[str, SortedKeyList]:
         return self._indices
 
     def add_annotation_to_index(self, annotation: FeatureStructure):
+        """Adds a feature structure to this view's index under its type name."""
         self._indices[annotation.type.name].add(annotation)
 
     def get_all_annotations(self) -> List[FeatureStructure]:
-        """Gets all the annotations in this view.
+        """Gets all feature structures in this view.
 
         Returns:
-            A list of all annotations in this view.
+            A list of all feature structures in this view.
 
         """
         result = []
@@ -335,6 +338,8 @@ def add(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
         if hasattr(annotation, "sofa"):
             annotation.sofa = self.get_sofa()
 
+        # Add to the index. The view index accepts any FeatureStructure;
+        # `_sort_func` will duck-type annotation-like objects when sorting.
         self._current_view.add_annotation_to_index(annotation)
 
     @deprecation.deprecated(details="Use add()")
@@ -492,7 +497,7 @@ def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Unio
             raise ValueError(f"Invalid indices for begin {begin} and end {end}")
 
     @deprecation.deprecated(details="Use annotation.get_covered_text()")
-    def get_covered_text(self, annotation: FeatureStructure) -> str:
+    def get_covered_text(self, annotation: Annotation) -> str:
         """Gets the text that is covered by `annotation`.
 
         Args:
@@ -518,7 +523,7 @@ def select(self, type_: Union[Type, str]) -> List[FeatureStructure]:
         t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
         return self._get_feature_structures(t)
 
-    def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureStructure) -> List[FeatureStructure]:
+    def select_covered(self, type_: Union[Type, str], covering_annotation: Annotation) -> List[Annotation]:
         """Returns a list of covered annotations.
 
         Return all annotations that are covered
@@ -544,7 +549,7 @@ def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureSt
                 result.append(annotation)
         return result
 
-    def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureStructure) -> List[FeatureStructure]:
+    def select_covering(self, type_: Union[Type, str], covered_annotation: Annotation) -> List[Annotation]:
         """Returns a list of annotations that cover the given annotation.
 
         Return all annotations that are covering. This can be potentially be slow.
@@ -570,7 +575,7 @@ def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureSt
             if c_begin >= annotation.begin and c_end <= annotation.end:
                 yield annotation
 
-    def select_all(self) -> List[FeatureStructure]:
+    def select_all(self) -> List[FeatureStructure]:
         """Finds all feature structures in this Cas
 
         Returns:
@@ -939,8 +944,8 @@ def _copy(self) -> "Cas":
 
 
 def _sort_func(a: FeatureStructure) -> Tuple[int, int, int]:
-    d = a.__slots__
-    if "begin" in d and "end" in d:
-        return a.begin, a.end, id(a)
-    else:
-        return sys.maxsize, sys.maxsize, id(a)
+    # Annotations sort by their offsets; all other feature structures sort last.
+    begin, end = (a.begin, a.end) if is_annotation(a) else (sys.maxsize, sys.maxsize)
+    # Ties are broken by xmiID when one is set, otherwise by object identity.
+    xmi_id = getattr(a, "xmiID", None)
+    return begin, end, xmi_id if xmi_id is not None else id(a)
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
@@ -500,6 +500,23 @@ def __repr__(self):
         return str(self)
 
 
+@attr.s(slots=True, hash=False, eq=True, order=True, repr=False)
+class Annotation(FeatureStructure):
+    """Concrete base class for annotation instances.
+
+    Generated types for `uima.tcas.Annotation` and its subtypes inherit from this
+    class, so static typing can rely on a nominal base providing `begin` and `end`.
+    """
+
+    begin: int = attr.ib(default=0)
+    end: int = attr.ib(default=0)
+
+
+def is_annotation(fs: FeatureStructure) -> bool:
+    """Duck-typed check: True if `fs` has integer `begin` and `end` attributes."""
+    return hasattr(fs, "begin") and isinstance(fs.begin, int) and hasattr(fs, "end") and isinstance(fs.end, int)
+
+
 @attr.s(slots=True, eq=False, order=False, repr=False)
 class Feature:
     """A feature defines one attribute of a feature structure"""
@@ -572,15 +589,44 @@ class Type:
     def __attrs_post_init__(self):
         """Build the constructor that can create feature structures of this type"""
         name = _string_to_valid_classname(self.name)
-        fields = {feature.name: attr.ib(default=None, repr=(feature.name != "sofa")) for feature in self.all_features}
+
+        # Determine whether this type is (transitively) a subtype of uima.tcas.Annotation
+        def _is_annotation_type(t: "Type") -> bool:
+            cur = t
+            while cur is not None:
+                if cur.name == TYPE_NAME_ANNOTATION:
+                    return True
+                cur = cur.supertype
+            return False
+
+        # 'begin'/'end' come from the Annotation base; resolve the type check only once.
+        inherits_annotation = _is_annotation_type(self)
+        inherited = {"begin", "end"} if inherits_annotation else set()
+        fields = {
+            feature.name: attr.ib(default=None, repr=(feature.name != "sofa"))
+            for feature in self.all_features
+            if feature.name not in inherited
+        }
         fields["type"] = attr.ib(default=self)
 
         # We assign this to a lambda to make it lazy
         # When creating large type systems, almost no types are used so
         # creating them on the fly is on average better
-        self._constructor_fn = lambda: attr.make_class(
-            name, fields, bases=(FeatureStructure,), slots=True, eq=False, order=False
-        )
+        bases = (Annotation,) if inherits_annotation else (FeatureStructure,)
+
+        def _make_fs_class():
+            cls = attr.make_class(name, fields, bases=bases, slots=True, eq=False, order=False)
+            # Ensure generated FS classes are hashable. When a class defines an
+            # __eq__ (inherited or generated) but no __hash__, Python makes
+            # instances unhashable. We want FeatureStructure-based instances to
+            # be usable as dict/set keys (they are keyed by xmiID), so assign the
+            # base FeatureStructure.__hash__ implementation to the generated
+            # class if it doesn't already provide one.
+            if getattr(cls, "__hash__", None) is None:
+                cls.__hash__ = FeatureStructure.__hash__
+            return cls
+
+        self._constructor_fn = _make_fs_class
 
     def __call__(self, **kwargs) -> FeatureStructure:
         """Creates an feature structure of this type

diff --git a/cassis/util.py b/cassis/util.py
@@ -26,6 +26,7 @@
     TYPE_NAME_STRING_ARRAY,
     FeatureStructure,
     Type,
+    is_annotation,
     is_array,
     is_list,
 )
@@ -205,7 +206,7 @@ def _render_feature_structure(
     if indexed_column:
         row_data.append(_bool_to_java_string(id(fs) in indexed_feature_structure_ids))
 
-    if max_covered_text > 0 and _is_annotation_fs(fs):
+    if max_covered_text > 0 and is_annotation(fs):
         covered_text_value = _abbreviate_middle(fs.get_covered_text(), "...", max_covered_text)
         row_data.append(_escape(_render_string_value(covered_text_value, treat_empty_strings_as_null, null_value)))
 
@@ -354,7 +355,7 @@ def _generate_anchor(
 ) -> str:
     anchor = fs.type.name.rsplit(".", 2)[-1]  # Get the short type name (no package)
 
-    if include_offsets and _is_annotation_fs(fs):
+    if include_offsets and is_annotation(fs):
         anchor += f"[{fs.begin}-{fs.end}]"
 
     if add_index_mark:
@@ -381,10 +382,6 @@ def _is_multi_valued_feature_structure(fs: Any) -> bool:
     return isinstance(fs, FeatureStructure) and (is_array(fs.type) or is_list(fs.type))
 
 
-def _is_annotation_fs(fs: FeatureStructure) -> bool:
-    return hasattr(fs, "begin") and isinstance(fs.begin, int) and hasattr(fs, "end") and isinstance(fs.end, int)
-
-
 def _compare_fs(
     type_: Type,
     a: FeatureStructure,
@@ -396,8 +393,8 @@ def _compare_fs(
         return 0
 
     # duck-typing check if something is a annotation - if yes, try sorting by offets
-    fs_a_is_annotation = _is_annotation_fs(a)
-    fs_b_is_annotation = _is_annotation_fs(b)
+    fs_a_is_annotation = is_annotation(a)
+    fs_b_is_annotation = is_annotation(b)
     if fs_a_is_annotation != fs_b_is_annotation:
         return -1
     if fs_a_is_annotation and fs_b_is_annotation:
@@ -536,7 +533,9 @@ def _escape(value: str) -> str:
     return value.translate(_ESCAPE_TRANSLATION)
 
 
-def _abbreviate_middle(value: str, middle: str, max_length: int) -> str:
+def _abbreviate_middle(value: Optional[str], middle: str, max_length: int) -> Optional[str]:
+    if value is None:
+        return None
     if len(value) <= max_length:
         return value
 
@@ -584,7 +583,7 @@ def _render_multi_valued_feature_structure(
     if values is None:
         return null_value
 
-    if sort_annotations_in_multi_valued_features and all(_is_annotation_fs(value) for value in values):
+    if sort_annotations_in_multi_valued_features and all(is_annotation(value) for value in values):
         values = sorted(values, key=lambda value: (value.begin, -value.end, value.type.name))
 
     return _render_sequence(

diff --git a/cassis/xmi.py b/cassis/xmi.py
@@ -619,13 +619,19 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur
                 continue
 
             # Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets
-            if (
-                ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION)
-                and feature_name == FEATURE_BASE_NAME_BEGIN
-                or feature_name == FEATURE_BASE_NAME_END
+            # Ensure we only convert begin/end for annotation instances. Parentheses are
+            # required because `and` has higher precedence than `or` and we must not
+            # attempt conversion for the END feature on non-annotations.
+            if ts.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION) and (
+                feature_name == FEATURE_BASE_NAME_BEGIN or feature_name == FEATURE_BASE_NAME_END
             ):
-                sofa: Sofa = fs.sofa
-                value = sofa._offset_converter.python_to_external(value)
+                # Be defensive: only perform offset conversion if the sofa and its
+                # offset converter have been initialized. In some workflows (e.g. a
+                # freshly constructed CAS without sofa strings) the converter may
+                # not exist yet and conversion is not possible.
+                sofa = getattr(fs, "sofa", None)
+                if sofa is not None and getattr(sofa, "_offset_converter", None) is not None:
+                    value = sofa._offset_converter.python_to_external(value)
 
             if ts.is_instance_of(feature.rangeType, TYPE_NAME_STRING_ARRAY) and not feature.multipleReferencesAllowed:
                 if value.elements is not None:  # Compare to none as not to skip if elements is empty!

diff --git a/tests/test_cas.py b/tests/test_cas.py
@@ -600,6 +600,25 @@ def test_covered_text_on_non_annotation():
         top.get_covered_text()
 
 
+def test_add_non_annotation_and_select():
+    """A feature structure of a non-annotation type can be added and selected again."""
+    type_name = "test.NonAnnotation"
+    cas = Cas()
+
+    # A type that does not define annotation offsets (begin/end)
+    non_annotation_type = cas.typesystem.create_type(type_name)
+
+    # Index a fresh instance in the CAS
+    instance = non_annotation_type()
+    cas.add(instance)
+
+    # Selecting by type name yields exactly the instance that was added
+    assert list(cas.select(type_name)) == [instance]
+
+    # select_all must include the non-annotation feature structure as well
+    assert instance in cas.select_all()
+
+
 def test_covered_text_on_annotation_without_sofa():
     cas = Cas()
     Annotation = cas.typesystem.get_type(TYPE_NAME_ANNOTATION)
@@ -609,6 +628,20 @@ def test_covered_text_on_annotation_without_sofa():
         ann.get_covered_text()
 
 
+def test_runtime_generated_annotation_is_detected_and_shown_in_anchor():
+    typesystem = TypeSystem()
+    # Declare a subtype of uima.tcas.Annotation at runtime
+    annotation_type = typesystem.create_type("my.pkg.MyAnnotation", supertypeName="uima.tcas.Annotation")
+
+    cas = Cas(typesystem)
+    # The generated constructor must accept begin/end offsets
+    annotation = annotation_type(begin=5, end=10)
+    cas.add(annotation)
+
+    # The rendered text must show the anchor together with the offsets
+    assert "MyAnnotation[5-10]" in cas_to_comparable_text(cas)
+
+
 def test_remove_annotations_in_range(small_typesystem_xml, small_xmi):
     typesystem = load_typesystem(small_typesystem_xml)
     cas = load_cas_from_xmi(small_xmi, typesystem)

diff --git a/tests/test_files/xmi/cas_with_collections.xmi b/tests/test_files/xmi/cas_with_collections.xmi
@@ -20,7 +20,7 @@
     <elements>C</elements>
   </cas:StringArray>
 
-  <cassis:Group xmi:id="5" sofa="1" integers="1 2 3" shorts="1 2 3" longs="1 2 3" floats="1.0 2.0 3.0" doubles="1.0 2.0 3.0" bytes="42DB3064" booleans="true false" fses="0 6 7">
+  <cassis:Group xmi:id="5" sofa="1" begin="0" end="0" integers="1 2 3" shorts="1 2 3" longs="1 2 3" floats="1.0 2.0 3.0" doubles="1.0 2.0 3.0" bytes="42DB3064" booleans="true false" fses="0 6 7">
     <strings>A</strings>
     <strings>B</strings>
     <strings>C</strings>
@@ -48,9 +48,9 @@
   <cas:LongArray xmi:id="17" elements="2516571677013944794"/>
   <cas:DoubleArray xmi:id="18" elements="0.4362829094329638 0.6487936445670887 0.6959691863162578"/>
 
-  <cassis:Group xmi:id="19" sofa="1" integers="" shorts="" longs="" floats="" doubles="" bytes="" booleans="" fses=""/>
+  <cassis:Group xmi:id="19" sofa="1" begin="0" end="0" integers="" shorts="" longs="" floats="" doubles="" bytes="" booleans="" fses=""/>
 
-  <cassis:Group xmi:id="20" sofa="1"/>
+  <cassis:Group xmi:id="20" sofa="1" begin="0" end="0"/>
 
   <cas:FSArray xmi:id="21" elements="" />
 

diff --git a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
@@ -8,7 +8,7 @@
 
     <tcas:DocumentAnnotation xmi:id="1" sofa="1" begin="0" end="47" language="x-unspecified"/>
 
-    <test:type xmi:id="2" sofa="1" target="3"/>
+	<test:type xmi:id="2" sofa="1" target="3" begin="0" end="0"/>
 
 	<cas:StringArray xmi:id="3">
 		<elements>LNC</elements>

diff --git a/tests/test_files/xmi/cas_with_reserved_names.xmi b/tests/test_files/xmi/cas_with_reserved_names.xmi
@@ -3,7 +3,7 @@
          xmlns:test="http:///test.ecore" xmi:version="2.0">
     <cas:NULL xmi:id="0"/>
     <tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="47" language="x-unspecified"/>
-    <test:type xmi:id="3" type="2" self="2" sofa="1"/>
+    <test:type xmi:id="3" type="2" self="2" sofa="1" begin="0" end="0"/>
     <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text/plain"
               sofaString="Joe waited for the train . The train was late ."/>
     <cas:View sofa="1" members="2 3"/>