From 030324873519d42a0815339986a50df28b86f474 Mon Sep 17 00:00:00 2001 From: Elihei2 Date: Mon, 1 Jun 2026 14:51:25 +0200 Subject: [PATCH] fix(writer): guard per-gene thresholding against degenerate similarities threshold_yen / threshold_li_custom crash (or return garbage) on degenerate per-gene similarity arrays. Before thresholding, drop null / non-finite values and short-circuit the two degenerate cases: no finite values -> treat as failed-to-converge (backfilled with the global quantile); a single value, or values that are all numerically equal (np.allclose) -> use that value directly as the threshold and mark the gene as converged. Prevents crashes on sparse genes. What to review: the few guard lines added before the existing try/except in the per-gene threshold loop. No change for genes with a normal spread of values. --- src/segger/data/writer.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/segger/data/writer.py b/src/segger/data/writer.py index d0d68bc..26c8514 100644 --- a/src/segger/data/writer.py +++ b/src/segger/data/writer.py @@ -224,10 +224,20 @@ def assign_transcripts_to_cells( logger.debug(f"Processing feature {i+1}/{n_groups} (feature {feature[0]} | transcripts {group.shape[0]/1e3:.1f}K)...") # sample if too many - arr = group["segger_similarity"] + arr = group["segger_similarity"].drop_nulls() if arr.shape[0] > n: arr = arr.sample(n=n, seed=0) arr = arr.to_numpy() + arr = arr[np.isfinite(arr)] + + # Degenerate inputs crash threshold_yen / threshold_li: no finite + # values, or a single constant value (nothing to threshold). + if arr.size == 0: + failed_to_converge.append(feature[0]) + continue + if arr.size == 1 or np.allclose(arr, arr[0]): + thresholds.append({tx_fields.feature: feature[0], "similarity_threshold": float(arr[0]), "converged": True}) + continue # threshold try: