keymanapp · jahorton · May 12, 2026
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
@@ -174,7 +174,7 @@ export class ModelCompositor {
     const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context);
 
     // Needs "casing" to be applied first.
-    const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, transformDistribution[0]);
+    const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext);
 
     // If no existing suggestion directly matches the user-visible version of
     // the token, also add a 'keep' suggestion (with `.matchesModel = false`)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -874,35 +874,33 @@ export function dedupeSuggestions(
 export function processSimilarity(
   lexicalModel: LexicalModel,
   suggestionDistribution: IntermediateCompositedPrediction[],
-  context: Context,
-  trueInput: ProbabilityMass<Transform>
+  baseContext: Context,
+  finalContext: Context
 ): boolean {
-  const { sample: inputTransform } = trueInput;
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
-  const postContext = models.applyTransform(inputTransform, context);
-  const truePrefix = wordbreak(postContext);
-
   const keyed = (text: string) => lexicalModel.toKey ? lexicalModel.toKey(text) : text;
   const keyCased = (text: string) => lexicalModel.applyCasing ? lexicalModel.applyCasing('lower', text) : text;
-  const keyedPrefix = keyed(truePrefix);
-  const lowercasedPrefix = keyCased(truePrefix);
+  const keyedTarget = keyed(finalContext.left);
+  const lowercasedTarget = keyCased(finalContext.left);
 
   let keepOption: Outcome<Keep>;
 
-  for(let tuple of suggestionDistribution) {
-    // Don't set it unnecessarily; this can have side-effects in some automated tests.
-    if(inputTransform.id !== undefined) {
-      tuple.components.prediction.transformId = inputTransform.id;
-    }
+  // With no suggestions there are no corrections to check for emptiness, so
+  // fall back to testing whether wordbreaking the final context yields ''.
+  let allCorrectionsEmpty: boolean = suggestionDistribution.length > 0
+    ? true
+    : wordbreak(finalContext) == '';
 
-  const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context));
+  for(let tuple of suggestionDistribution) {
+    const appliedContext = models.applyTransform(tuple.components.prediction.transform, baseContext);
+    allCorrectionsEmpty &&= tuple.components.correction == '';
 
     // Is the suggestion an exact match (or, "similar enough") to the
     // actually-typed context?  If so, we wish to note this fact and to
     // prioritize such a suggestion over suggestions that are not.
-    if(keyed(tuple.components.correction) == keyedPrefix) {
-      if(predictedWord == truePrefix) {
+    if(keyed(tuple.components.correction) == keyedTarget) {
+      if(appliedContext.left == finalContext.left) {
         // Exact match:  it's a perfect 'keep' suggestion.
         tuple.metadata.matchLevel = SuggestionSimilarity.exact;
         keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep',  models.QuoteBehavior.noQuotes);
@@ -914,10 +912,10 @@ export function processSimilarity(
         keepOption.matchesModel = true;
         Object.assign(tuple.components.prediction, keepOption);
         keepOption = tuple.components.prediction as Outcome<Keep>;
-      } else if(keyCased(predictedWord) == lowercasedPrefix) {
+      } else if(keyCased(appliedContext.left) == lowercasedTarget) {
         // Case-insensitive match.  No diacritic differences; the ONLY difference is casing.
         tuple.metadata.matchLevel = SuggestionSimilarity.sameText;
-      } else if(keyed(predictedWord) == keyedPrefix) {
+      } else if(keyed(appliedContext.left) == keyedTarget) {
         // Diacritic-insensitive / exact-key match.
         tuple.metadata.matchLevel = SuggestionSimilarity.sameKey;
       } else {
@@ -932,7 +930,7 @@ export function processSimilarity(
   //
   // No actual 'keep' needed if the current context token is empty, so we say we
   // have a 'keep' for that case, even though there isn't really one.
-  return !!(keepOption || truePrefix == '');
+  return !!(keepOption || allCorrectionsEmpty);
 }
 
 /**

diff --git a/...ss/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/...ss/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
@@ -233,7 +233,7 @@ describe('processSimilarity', () => {
     const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_its.matchesModel = true;
 
-    processSimilarity(testModelWithCasing, distribution, context, trueInput);
+    processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
     assert.sameDeepMembers(distribution, expectation);
     assert.equal(its.components.prediction.tag, 'keep');
@@ -270,7 +270,7 @@ describe('processSimilarity', () => {
     const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_it_is.matchesModel = true;
 
-    processSimilarity(testModelWithCasing, distribution, context, trueInput);
+    processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
     assert.sameDeepMembers(distribution, expectation);
     assert.equal(it_is.components.prediction.tag, 'keep');
@@ -318,7 +318,7 @@ describe('processSimilarity', () => {
       expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText;  // it_is
       expectation[2].metadata.matchLevel = SuggestionSimilarity.none;      // is
       expectation[3].metadata.matchLevel = SuggestionSimilarity.none;      // is_not
-      processSimilarity(testModelWithCasing, distribution, context, trueInput);
+      processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
       const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
@@ -358,7 +358,7 @@ describe('processSimilarity', () => {
       const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
 
       expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none);
-      processSimilarity(testModelWithoutCasing, distribution, context, trueInput);
+      processSimilarity(testModelWithoutCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
       const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');