diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index 08e0f8b9f89..1d4893aab49 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -174,7 +174,7 @@ export class ModelCompositor { const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context); // Needs "casing" to be applied first. - const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, transformDistribution[0]); + const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext); // If no existing suggestion directly matches the user-visible version of // the token, also add a 'keep' suggestion (with `.matchesModel = false`) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 177eef0a694..176c80e09f0 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -874,35 +874,33 @@ export function dedupeSuggestions( export function processSimilarity( lexicalModel: LexicalModel, suggestionDistribution: IntermediateCompositedPrediction[], - context: Context, - trueInput: ProbabilityMass + baseContext: Context, + finalContext: Context ): boolean { - const { sample: inputTransform } = trueInput; const wordbreak = determineModelWordbreaker(lexicalModel); - const postContext = models.applyTransform(inputTransform, context); - const truePrefix = wordbreak(postContext); - const keyed = (text: string) => lexicalModel.toKey ? lexicalModel.toKey(text) : text; const keyCased = (text: string) => lexicalModel.applyCasing ? lexicalModel.applyCasing('lower', text) : text; - const keyedPrefix = keyed(truePrefix); - const lowercasedPrefix = keyCased(truePrefix); + const keyedTarget = keyed(finalContext.left); + const lowercasedTarget = keyCased(finalContext.left); let keepOption: Outcome; - for(let tuple of suggestionDistribution) { - // Don't set it unnecessarily; this can have side-effects in some automated tests. - if(inputTransform.id !== undefined) { - tuple.components.prediction.transformId = inputTransform.id; - } + // If there are no suggestions found, we can't validate that the underlying + // correction was an empty token. + let allCorrectionsEmpty: boolean = suggestionDistribution.length > 0 + ? true + : wordbreak(finalContext) == ''; - const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context)); + for(let tuple of suggestionDistribution) { + const appliedContext = models.applyTransform(tuple.components.prediction.transform, baseContext); + allCorrectionsEmpty &&= tuple.components.correction == ''; // Is the suggestion an exact match (or, "similar enough") to the // actually-typed context? If so, we wish to note this fact and to // prioritize such a suggestion over suggestions that are not. - if(keyed(tuple.components.correction) == keyedPrefix) { - if(predictedWord == truePrefix) { + if(keyed(tuple.components.correction) == keyedTarget) { + if(appliedContext.left == finalContext.left) { // Exact match: it's a perfect 'keep' suggestion. tuple.metadata.matchLevel = SuggestionSimilarity.exact; keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep', models.QuoteBehavior.noQuotes); @@ -914,10 +912,10 @@ export function processSimilarity( keepOption.matchesModel = true; Object.assign(tuple.components.prediction, keepOption); keepOption = tuple.components.prediction as Outcome; - } else if(keyCased(predictedWord) == lowercasedPrefix) { + } else if(keyCased(appliedContext.left) == lowercasedTarget) { // Case-insensitive match. No diacritic differences; the ONLY difference is casing. tuple.metadata.matchLevel = SuggestionSimilarity.sameText; - } else if(keyed(predictedWord) == keyedPrefix) { + } else if(keyed(appliedContext.left) == keyedTarget) { // Diacritic-insensitive / exact-key match. tuple.metadata.matchLevel = SuggestionSimilarity.sameKey; } else { @@ -932,7 +930,7 @@ export function processSimilarity( // // No actual 'keep' needed if the current context token is empty, so we say we // have a 'keep' for that case, even though there isn't really one. - return !!(keepOption || truePrefix == ''); + return !!(keepOption || allCorrectionsEmpty); } /** diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts index e4cbfc81b1d..a485fecd053 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts @@ -233,7 +233,7 @@ describe('processSimilarity', () => { const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes); keep_its.matchesModel = true; - processSimilarity(testModelWithCasing, distribution, context, trueInput); + processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context)); assert.sameDeepMembers(distribution, expectation); assert.equal(its.components.prediction.tag, 'keep'); @@ -270,7 +270,7 @@ describe('processSimilarity', () => { const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes); keep_it_is.matchesModel = true; - processSimilarity(testModelWithCasing, distribution, context, trueInput); + processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context)); assert.sameDeepMembers(distribution, expectation); assert.equal(it_is.components.prediction.tag, 'keep'); @@ -318,7 +318,7 @@ describe('processSimilarity', () => { expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText; // it_is expectation[2].metadata.matchLevel = SuggestionSimilarity.none; // is expectation[3].metadata.matchLevel = SuggestionSimilarity.none; // is_not - processSimilarity(testModelWithCasing, distribution, context, trueInput); + processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context)); // Because we mucked with the casing here, there is no perfect 'keep' match. const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep'); @@ -358,7 +358,7 @@ describe('processSimilarity', () => { const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)]; expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none); - processSimilarity(testModelWithoutCasing, distribution, context, trueInput); + processSimilarity(testModelWithoutCasing, distribution, context, models.applyTransform(trueInput.sample, context)); // Because we mucked with the casing here, there is no perfect 'keep' match. const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');