Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ export class ModelCompositor {
const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context);

// Needs "casing" to be applied first.
const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, transformDistribution[0]);
const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext);

// If no existing suggestion directly matches the user-visible version of
// the token, also add a 'keep' suggestion (with `.matchesModel = false`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -874,35 +874,33 @@ export function dedupeSuggestions(
export function processSimilarity(
lexicalModel: LexicalModel,
suggestionDistribution: IntermediateCompositedPrediction[],
context: Context,
trueInput: ProbabilityMass<Transform>
baseContext: Context,
finalContext: Context
): boolean {
const { sample: inputTransform } = trueInput;
const wordbreak = determineModelWordbreaker(lexicalModel);

const postContext = models.applyTransform(inputTransform, context);
const truePrefix = wordbreak(postContext);

const keyed = (text: string) => lexicalModel.toKey ? lexicalModel.toKey(text) : text;
const keyCased = (text: string) => lexicalModel.applyCasing ? lexicalModel.applyCasing('lower', text) : text;
const keyedPrefix = keyed(truePrefix);
const lowercasedPrefix = keyCased(truePrefix);
const keyedTarget = keyed(finalContext.left);
const lowercasedTarget = keyCased(finalContext.left);

let keepOption: Outcome<Keep>;

for(let tuple of suggestionDistribution) {
// Don't set it unnecessarily; this can have side-effects in some automated tests.
if(inputTransform.id !== undefined) {
tuple.components.prediction.transformId = inputTransform.id;
}
// If there are no suggestions found, we can't validate that the underlying
// correction was an empty token.
let allCorrectionsEmpty: boolean = suggestionDistribution.length > 0
? true
: wordbreak(finalContext) == '';

const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context));
for(let tuple of suggestionDistribution) {
const appliedContext = models.applyTransform(tuple.components.prediction.transform, baseContext);
allCorrectionsEmpty &&= tuple.components.correction == '';

// Is the suggestion an exact match (or, "similar enough") to the
// actually-typed context? If so, we wish to note this fact and to
// prioritize such a suggestion over suggestions that are not.
if(keyed(tuple.components.correction) == keyedPrefix) {
if(predictedWord == truePrefix) {
if(keyed(tuple.components.correction) == keyedTarget) {
if(appliedContext.left == finalContext.left) {
// Exact match: it's a perfect 'keep' suggestion.
tuple.metadata.matchLevel = SuggestionSimilarity.exact;
keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep', models.QuoteBehavior.noQuotes);
Expand All @@ -914,10 +912,10 @@ export function processSimilarity(
keepOption.matchesModel = true;
Object.assign(tuple.components.prediction, keepOption);
keepOption = tuple.components.prediction as Outcome<Keep>;
} else if(keyCased(predictedWord) == lowercasedPrefix) {
} else if(keyCased(appliedContext.left) == lowercasedTarget) {
// Case-insensitive match. No diacritic differences; the ONLY difference is casing.
tuple.metadata.matchLevel = SuggestionSimilarity.sameText;
} else if(keyed(predictedWord) == keyedPrefix) {
} else if(keyed(appliedContext.left) == keyedTarget) {
// Diacritic-insensitive / exact-key match.
tuple.metadata.matchLevel = SuggestionSimilarity.sameKey;
} else {
Expand All @@ -932,7 +930,7 @@ export function processSimilarity(
//
// No actual 'keep' needed if the current context token is empty, so we say we
// have a 'keep' for that case, even though there isn't really one.
return !!(keepOption || truePrefix == '');
return !!(keepOption || allCorrectionsEmpty);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ describe('processSimilarity', () => {
const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes);
keep_its.matchesModel = true;

processSimilarity(testModelWithCasing, distribution, context, trueInput);
processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));

assert.sameDeepMembers(distribution, expectation);
assert.equal(its.components.prediction.tag, 'keep');
Expand Down Expand Up @@ -270,7 +270,7 @@ describe('processSimilarity', () => {
const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes);
keep_it_is.matchesModel = true;

processSimilarity(testModelWithCasing, distribution, context, trueInput);
processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));

assert.sameDeepMembers(distribution, expectation);
assert.equal(it_is.components.prediction.tag, 'keep');
Expand Down Expand Up @@ -318,7 +318,7 @@ describe('processSimilarity', () => {
expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText; // it_is
expectation[2].metadata.matchLevel = SuggestionSimilarity.none; // is
expectation[3].metadata.matchLevel = SuggestionSimilarity.none; // is_not
processSimilarity(testModelWithCasing, distribution, context, trueInput);
processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));

// Because we mucked with the casing here, there is no perfect 'keep' match.
const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
Expand Down Expand Up @@ -358,7 +358,7 @@ describe('processSimilarity', () => {
const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];

expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none);
processSimilarity(testModelWithoutCasing, distribution, context, trueInput);
processSimilarity(testModelWithoutCasing, distribution, context, models.applyTransform(trueInput.sample, context));

// Because we mucked with the casing here, there is no perfect 'keep' match.
const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
Expand Down
Loading