diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts index 3b71b2eb4f8..885cdb0ed2b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts @@ -58,6 +58,7 @@ export class TokenizationCorrector implements CorrectionSearchable; private tokenCostMap: Map; private tokenLookupMap: Map; @@ -172,13 +173,16 @@ export class TokenizationCorrector implements CorrectionSearchable { // New issue: this mangles the space IDs! We almost certainly need some // sort of proper map to the source token. const searchModule = new QuotientNodeFinalizer(token.searchModule, index == orderedTokens.length - 1); this.tokenLookupMap.set(searchModule.spaceId, token); - if(!filterClosure(token)) { + const passesFilter = filterClosure(token); + modelsCorrectables ||= passesFilter; + if(!passesFilter) { this._uncorrectables.push(searchModule); } else if(index == tailCorrectionLength - 1) { // The sole assignment case for this field. It may only be assigned for @@ -189,6 +193,8 @@ export class TokenizationCorrector implements CorrectionSearchable, + prediction: Suggestion | Keep; /** * The correction upon which the Suggestion (or Keep) is based */ - correction: ProbabilityMass, + correction: string +} + +export interface PredictionProbabilities { /** - * The likelihood of the prediction - its lexical-model likelihood multiplied - * by the keystroke-sequence + correction likelihood. + * The probability of the word itself, separate from corrections, as + * determined by the LexicalModel itself. */ - totalProb: number; + prediction: number; + + /** + * The probability of text-correction steps taken to build the correction upon + * which the prediction is based. 
+ */ + correction: number; + + /** + * The likelihood of the represented prediction, combining both the + * `prediction` and `correction` components into a single value. + */ + total: number; +} + +/** + * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context. + */ +export interface PredictionMetadata { + /** + * Tracks the relevant probability components contributing to a generated + * prediction. + */ + probabilities: PredictionProbabilities; + + /** + * Indicates that the 'suggestion' represents context changes that qualify for + * auto-selection. + */ + autoSelectable: boolean; + /** * How directly the prediction matches the current token in the context. * @@ -101,12 +130,26 @@ export type CorrectionPredictionTuple = { * available upon initial construction of this type. */ matchLevel?: SuggestionSimilarity; + /** * Text from the triggering input that should _not_ be affected by the * prediction. */ preservationTransform?: Transform; -}; +} + +export interface IntermediateCompositedPrediction { + /** + * Contains the fully composited predictive-text Suggestion and its underlying correction string. + */ + components: CompositedPredictionData; + /** + * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context. + */ + metadata: PredictionMetadata; +} + +type IntermediatePrediction = IntermediateCompositedPrediction; /** * An enum to be used when categorizing the level of similarity between @@ -144,15 +187,15 @@ export enum SuggestionSimilarity { exact = 3 } -export function tupleDisplayOrderSort(a: CorrectionPredictionTuple, b: CorrectionPredictionTuple) { +export function tupleDisplayOrderSort(a: IntermediatePrediction, b: IntermediatePrediction) { // Similarity distance - const simDist = (b.matchLevel ?? 0) - (a.matchLevel ?? 0); + const simDist = (b.metadata.matchLevel ?? 0) - (a.metadata.matchLevel ?? 
0); if(simDist != 0) { return simDist; } // Probability distance - return b.totalProb - a.totalProb; + return b.metadata.probabilities.total - a.metadata.probabilities.total; } /** @@ -320,7 +363,7 @@ export function determineSuggestionRange( export function buildAndMapPredictions( transition: ContextTransition, tokenizationCorrection: TokenizationResultMapping, -): CorrectionPredictionTuple[] { +): IntermediateCompositedPrediction[] { const model = transition.final.model; const tokenization = tokenizationCorrection.matchingSpace.tokenization; @@ -398,6 +441,10 @@ export function buildAndMapPredictions( // Regardless of origin, overwrite the transform's deleteLeft value with what it should actually hold. predictions.forEach((entry) => { entry.sample.transform.deleteLeft = deleteLeft; + if(transition.transitionId !== undefined) { + entry.sample.transformId = transition.transitionId; + entry.sample.transform.id = transition.transitionId; + } }); // Use traversals if possible - extract the most likely entry that is on the traversal, @@ -415,8 +462,9 @@ export function buildAndMapPredictions( .slice(0, predictionComponents.length-1) .reduce((accum, curr) => accum * curr[0].p, 1) - const completePredictionTuples: CorrectionPredictionTuple[] = predictionComponents[predictionComponents.length-1].map((prediction) => { + const completePredictionTuples: IntermediateCompositedPrediction[] = predictionComponents[predictionComponents.length-1].map((prediction) => { const predictionCost = prediction.p * prefixProb; + return { // Will need to do this differently. We want to have each component // individualized b/c casing. Case should be maintained for prior tokens @@ -428,28 +476,30 @@ export function buildAndMapPredictions( // applySuggestionCasing applies onto suggestions, so we'll want to build // the FULL suggestion AFTER applying casing changes (to each token's // suggestion component). 
- prediction: { - sample: { + components: { + prediction: { transformId: transition.transitionId, transform: models.buildMergedTransform(predictionPrefix, prediction.sample.transform), displayAs: models.buildMergedTransform(predictionPrefix, prediction.sample.transform).insert // should composite the displayAs strings instead... }, - p: predictionCost, + correction: correctionTransforms[correctionTransforms.length-1].insert }, - correction: { - // Is used partly for word-casing, partly for auto-select enabling. - // Is already the full word, as that's what is provided by TokenizationCorrector. - sample: correctionTransforms[correctionTransforms.length-1].insert, - p: correctionCost - }, - totalProb: predictionCost * correctionCost, - matchLevel: SuggestionSimilarity.none, - // Long-term, we shouldn't have `.preservationTransform` here. - // - // Needed for now until the search actually operates based on - // TokenizationCorrector, rather than the half-converted use currently in - // place. - preservationTransform: tokenization.taillessTrueKeystroke + metadata: { + probabilities: { + prediction: predictionCost, + correction: correctionCost, + total: predictionCost * correctionCost + }, + matchLevel: SuggestionSimilarity.none, + autoSelectable: tokenizationCorrection.matchingSpace.modelsCorrectables, + + // Long-term, we shouldn't have `.preservationTransform` here. + // + // Needed for now until the search actually operates based on + // TokenizationCorrector, rather than the half-converted use currently in + // place. + preservationTransform: tokenization.taillessTrueKeystroke + } } }); @@ -515,7 +565,7 @@ export async function correctAndEnumerate( /** * The suggestions generated based on the user's input state. 
*/ - rawPredictions: CorrectionPredictionTuple[]; + rawPredictions: IntermediateCompositedPrediction[]; /** * The id of a prior ContextTransition event that triggered a Suggestion found @@ -567,9 +617,8 @@ export async function correctAndEnumerate( const searchModules = tokenizations.map(t => t.tail.searchModule); // Only run the correction search when corrections are enabled. - let rawPredictions: CorrectionPredictionTuple[] = []; + let rawPredictions: IntermediateCompositedPrediction[] = []; let bestCorrectionCost: number; - const correctionPredictionMap: Record> = {}; for await(const match of getBestTokenMatches(searchModules, timer)) { // Corrections obtained: now to predict from them! const tokenization = tokenizations.find(t => t.spaceId == match.spaceId); @@ -588,38 +637,15 @@ export async function correctAndEnumerate( continue; } - /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost - * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if - * there are significantly more likely words. We only need this to allow very minor fat-finger - * adjustments for 100% keystroke-sequence corrections in order to prevent finickiness on - * key borders. - * - * Technically, the probabilities this produces won't be normalized as-is... but there's no - * true NEED to do so for it, even if it'd be 'nice to have'. Consistently tracking when - * to apply it could become tricky, so it's simpler to leave out. - * - * Worst-case, it's possible to temporarily add normalization if a code deep-dive - * is needed in the future. - */ - const costFactor = (tokenization.tail.inputCount <= 1) ? 
ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1; - const suggestionRange = determineSuggestionRange(transition.base.displayTokenization, tokenization) const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true); const predictions = buildAndMapPredictions(transition, new TokenizationResultMapping([match], corrector)); // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions. if(predictions.length > 0 && bestCorrectionCost === undefined) { - bestCorrectionCost = match.totalCost * costFactor; - } - - // If we're getting the same prediction again, it's lower-cost. Update! - let oldPredictionSet = correctionPredictionMap[match.matchString]; - if(oldPredictionSet) { - rawPredictions = rawPredictions.filter((entry) => !oldPredictionSet.find((match) => entry.prediction.sample == match.sample)); + bestCorrectionCost = predictions[0].metadata.probabilities.correction; } - correctionPredictionMap[match.matchString] = predictions.map((entry) => entry.prediction); - rawPredictions = rawPredictions.concat(predictions); if(shouldStopSearchingEarly(bestCorrectionCost, match.totalCost, rawPredictions)) { @@ -640,7 +666,7 @@ export async function correctAndEnumerate( export function shouldStopSearchingEarly( bestCorrectionCost: number, currentCorrectionCost: number, - rawPredictions: CorrectionPredictionTuple[] + rawPredictions: IntermediateCompositedPrediction[] ) { if(currentCorrectionCost >= bestCorrectionCost + CORRECTION_SEARCH_THRESHOLDS.MAX_SEARCH_THRESHOLD) { return true; @@ -656,7 +682,7 @@ export function shouldStopSearchingEarly( // If the best suggestion from the search's current tier fails to beat the worst // pending suggestion from previous tiers, assume all further corrections will // similarly fail to win; terminate the search-loop. 
- if(rawPredictions[ModelCompositor.MAX_SUGGESTIONS-1].totalProb > Math.exp(-currentCorrectionCost)) { + if(rawPredictions[ModelCompositor.MAX_SUGGESTIONS-1].metadata.probabilities.total > Math.exp(-currentCorrectionCost)) { return true; } } @@ -681,8 +707,8 @@ export function correctAndEnumerateWithoutTraversals( lexicalModel: LexicalModel, corrections: ProbabilityMass[], context: Context -): CorrectionPredictionTuple[] { - let returnedPredictions: CorrectionPredictionTuple[] = []; +): IntermediateCompositedPrediction[] { + let returnedPredictions: IntermediateCompositedPrediction[] = []; const wordbreak = determineModelWordbreaker(lexicalModel); const tokenizer = determineModelTokenizer(lexicalModel); @@ -730,15 +756,21 @@ export function correctAndEnumerateWithoutTraversals( correctionText = wordbreak(postContext); } - let tuple: CorrectionPredictionTuple = { - prediction: pair, - correction: { - sample: correctionText, - p: correction.p + let tuple: IntermediateCompositedPrediction = { + components: { + prediction: pair.sample, + correction: correctionText }, - totalProb: pair.p * correction.p, - matchLevel: SuggestionSimilarity.none, - preservationTransform + metadata: { + probabilities: { + prediction: pair.p, + correction: correction.p, + total: pair.p * correction.p + }, + autoSelectable: correctionValidForAutoSelect(tailCorrection.insert), + matchLevel: SuggestionSimilarity.none, + preservationTransform + } }; return tuple; }); @@ -784,17 +816,17 @@ export function applySuggestionCasing(suggestion: Suggestion, baseWord: string, */ export function dedupeSuggestions( lexicalModel: LexicalModel, - rawPredictions: CorrectionPredictionTuple[], + rawPredictions: IntermediateCompositedPrediction[], context: Context ) { const wordbreak = determineModelWordbreaker(lexicalModel); - let suggestionDistribMap: {[key: string]: CorrectionPredictionTuple} = {}; - let suggestionDistribution: CorrectionPredictionTuple[] = []; + let suggestionDistribMap: {[key: string]: 
IntermediateCompositedPrediction} = {}; + let suggestionDistribution: IntermediateCompositedPrediction[] = []; // Deduplicator + annotator of 'keep' suggestions. for(let tuple of rawPredictions) { - const predictedWord = wordbreak(models.applyTransform(tuple.prediction.sample.transform, context)); + const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context)); // Assumption: suggestions that have the same net result should have the // same displayAs string. (We could try to pick the one with highest net @@ -804,7 +836,7 @@ export function dedupeSuggestions( // Merge 'em! const existingSuggestion = suggestionDistribMap[predictedWord]; if(existingSuggestion) { - existingSuggestion.totalProb += tuple.totalProb; + existingSuggestion.metadata.probabilities.total += tuple.metadata.probabilities.total; } else { suggestionDistribMap[predictedWord] = tuple; } @@ -832,15 +864,16 @@ export function dedupeSuggestions( * current text * - any other suggestion * + * @param lexicalModel * @param suggestionDistribution - * @param context - * @param trueInput inputTransform + its assigned probability + * @param baseContext + * @param finalContext * @returns true if an existing suggestion fulfills the role of 'keep'; * otherwise, false. */ export function processSimilarity( lexicalModel: LexicalModel, - suggestionDistribution: CorrectionPredictionTuple[], + suggestionDistribution: IntermediateCompositedPrediction[], context: Context, trueInput: ProbabilityMass ): boolean { @@ -860,38 +893,38 @@ export function processSimilarity( for(let tuple of suggestionDistribution) { // Don't set it unnecessarily; this can have side-effects in some automated tests. 
if(inputTransform.id !== undefined) { - tuple.prediction.sample.transformId = inputTransform.id; + tuple.components.prediction.transformId = inputTransform.id; } - const predictedWord = wordbreak(models.applyTransform(tuple.prediction.sample.transform, context)); + const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context)); // Is the suggestion an exact match (or, "similar enough") to the // actually-typed context? If so, we wish to note this fact and to // prioritize such a suggestion over suggestions that are not. - if(keyed(tuple.correction.sample) == keyedPrefix) { + if(keyed(tuple.components.correction) == keyedPrefix) { if(predictedWord == truePrefix) { // Exact match: it's a perfect 'keep' suggestion. - tuple.matchLevel = SuggestionSimilarity.exact; - keepOption = toAnnotatedSuggestion(lexicalModel, tuple.prediction.sample, 'keep', models.QuoteBehavior.noQuotes); + tuple.metadata.matchLevel = SuggestionSimilarity.exact; + keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep', models.QuoteBehavior.noQuotes); // Indicates that this suggestion exists directly within the lexical // model as a valid suggestion. (We actively display it if it's an // exact match, but hide it if not, only preserving it for reversions // if/when needed.) keepOption.matchesModel = true; - Object.assign(tuple.prediction.sample, keepOption); - keepOption = tuple.prediction.sample as Outcome; + Object.assign(tuple.components.prediction, keepOption); + keepOption = tuple.components.prediction as Outcome; } else if(keyCased(predictedWord) == lowercasedPrefix) { // Case-insensitive match. No diacritic differences; the ONLY difference is casing. - tuple.matchLevel = SuggestionSimilarity.sameText; + tuple.metadata.matchLevel = SuggestionSimilarity.sameText; } else if(keyed(predictedWord) == keyedPrefix) { // Diacritic-insensitive / exact-key match. 
- tuple.matchLevel = SuggestionSimilarity.sameKey; + tuple.metadata.matchLevel = SuggestionSimilarity.sameKey; } else { - tuple.matchLevel = SuggestionSimilarity.none; + tuple.metadata.matchLevel = SuggestionSimilarity.none; } } else { - tuple.matchLevel = SuggestionSimilarity.none; + tuple.metadata.matchLevel = SuggestionSimilarity.none; } } @@ -917,7 +950,7 @@ export function createDefaultKeep( lexicalModel: LexicalModel, postContext: Context, trueInput: ProbabilityMass -): CorrectionPredictionTuple { +): IntermediateCompositedPrediction { const { sample: inputTransform, p: inputTransformProb } = trueInput; const wordbreak = determineModelWordbreaker(lexicalModel); @@ -946,19 +979,19 @@ export function createDefaultKeep( // Insert our synthetic keepOption as a prediction tuple. return { - // Product of the two p's below. - totalProb: inputTransformProb * MAX_PROB, - prediction: { - sample: keepOption, - // We always show the keep option if it doesn't directly match, - // so max probability is fine. 
- p: MAX_PROB, + components: { + prediction: keepOption, + correction: truePrefix }, - correction: { - sample: truePrefix, - p: inputTransformProb * MAX_PROB - }, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: MAX_PROB, + correction: inputTransformProb, + total: inputTransformProb * MAX_PROB + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; } @@ -991,12 +1024,12 @@ export function correctionValidForAutoSelect(correction: string) { return false; } -export function predictionAutoSelect(suggestionDistribution: CorrectionPredictionTuple[]) { +export function predictionAutoSelect(suggestionDistribution: IntermediateCompositedPrediction[]) { if(suggestionDistribution.length == 0) { return; } - const keepOption = suggestionDistribution[0].prediction.sample as Outcome; + const keepOption = suggestionDistribution[0].components.prediction as Outcome; if(keepOption.tag == 'keep' && keepOption.matchesModel) { // Auto-select it for auto-acceptance; we don't correct away from perfectly-valid // lexical entries, even if they are comparatively low-frequency. @@ -1010,19 +1043,19 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio if(suggestionDistribution.length == 1) { // Prevent auto-acceptance when the root doesn't meet validation criteria. - if(!correctionValidForAutoSelect(suggestionDistribution[0].correction.sample)) { + if(!suggestionDistribution[0].metadata.autoSelectable) { return; } // Mark for auto-acceptance; there are no alternatives. - suggestionDistribution[0].prediction.sample.autoAccept = true; + suggestionDistribution[0].components.prediction.autoAccept = true; return; } // Is it reasonable to auto-accept any of our suggestions? 
const bestSuggestion = suggestionDistribution[0]; - const baseCorrection = bestSuggestion.correction.sample; + const baseCorrection = bestSuggestion.components.correction; if(baseCorrection.length == 0) { // If the correction is rooted on an empty root, there's no basis for // auto-correcting to this suggestion. @@ -1031,8 +1064,8 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio // Find the highest probability for any correction that led to a valid prediction. // No need to full-on re-sort everything, though. - const bestCorrection = suggestionDistribution.reduce((prev, current) => prev?.correction.p > current.correction.p ? prev : current, null).correction; - if(bestCorrection.p > bestSuggestion.correction.p) { + const bestCorrectionP = suggestionDistribution.reduce((prev, current) => Math.max(prev, current.metadata.probabilities.correction), 0); + if(bestCorrectionP > bestSuggestion.metadata.probabilities.correction) { // Here, the best suggestion didn't come from the best correction. // Is it actually reasonable to auto-correct? We're probably just very // biased toward its frequency. (Maybe a threshold should be considered?) @@ -1043,28 +1076,28 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio // - such as replacing `cant` with `can't` if the latter is much more frequent - // we may wish to group matchLevel values below by 'mapping' them with an appropriate // function. (Both on the next line and within the reduce functor.) - const bestSuggestionTier = bestSuggestion.matchLevel; + const bestSuggestionTier = bestSuggestion.metadata.matchLevel; // compare best vs other probabilities of compatible tier. const probSum = suggestionDistribution.reduce((accum, current) => { // If the suggestion is from a different similarity tier, do not count it against // the required auto-select probability ratio threshold. That threshold should // only apply within the suggestion's tier. 
- return accum + (current.matchLevel == bestSuggestionTier ? current.totalProb : 0) + return accum + (current.metadata.matchLevel == bestSuggestionTier ? current.metadata.probabilities.total : 0) }, 0); - const proportionOfBest = bestSuggestion.totalProb / probSum; + const proportionOfBest = bestSuggestion.metadata.probabilities.total / probSum; if(proportionOfBest < AUTOSELECT_PROPORTION_THRESHOLD) { return; } - if(!correctionValidForAutoSelect(bestSuggestion.correction.sample)) { + if(!bestSuggestion.metadata.autoSelectable) { return; } // compare correction-cost aspects? We disable if the base correction is lower than best, // but should we do other comparisons too? - bestSuggestion.prediction.sample.autoAccept = true; + bestSuggestion.components.prediction.autoAccept = true; } /** @@ -1085,7 +1118,7 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio */ export function finalizeSuggestions( lexicalModel: LexicalModel, - deduplicatedSuggestionTuples: CorrectionPredictionTuple[], + deduplicatedSuggestionTuples: IntermediateCompositedPrediction[], context: Context, inputTransform: Transform, verbose?: boolean @@ -1094,42 +1127,44 @@ export function finalizeSuggestions( const tokenize = determineModelTokenizer(lexicalModel); const suggestions = deduplicatedSuggestionTuples.map((tuple) => { - const prediction = tuple.prediction; + const prediction = tuple.components.prediction; // If this is a suggestion after any form of wordbreak input, make sure we preserve any components // from prior tokens! // // Note: may need adjustment if/when supporting phrase-level correction. 
- if(tuple.preservationTransform) { + if(tuple.metadata.preservationTransform) { const mergedTransform = { - ...models.buildMergedTransform(tuple.preservationTransform, {...prediction.sample.transform, deleteLeft: 0}), - deleteLeft: prediction.sample.transform.deleteLeft + ...models.buildMergedTransform(tuple.metadata.preservationTransform, {...prediction.transform, deleteLeft: 0}), + deleteLeft: prediction.transform.deleteLeft }; // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform. // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers - let mutableSuggestion = prediction.sample as {-readonly [transform in keyof Suggestion]: Suggestion[transform]}; + let mutableSuggestion = prediction as {-readonly [transform in keyof Suggestion]: Suggestion[transform]}; // Assignment via by-reference behavior, as suggestion is an object mutableSuggestion.transform = mergedTransform; } // Is sometimes not set during unit tests. 
- if(prediction.sample.transformId !== undefined) { - prediction.sample.transform.id = prediction.sample.transformId; + if(prediction.transformId) { + prediction.transform.id = prediction.transformId; } + const probs = tuple.metadata.probabilities; + if(!verbose) { return { - ...prediction.sample, - p: tuple.totalProb + ...prediction, + p: probs.total }; } else { const sample: Outcome = { - ...prediction.sample, - p: tuple.totalProb, - "lexical-p": prediction.p, - "correction-p": tuple.correction.p + ...prediction, + p: probs.total, + "lexical-p": probs.prediction, + "correction-p": probs.correction } return sample; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts index bf45e3b94e1..430d9c6c7e0 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts @@ -1,6 +1,16 @@ import { assert } from 'chai'; -import { CORRECTION_SEARCH_THRESHOLDS, CorrectionPredictionTuple, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index"; +import { CORRECTION_SEARCH_THRESHOLDS, IntermediateCompositedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index"; + +function mockIntermediatePrediction(value: number) { + return { + metadata: { + probabilities: { + total: value + } + } + } as IntermediateCompositedPrediction +} describe('correction-search: shouldStopSearchingEarly', () => { it('stops early once new corrections are less likely than currently discovered predictions', () => { @@ -12,12 +22,7 @@ describe('correction-search: shouldStopSearchingEarly', () => { assert.equal(predictionProbs.length, 
ModelCompositor.MAX_SUGGESTIONS, "test setup no longer valid"); // The only part for each entry we actually care about here: .totalProb. - /** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */ - const predictions = predictionProbs.map((entry) => { - return { - totalProb: entry - } as CorrectionPredictionTuple - }); + const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry)); // Thresholding is performed in log-space. // 0.0501 and 0.0499 are offset on each side of 0.05, the last value in the array defined above. @@ -33,8 +38,8 @@ describe('correction-search: shouldStopSearchingEarly', () => { // // Can technically run the method with an empty array, but the actual scenario would have // at least one prediction present in the "found predictions" array. - assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [{ totalProb: Math.exp(-1) } as CorrectionPredictionTuple])); - assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [{ totalProb: Math.exp(-1) } as CorrectionPredictionTuple])); + assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockIntermediatePrediction(Math.exp(-1))])); + assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockIntermediatePrediction(Math.exp(-1))])); }); it('stops checking corrections earlier when enough predictions have been found', () => { @@ -43,11 +48,7 @@ describe('correction-search: shouldStopSearchingEarly', () => { // The only part for each entry we actually care about here: .totalProb. 
/** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */ - const predictions = predictionProbs.map((entry) => { - return { - totalProb: entry - } as CorrectionPredictionTuple - }); + const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry)); const baseCost = 1; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts index d32326e8436..b55886bb42f 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts @@ -1,6 +1,6 @@ import { assert } from 'chai'; -import { AUTOSELECT_PROPORTION_THRESHOLD, CorrectionPredictionTuple, predictionAutoSelect, SuggestionSimilarity, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index"; +import { AUTOSELECT_PROPORTION_THRESHOLD, IntermediateCompositedPrediction, predictionAutoSelect, SuggestionSimilarity, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index"; /* * Preconditions: * - there should always be a 'keep' option. 
Now, whether or not that option @@ -9,7 +9,7 @@ import { AUTOSELECT_PROPORTION_THRESHOLD, CorrectionPredictionTuple, predictionA */ describe('predictionAutoSelect', () => { it(`does not throw when no suggestions are available`, () => { - const predictions: CorrectionPredictionTuple[] = []; + const predictions: IntermediateCompositedPrediction[] = []; const originalPredictions = [].concat(predictions); assert.doesNotThrow(() => predictionAutoSelect(predictions)); @@ -17,14 +17,10 @@ describe('predictionAutoSelect', () => { }); it(`selects solitary 'keep' suggestion that does match the model`, () => { - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ { - correction: { - sample: 'apple', - p: 1 - }, - prediction: { - sample: { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'e', @@ -33,9 +29,16 @@ describe('predictionAutoSelect', () => { matchesModel: true, displayAs: 'apple' }, - p: 1 + correction: 'apple', }, - totalProb: 1 + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 + }, + autoSelectable: true + } } ]; @@ -43,19 +46,15 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepOrderedMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.isOk(autoselected); }); it(`does not select suggestions if the root correction has no letters`, () => { - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ { - correction: { - sample: '5', - p: 1 - }, - prediction: { - sample: { + components: { + prediction: { tag: 'keep', transform: { insert: '5', @@ -64,17 +63,20 @@ describe('predictionAutoSelect', () => { matchesModel: false, displayAs: '5' }, 
- p: 0.01 + correction: '5' }, - totalProb: 0.01 + metadata: { + probabilities: { + prediction: 0.01, + correction: 1, + total: 0.01 + }, + autoSelectable: false + } }, { - correction: { - sample: '5', - p: 1 - }, - prediction: { - sample: { + components: { + prediction: { transform: { insert: '5th', deleteLeft: 0 @@ -82,9 +84,16 @@ describe('predictionAutoSelect', () => { matchesModel: true, displayAs: '5th' }, - p: 0.8 + correction: '5' }, - totalProb: 0.8 + metadata: { + probabilities: { + prediction: 0.8, + correction: 1, + total: 0.8 + }, + autoSelectable: false + } } ]; @@ -92,19 +101,15 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepOrderedMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.isNotOk(autoselected); }); it(`does not select solitary 'keep' suggestion that doesn't match the model`, () => { - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ { - correction: { - sample: 'appl', - p: 1 - }, - prediction: { - sample: { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'l', @@ -113,9 +118,16 @@ describe('predictionAutoSelect', () => { matchesModel: false, displayAs: '"appl"' }, - p: 1 + correction: 'appl' }, - totalProb: 1 + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 + }, + autoSelectable: true + } } ]; @@ -123,18 +135,14 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepOrderedMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); 
assert.isNotOk(autoselected); }); it(`selects 'keep' suggestion that does match the model over any alternatives`, () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'n', @@ -143,65 +151,81 @@ describe('predictionAutoSelect', () => { matchesModel: true, displayAs: 'thin' }, - p: .05 + correction: 'thin' }, - totalProb: .04 + metadata: { + probabilities: { + prediction: .05, + correction: .8, + total: .05 * .8 + }, + autoSelectable: true + } } - const highestNonKeepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const highestNonKeepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'nk', deleteLeft: 0 }, displayAs: 'think' }, - p: .55 + correction: 'thin' }, - totalProb: .44 + metadata: { + probabilities: { + prediction: .55, + correction: .8, + total: .55 * .8 + }, + autoSelectable: true + } }; - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, highestNonKeepSuggestion, { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ng', deleteLeft: 0 }, displayAs: 'thing' }, - p: .4 + correction: 'thin' }, - totalProb: .32 + metadata: { + probabilities: { + prediction: .4, + correction: .8, + total: .4 * .8 + }, + autoSelectable: true + } }, { - correction: { - sample: 'thic', - p: .2 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ck', deleteLeft: 0 }, displayAs: 'thick' }, - p: 1 + correction: 'thic' }, - totalProb: .2 + metadata: { + 
probabilities: { + prediction: 1, + correction: .2, + total: 1 * .2 + }, + autoSelectable: true + } } ]; @@ -209,18 +233,14 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.equal(autoselected, keepSuggestion); }); it(`selects solitary non-'keep' suggestion when 'keep' does not match model`, () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'n', @@ -229,40 +249,50 @@ describe('predictionAutoSelect', () => { displayAs: '"thin"', matchesModel: false }, - p: .05 + correction: 'thin' }, - totalProb: .04 + metadata: { + probabilities: { + prediction: .05, + correction: .8, + total: .8 * .05 + }, + autoSelectable: true + } } // To 'win', a suggestion (currently) needs at least twice the probability of the sum of all alternatives. // This threshold may be subject to change. // // Refer to AUTOSELECT_PROPORTION_THRESHOLD in predict-helpers.ts. 
- const onlyNonKeepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const onlyNonKeepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'nk', deleteLeft: 0 }, displayAs: 'think' }, - p: .01 + correction: 'thin' }, - totalProb: .008 + metadata: { + probabilities: { + prediction: .01, + correction: .8, + total: .01 * .8 + }, + autoSelectable: true + } }; - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, onlyNonKeepSuggestion ]; - const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0); - assert.isBelow(onlyNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); + const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0); + assert.isBelow(onlyNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); predictions.sort(tupleDisplayOrderSort); @@ -270,18 +300,14 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepOrderedMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.equal(autoselected, onlyNonKeepSuggestion); }); it(`does not select non-'keep' without sufficient winning probability`, () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'n', @@ -290,74 +316,90 @@ 
describe('predictionAutoSelect', () => { displayAs: '"thin"', matchesModel: false }, - p: .05 + correction: 'thin' }, - totalProb: .04 + metadata: { + probabilities: { + prediction: .05, + correction: .8, + total: .05 * .8 + }, + autoSelectable: true + } } // To 'win', a suggestion (currently) needs at least twice the probability of the sum of all alternatives. // This threshold may be subject to change. // // Refer to AUTOSELECT_PROPORTION_THRESHOLD in predict-helpers.ts. - const highestNonKeepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const highestNonKeepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'nk', deleteLeft: 0 }, displayAs: 'think' }, - p: .55 + correction: 'thin' }, - totalProb: .44 + metadata: { + probabilities: { + prediction: .55, + correction: .8, + total: .55 * .8 + }, + autoSelectable: true + } }; - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, highestNonKeepSuggestion, { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ng', deleteLeft: 0 }, displayAs: 'thing' }, - p: .4 + correction: 'thin' }, - totalProb: .32 + metadata: { + probabilities: { + prediction: .4, + correction: .8, + total: .4 * .8 + }, + autoSelectable: true + } }, { - correction: { - sample: 'thic', - p: .2 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ck', deleteLeft: 0 }, displayAs: 'thick' }, - p: 1 + correction: 'thic' }, - totalProb: .2 + metadata: { + probabilities: { + prediction: 1, + correction: .2, + total: 1 * .2 + }, + autoSelectable: true + } } ]; - const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0); - 
assert.isBelow(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); + const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0); + assert.isBelow(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); predictions.sort(tupleDisplayOrderSort); @@ -365,18 +407,14 @@ describe('predictionAutoSelect', () => { assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepOrderedMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.isNotOk(autoselected); }); it(`does select non-'keep' with sufficient winning probability`, () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .8 - }, - prediction: { - sample: { + const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'n', @@ -385,87 +423,99 @@ describe('predictionAutoSelect', () => { displayAs: '"thin"', matchesModel: false }, - p: .05 + correction: 'thin' }, - totalProb: .04 + metadata: { + probabilities: { + prediction: .05, + correction: .8, + total: .05 * .8 + }, + autoSelectable: true + } } - const highestNonKeepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thin', - p: .9 - }, - prediction: { - sample: { + const highestNonKeepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'nk', deleteLeft: 0 }, displayAs: 'think' }, - p: .75 + correction: 'thin' }, - totalProb: .675 + metadata: { + probabilities: { + prediction: .75, + correction: .9, + total: .75 * .9 + }, + autoSelectable: true + } }; - const 
predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, highestNonKeepSuggestion, { - correction: { - sample: 'thin', - p: .9 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ng', deleteLeft: 0 }, displayAs: 'thing' }, - p: .2 + correction: 'thin' }, - totalProb: .18 + metadata: { + probabilities: { + prediction: .2, + correction: .9, + total: .2 * .9 + }, + autoSelectable: true + } }, { - correction: { - sample: 'thic', - p: .1 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'ck', deleteLeft: 0 }, displayAs: 'thick' }, - p: 1 + correction: 'thic' }, - totalProb: .1 + metadata: { + probabilities: { + prediction: 1, + correction: .1, + total: 1 * .1 + }, + autoSelectable: true + } } ]; - const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0); - assert.isAbove(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); + const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0); + assert.isAbove(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); const originalPredictions = [].concat(predictions); assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.equal(autoselected, highestNonKeepSuggestion); }); it('ignores non key-matched suggestions when key-matched suggestions exist', () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'cant', - p: 1 - }, - prediction: { - sample: { 
+ const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 't', @@ -474,51 +524,64 @@ describe('predictionAutoSelect', () => { displayAs: '"cant"', matchesModel: false }, - p: 1 + correction: 'cant' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: true, + matchLevel: SuggestionSimilarity.exact + } } - const expectedSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'cant', - p: 1 - }, - prediction: { - sample: { + const expectedSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: '\'t', deleteLeft: 0 }, displayAs: "can't" }, - p: .2 + correction: 'cant' }, - totalProb: .2, - matchLevel: SuggestionSimilarity.sameKey + metadata: { + probabilities: { + prediction: .2, + correction: 1, + total: .2 * 1 + }, + autoSelectable: true, + matchLevel: SuggestionSimilarity.sameKey + } }; - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, expectedSuggestion, { - correction: { - sample: 'cant', - p: 1 - }, - prediction: { - sample: { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'teen', deleteLeft: 0 }, displayAs: 'canteen' }, - p: .8 + correction: 'cant' }, - totalProb: .8, - matchLevel: SuggestionSimilarity.none + metadata: { + probabilities: { + prediction: .8, + correction: 1, + total: .8 * 1 + }, + autoSelectable: true, + matchLevel: SuggestionSimilarity.none + } } ]; @@ -527,20 +590,16 @@ describe('predictionAutoSelect', () => { assert.sameDeepMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => 
entry.components.prediction.autoAccept); assert.equal(autoselected, expectedSuggestion); }); // The idea: avoid "over-correcting" when a potential correction has a // super-high-frequency word. it('does not auto-select suggestion if its root correction is not most likely', () => { - const keepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thi', - p: .7 - }, - prediction: { - sample: { + const keepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { tag: 'keep', transform: { // can be null / "mocked out" insert: 'i', @@ -549,61 +608,74 @@ describe('predictionAutoSelect', () => { displayAs: '"thi"', matchesModel: false }, - p: .05 + correction: 'thi' }, - totalProb: .035 + metadata: { + probabilities: { + prediction: .05, + correction: .7, + total: .05 * .7 + }, + autoSelectable: true + } } - const highestCorrectionSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'thi', - p: .7 - }, - prediction: { - sample: { + const highestCorrectionSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'in', deleteLeft: 0 }, displayAs: 'thin' }, - p: .1 + correction: 'thi', }, - totalProb: .07 + metadata: { + probabilities: { + prediction: .1, + correction: .7, + total: .1 * .7 + }, + autoSelectable: true + } }; - const highestNonKeepSuggestion: CorrectionPredictionTuple = { - correction: { - sample: 'the', - p: .3 - }, - prediction: { - sample: { + const highestNonKeepSuggestion: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { // can be null / "mocked out" insert: 'e', deleteLeft: 0 }, displayAs: 'the' }, - p: 1 + correction: 'the' }, - totalProb: .3 + metadata: { + probabilities: { + prediction: 1, + correction: .3, + total: 1 * .3 + }, + autoSelectable: true + } }; - const predictions: CorrectionPredictionTuple[] = [ + const predictions: IntermediateCompositedPrediction[] = [ keepSuggestion, 
highestNonKeepSuggestion, highestCorrectionSuggestion ]; - const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0); - assert.isAbove(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); + const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0); + assert.isAbove(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid'); const originalPredictions = [].concat(predictions); assert.doesNotThrow(() => predictionAutoSelect(predictions)); assert.sameDeepMembers(predictions, originalPredictions); - const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept); + const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept); assert.isNotOk(autoselected); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts index 40fd2dafc6f..a86048b33d4 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts @@ -13,7 +13,7 @@ import { LexicalModelTypes } from "@keymanapp/common-types"; import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { applyTransform } from '@keymanapp/models-templates'; -import { CorrectionPredictionTuple, createDefaultKeep, models, SuggestionSimilarity } from "@keymanapp/lm-worker/test-index"; +import { IntermediateCompositedPrediction, createDefaultKeep, models, SuggestionSimilarity } from "@keymanapp/lm-worker/test-index"; import CasingFunction = LexicalModelTypes.CasingFunction; import Context = 
LexicalModelTypes.Context; @@ -109,13 +109,9 @@ describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: 'iphone', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'iphone', deleteLeft: 5 @@ -124,10 +120,17 @@ describe('createDefaultKeep', () => { matchesModel: false, tag: 'keep' }, - p: 1 + correction: 'iphone' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); @@ -150,13 +153,9 @@ describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: 'iphone', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'iphone', deleteLeft: 7 @@ -165,10 +164,17 @@ describe('createDefaultKeep', () => { matchesModel: false, tag: 'keep' }, - p: 1 + correction: 'iphone' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); @@ -191,13 +197,9 @@ describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: 'iphone', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'iphone', deleteLeft: 8 @@ -206,10 +208,17 @@ describe('createDefaultKeep', () => { 
matchesModel: false, tag: 'keep' }, - p: 1 + correction: 'iphone' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); @@ -232,13 +241,9 @@ describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: 'and', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'and', deleteLeft: 3 @@ -247,10 +252,17 @@ describe('createDefaultKeep', () => { matchesModel: false, tag: 'keep' }, - p: 1 + correction: 'and' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); @@ -273,13 +285,9 @@ describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: 'iphones', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'iphones', deleteLeft: 7 @@ -288,10 +296,17 @@ describe('createDefaultKeep', () => { matchesModel: false, tag: 'keep' }, - p: 1 + correction: 'iphones' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); @@ -314,13 +329,9 @@ 
describe('createDefaultKeep', () => { p: 1 }; - const expectedKeep: CorrectionPredictionTuple = { - correction: { - sample: '', - p: 1 - }, - prediction: { - sample: { + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: '', deleteLeft: 0 @@ -329,10 +340,17 @@ describe('createDefaultKeep', () => { matchesModel: false, tag: 'keep' }, - p: 1 + correction: '' }, - totalProb: 1, - matchLevel: SuggestionSimilarity.exact + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } }; const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts index a99187defa5..8234c6ba2a9 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts @@ -71,7 +71,7 @@ const DUMMY_MODEL_CONFIG = { languageUsesCasing: true }; -describe('predictFromCorrections', () => { +describe('correctAndEnumerateWithoutTraversals', () => { it('handles a single correction prefixing multiple entries - no transform ID', () => { const context: Context = { left: 'It', @@ -113,14 +113,15 @@ describe('predictFromCorrections', () => { }); const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context); - predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its')); - predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6)); + + predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its')); + 
predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6)); predictions.sort(tupleDisplayOrderSort); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions); + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions); - assert.approximately(predictions[0].totalProb, 0.18 * 0.6, 0.00001); - assert.approximately(predictions[1].totalProb, 0.02 * 0.6, 0.00001); + assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001); + assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001); }); it('handles a single correction prefixing multiple entries - with transform ID', () => { @@ -165,19 +166,20 @@ describe('predictFromCorrections', () => { }); const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context); - predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its')); - predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6)); + + predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its')); + predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6)); predictions.sort(tupleDisplayOrderSort); - assert.sameOrderedMembers(predictions.map((entry) => entry.prediction.sample.displayAs), ["it's", "its"]); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions.map((entry) => { + assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["it's", "its"]); + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((entry) => { entry = deepCopy(entry); entry.transformId = 314159; return entry; })); - assert.approximately(predictions[0].totalProb, 0.18 * 0.6, 0.00001); - assert.approximately(predictions[1].totalProb, 0.02 * 0.6, 0.00001); + 
assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001); + assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001); }); it('handles multiple corrections at once', () => { @@ -250,12 +252,12 @@ describe('predictFromCorrections', () => { const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context); predictions.sort(tupleDisplayOrderSort); - assert.sameOrderedMembers(predictions.map((entry) => entry.prediction.sample.displayAs), ["is", "it's", "isn't", "its"]); - assert.sameDeepMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions.flatMap((entry) => entry)); + assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["is", "it's", "isn't", "its"]); + assert.sameDeepMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.flatMap((entry) => entry)); - assert.approximately(predictions[0].totalProb, 0.4 * 0.4, 0.00001); - assert.approximately(predictions[1].totalProb, 0.18 * 0.6, 0.00001); - assert.approximately(predictions[2].totalProb, 0.4 * 0.2, 0.00001); - assert.approximately(predictions[3].totalProb, 0.02 * 0.6, 0.00001); + assert.approximately(predictions[0].metadata.probabilities.total, 0.4 * 0.4, 0.00001); + assert.approximately(predictions[1].metadata.probabilities.total, 0.18 * 0.6, 0.00001); + assert.approximately(predictions[2].metadata.probabilities.total, 0.4 * 0.2, 0.00001); + assert.approximately(predictions[3].metadata.probabilities.total, 0.02 * 0.6, 0.00001); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts index eea66d8ad0a..d1aa6df257e 100644 --- 
a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts @@ -4,7 +4,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { deepCopy } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { CorrectionPredictionTuple, dedupeSuggestions, models } from "@keymanapp/lm-worker/test-index"; +import { IntermediateCompositedPrediction, dedupeSuggestions, models } from "@keymanapp/lm-worker/test-index"; import Context = LexicalModelTypes.Context; import DummyModel = models.DummyModel; @@ -24,77 +24,89 @@ const testModel = new DummyModel({ * @returns */ const build_its_is_set = () => { - const its: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const its: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 0 }, displayAs: 'its' }, - p: 0.2 + correction: 'its' }, - totalProb: 0.16 - // matchLevel does not yet exist. + metadata: { + probabilities: { + prediction: .2, + correction: .8, + total: .2 * .8 + }, + autoSelectable: true + // matchLevel does not yet exist. 
+ } }; - const it_is: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const it_is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: '\'s', deleteLeft: 0 }, displayAs: 'it\'s' }, - p: 0.8 + correction: 'its' }, - totalProb: 0.64 + metadata: { + probabilities: { + prediction: .8, + correction: .8, + total: .8 * .8 + }, + autoSelectable: true + } }; - const is: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 1 }, displayAs: 'is' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; - const is_not: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is_not: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'sn\'t', deleteLeft: 1 }, displayAs: 'isn\'t' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; return { @@ -145,7 +157,7 @@ describe('dedupeSuggestions', () => { // There's no mathematically safe way to combine the components if the // underlying correction sources differ between duplicated suggestions, // though it's mathematically safe to combine their product. - expected.forEach((entry) => entry.totalProb *= (entry.prediction.sample.transform.insert == '\'s') ? 3 : 2); + expected.forEach((entry) => entry.metadata.probabilities.total *= (entry.components.prediction.transform.insert == '\'s') ? 
3 : 2); assert.deepEqual(deduplicated, expected); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts index 4e63055a101..c1c2ecacc5c 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts @@ -5,7 +5,7 @@ import { deepCopy } from '@keymanapp/web-utils'; import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { CorrectionPredictionTuple, finalizeSuggestions, models } from "@keymanapp/lm-worker/test-index"; +import { IntermediateCompositedPrediction, finalizeSuggestions, models } from "@keymanapp/lm-worker/test-index"; import DummyModel = models.DummyModel; import Outcome = LexicalModelTypes.Outcome; @@ -39,6 +39,7 @@ const testModelWithoutSpacing = new DummyModel({ } }); + /** * Builds a fresh copy of test values useful for suggestion-similarity * testing. @@ -47,78 +48,89 @@ const testModelWithoutSpacing = new DummyModel({ */ const build_its_is_set = (verbose?: string) => { const verboseFlag = (verbose == 'verbose' ? true : false); - - const its: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const its: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 0 }, displayAs: 'its' }, - p: 0.2 + correction: 'its' }, - totalProb: 0.16 - // matchLevel does not yet exist. + metadata: { + probabilities: { + prediction: .2, + correction: .8, + total: .2 * .8 + }, + autoSelectable: true + // matchLevel does not yet exist. 
+ } }; - const it_is: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const it_is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: '\'s', deleteLeft: 0 }, displayAs: 'it\'s' }, - p: 0.8 + correction: 'its' }, - totalProb: 0.64 + metadata: { + probabilities: { + prediction: .8, + correction: .8, + total: .8 * .8 + }, + autoSelectable: true + } }; - const is: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 1 }, displayAs: 'is' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; - const is_not: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is_not: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'sn\'t', deleteLeft: 1 }, displayAs: 'isn\'t' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; const baseDefinitions = { @@ -132,13 +144,13 @@ const build_its_is_set = (verbose?: string) => { const expected = unfinalized.map((entry) => { const mapped: Outcome = { - ...deepCopy(entry.prediction.sample), - p: entry.totalProb + ...deepCopy(entry.components.prediction), + p: entry.metadata.probabilities.total }; if(verboseFlag) { - mapped['correction-p'] = entry.correction.p; - mapped['lexical-p'] = entry.prediction.p; + mapped['correction-p'] = entry.metadata.probabilities.correction; + mapped['lexical-p'] = entry.metadata.probabilities.prediction; } return mapped; diff --git 
a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts index 72911d11281..e4cbfc81b1d 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts @@ -5,7 +5,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { deepCopy } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { CorrectionPredictionTuple, models, processSimilarity, SuggestionSimilarity, toAnnotatedSuggestion } from "@keymanapp/lm-worker/test-index"; +import { IntermediateCompositedPrediction, models, processSimilarity, SuggestionSimilarity, toAnnotatedSuggestion } from "@keymanapp/lm-worker/test-index"; import CasingFunction = LexicalModelTypes.CasingFunction; import Context = LexicalModelTypes.Context; @@ -109,77 +109,89 @@ const testModelWithCasing = new DummyModel({ * @returns */ const build_its_is_set = () => { - const its: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const its: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 0 }, displayAs: 'its' }, - p: 0.2 + correction: 'its' }, - totalProb: 0.16 - // matchLevel does not yet exist. + metadata: { + probabilities: { + prediction: .2, + correction: .8, + total: .2 * .8 + }, + autoSelectable: true + // matchLevel does not yet exist. 
+ } }; - const it_is: CorrectionPredictionTuple = { - correction: { - sample: 'its', - p: 0.8 - }, - prediction: { - sample: { + const it_is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: '\'s', deleteLeft: 0 }, displayAs: 'it\'s' }, - p: 0.8 + correction: 'its' }, - totalProb: 0.64 + metadata: { + probabilities: { + prediction: .8, + correction: .8, + total: .8 * .8 + }, + autoSelectable: true + } }; - const is: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 's', deleteLeft: 1 }, displayAs: 'is' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; - const is_not: CorrectionPredictionTuple = { - correction: { - sample: 'is', - p: 0.2 - }, - prediction: { - sample: { + const is_not: IntermediateCompositedPrediction = { + components: { + prediction: { transform: { insert: 'sn\'t', deleteLeft: 1 }, displayAs: 'isn\'t' }, - p: 0.5 + correction: 'is' }, - totalProb: 0.1 + metadata: { + probabilities: { + prediction: .5, + correction: .2, + total: .5 * .2 + }, + autoSelectable: true + } }; return { @@ -210,32 +222,22 @@ describe('processSimilarity', () => { const testSet = build_its_is_set(); const distribution = [...Object.values(testSet)]; - const expectation: CorrectionPredictionTuple[] = [ - { - ...testSet.its, - matchLevel: SuggestionSimilarity.exact - }, { - ...testSet.it_is, - matchLevel: SuggestionSimilarity.sameKey - }, { - ...testSet.is, - matchLevel: SuggestionSimilarity.none - }, { - ...testSet.is_not, - matchLevel: SuggestionSimilarity.none - } - ]; + const expectation: IntermediateCompositedPrediction[] = Object.values(testSet).map((entry) => ({...entry, metadata: {...entry.metadata}})); // own metadata per entry: must not alias the objects in `distribution` that processSimilarity mutates + expectation[0].metadata.matchLevel = SuggestionSimilarity.exact; // its + 
expectation[1].metadata.matchLevel = SuggestionSimilarity.sameKey; // it_is + expectation[2].metadata.matchLevel = SuggestionSimilarity.none; // is + expectation[3].metadata.matchLevel = SuggestionSimilarity.none; // is_not const its = testSet.its; const original_its = deepCopy(its); - const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.prediction.sample, 'keep', QuoteBehavior.noQuotes); + const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes); keep_its.matchesModel = true; processSimilarity(testModelWithCasing, distribution, context, trueInput); assert.sameDeepMembers(distribution, expectation); - assert.equal(its.prediction.sample.tag, 'keep'); - assert.deepEqual(its.prediction.sample, keep_its); + assert.equal(its.components.prediction.tag, 'keep'); + assert.deepEqual(its.components.prediction, keep_its); }); it(`selects contraction as 'more similar' than same-keyed non-contraction when context is contraction`, () => { @@ -257,32 +259,22 @@ describe('processSimilarity', () => { const testSet = build_its_is_set(); const distribution = [...Object.values(testSet)]; - const expectation: CorrectionPredictionTuple[] = [ - { - ...testSet.its, - matchLevel: SuggestionSimilarity.sameKey - }, { - ...testSet.it_is, - matchLevel: SuggestionSimilarity.exact - }, { - ...testSet.is, - matchLevel: SuggestionSimilarity.none - }, { - ...testSet.is_not, - matchLevel: SuggestionSimilarity.none - } - ]; + const expectation: IntermediateCompositedPrediction[] = Object.values(testSet).map((entry) => ({...entry, metadata: {...entry.metadata}})); // own metadata per entry: must not alias the objects in `distribution` that processSimilarity mutates + expectation[0].metadata.matchLevel = SuggestionSimilarity.sameKey; // its + expectation[1].metadata.matchLevel = SuggestionSimilarity.exact; // it_is + expectation[2].metadata.matchLevel = SuggestionSimilarity.none; // is + expectation[3].metadata.matchLevel = SuggestionSimilarity.none; // is_not const it_is = testSet.it_is; const original_it_is = deepCopy(it_is); - const keep_it_is = 
toAnnotatedSuggestion(testModelWithCasing, original_it_is.prediction.sample, 'keep', QuoteBehavior.noQuotes); + const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes); keep_it_is.matchesModel = true; processSimilarity(testModelWithCasing, distribution, context, trueInput); assert.sameDeepMembers(distribution, expectation); - assert.equal(it_is.prediction.sample.tag, 'keep'); - assert.deepEqual(it_is.prediction.sample, keep_it_is); + assert.equal(it_is.components.prediction.tag, 'keep'); + assert.deepEqual(it_is.components.prediction, keep_it_is); }); describe('with casing', () => { @@ -314,34 +306,22 @@ describe('processSimilarity', () => { // Have the predictions replace existing context parts with the lowercased equivalents. Object.values(testSet).forEach((entry) => { - const transform = entry.prediction.sample.transform; + const transform = entry.components.prediction.transform; transform.insert = transform.deleteLeft == 0 ? `it${transform.insert}` : `i${transform.insert}`; transform.deleteLeft = 2; }); const distribution = [...Object.values(testSet)]; - const expectation: CorrectionPredictionTuple[] = [ - { - ...testSet.its, - matchLevel: SuggestionSimilarity.sameKey - }, { - ...testSet.it_is, - // case mismatch, detectable because we have access to a lowercasing/uppercasing function. 
- matchLevel: SuggestionSimilarity.sameText - }, { - ...testSet.is, - matchLevel: SuggestionSimilarity.none - }, { - ...testSet.is_not, - matchLevel: SuggestionSimilarity.none - } - ]; - + const expectation: IntermediateCompositedPrediction[] = Object.values(testSet).map((entry) => ({...entry, metadata: {...entry.metadata}})); // own metadata per entry: must not alias the objects in `distribution` that processSimilarity mutates + expectation[0].metadata.matchLevel = SuggestionSimilarity.sameKey; // its + expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText; // it_is + expectation[2].metadata.matchLevel = SuggestionSimilarity.none; // is + expectation[3].metadata.matchLevel = SuggestionSimilarity.none; // is_not processSimilarity(testModelWithCasing, distribution, context, trueInput); // Because we mucked with the casing here, there is no perfect 'keep' match. - const keep = distribution.find((entry) => entry.prediction.sample.tag == 'keep'); + const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep'); assert.isNotOk(keep); assert.sameDeepMembers(distribution, expectation); }); @@ -368,34 +348,20 @@ describe('processSimilarity', () => { // Have the predictions replace existing context parts with the lowercased equivalents. Object.values(testSet).forEach((entry) => { - const transform = entry.prediction.sample.transform; + const transform = entry.components.prediction.transform; transform.insert = transform.deleteLeft == 0 ? `it${transform.insert}` : `i${transform.insert}`; transform.deleteLeft = 2; }); const distribution = [...Object.values(testSet)]; - const expectation: CorrectionPredictionTuple[] = [ - { - ...testSet.its, - matchLevel: SuggestionSimilarity.none - }, { - ...testSet.it_is, - // case mismatch, detectable because we have access to a lowercasing/uppercasing function. 
- matchLevel: SuggestionSimilarity.none - }, { - ...testSet.is, - matchLevel: SuggestionSimilarity.none - }, { - ...testSet.is_not, - matchLevel: SuggestionSimilarity.none - } - ]; + const expectation: IntermediateCompositedPrediction[] = Object.values(testSet).map((entry) => ({...entry, metadata: {...entry.metadata}})); // own metadata per entry: must not alias the objects in `distribution` that processSimilarity mutates + expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none); processSimilarity(testModelWithoutCasing, distribution, context, trueInput); // Because we mucked with the casing here, there is no perfect 'keep' match. - const keep = distribution.find((entry) => entry.prediction.sample.tag == 'keep'); + const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep'); assert.isNotOk(keep); assert.sameDeepMembers(distribution, expectation); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts index 9b9ab2c3121..4bac59cafc4 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts @@ -81,6 +81,20 @@ describe('Custom Punctuation', function () { open: "'", close: "'" } + }, + // Some of the suggestions above actually wordbreak differently from + // what might be expected. So, we override the wordbreaker to ensure + // the tests run smoothly. 
+ wordbreaker: (text) => { + const textLen = text.length; + if(text.charAt(textLen - 1) == " ") { + return [ + {text: text.substring(0, textLen-1), start: 0, end: textLen-1, length: textLen-1}, // everything before the trailing space; text now agrees with end/length + {text: text.substring(textLen-1), start: textLen-1, end: textLen, length: 1} + ]; + } else { + return [{text, start: 0, end: textLen, length: textLen}]; + } } }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts index 928b6e75c47..8c20df9626e 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts @@ -869,6 +869,9 @@ describe('ModelCompositor', function() { deleteLeft: 1 } + // Future adjustment: add the 'baseSuggestion' to DummyModel so that it actually + // returns the suggestion again. + // `new models.DummyModel(..., futureSuggestions: [[baseSuggestion]])` let model = new models.DummyModel({punctuation: englishPunctuation}); let compositor = new ModelCompositor(model, true); @@ -883,6 +886,7 @@ describe('ModelCompositor', function() { // As this test is a bit... 'hard-wired', we only get the 'keep' suggestion. // It should still be accurate, though. + // Can be fixed via the "Future adjustment" noted above. assert.equal(suggestions.length, 1); let expectedTransform = {