Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import { ContextTransition } from './correction/context-transition.js';
import { ExecutionTimer } from './correction/execution-timer.js';
import ModelCompositor from './model-compositor.js';
import { EDIT_DISTANCE_COST_SCALE, getBestTokenMatches } from './correction/distance-modeler.js';
import { TokenResultMapping } from './correction/token-result-mapping.js';

const searchForProperty = defaultWordbreaker.searchForProperty;

Expand All @@ -28,6 +27,7 @@ import Reversion = LexicalModelTypes.Reversion;
import Suggestion = LexicalModelTypes.Suggestion;
import SuggestionTag = LexicalModelTypes.SuggestionTag;
import Transform = LexicalModelTypes.Transform;
import { TokenResult } from './correction/tokenization-corrector.js';

/*
* The functions in this file exist to provide unit-testable stateless components for the
Expand Down Expand Up @@ -390,24 +390,55 @@ export function determineSuggestionRange(
}
}

/**
 * Bundles the core, preprocessed inputs needed to generate predictions,
 * independent of the type of model in use.
 */
export interface PredictionParameters {
  /**
   * The part of the context that generated suggestions should leave untouched.
   */
  rootContext: Context;

  /**
   * The corrected portion of the context in tokenized form, usable as the
   * basis for generating suggestions.
   *
   * Corrections are applied to `rootContext` iteratively: when suggesting
   * from the correction at index 1, the "unchanged" (root) context for that
   * suggestion already includes the changes from the entry at index 0 (or
   * possibly a suggestion derived from it).
   */
  tokenizedCorrection: ProbabilityMass<Transform>[];

  /**
   * A closure to run against the metadata of each generated suggestion once
   * it has been produced.
   * @param entry The correction/prediction tuple whose metadata the closure
   *              updates.
   */
  applyInPost: (entry: CorrectionPredictionTuple) => void;
}

/**
* This function takes in metadata about generated corrections (for models that
* implement Traversals) and uses that to construct predictions based upon those
* corrections.
* @param transition Context-transition data underlying the tokenization that led to the correction
* @param tokenization The tokenization from which the correction was generated.
* @param match The generated correction itself - the correction string and its cost
* @param costFactor A multiplicative factor used to adjust the cost when building prediction probabilities.
* implement Traversals) and uses that to produce the corresponding parameters
* to use for generating suggestions.
* @param transition Context-transition data underlying the tokenization that
* led to the correction
* @param tokenization The tokenization from which the correction was
* generated.
* @param match The generated correction itself - the correction string
* and its cost
* @param costFactor A multiplicative factor used to adjust the cost when
* building prediction probabilities.
* @returns
*/
export function buildAndMapPredictions(
export function determineTokenizedCorrectionSequence(
transition: ContextTransition,
tokenization: ContextTokenization,
match: Readonly<TokenResultMapping>,
match: Readonly<TokenResult>,
costFactor: number
): CorrectionPredictionTuple[] {
const model = transition.final.model;

): PredictionParameters {
const applicationTarget = transition.base.displayTokenization;
const { tokensToRemove, tokensToPredict } = determineSuggestionRange(applicationTarget, tokenization);

Expand All @@ -418,7 +449,10 @@ export function buildAndMapPredictions(
const correctionTransform: Transform = {
insert: match.matchString, // insert correction string
deleteLeft: 0,
id: transition.transitionId // The correction should always be based on the most recent external transform/transcription ID.
}

if(transition.transitionId) {
correctionTransform.id = transition.transitionId // The correction should always be based on the most recent external transform/transcription ID.
}

const rootCost = match.totalCost;
Expand All @@ -427,15 +461,16 @@ export function buildAndMapPredictions(
p: Math.exp(-rootCost * costFactor)
};

const predictions = predictFromCorrectionSequence(model, [predictionRoot], rootContext);
predictions.forEach((entry) => {
entry.preservationTransform = tokenization.taillessTrueKeystroke;
// // Will need an extra lookup layer if the suggestion is generated from within a cluster.
// entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization);
entry.prediction.sample.transform.deleteLeft = deleteLeft;
});

return predictions;
return {
rootContext,
tokenizedCorrection: [predictionRoot],
applyInPost: (entry: CorrectionPredictionTuple) => {
entry.preservationTransform = tokenization.taillessTrueKeystroke;
// // Will need an extra lookup layer if the suggestion is generated from within a cluster.
// entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization);
entry.prediction.sample.transform.deleteLeft = deleteLeft;
}
};
}

/**
Expand Down Expand Up @@ -549,7 +584,9 @@ export async function correctAndEnumerate(
*/
const costFactor = (tokenization.tail.inputCount <= 1) ? ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1;

const predictions = buildAndMapPredictions(transition, tokenization, match, costFactor);
const predictionPrep = determineTokenizedCorrectionSequence(transition, tokenization, match, costFactor);
const predictions = predictFromCorrectionSequence(lexicalModel, predictionPrep.tokenizedCorrection, predictionPrep.rootContext);
predictions.forEach((p) => predictionPrep.applyInPost(p));

// Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions.
if(predictions.length > 0 && bestCorrectionCost === undefined) {
Expand Down
Loading
Loading