diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts index 2476fcc350e..8753805d402 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts @@ -46,7 +46,13 @@ export class ContextState { /** * Denotes the possible tokenization(s) for the represented Context. */ - tokenization: ContextTokenization; + _tokenizations: ContextTokenization[]; + + /** + * Tracks the tokenization pattern best matching the word boundary + * patterns in the actual context. + */ + _displayTokenization: ContextTokenization; /** * Denotes the keystroke-sourced Transform that was last applied to a @@ -97,7 +103,14 @@ export class ContextState { * is visible to the user. */ get displayTokenization(): ContextTokenization { - return this.tokenization; + return this._displayTokenization; + } + + /** + * Denotes the possible tokenization(s) for the represented Context. 
+ */ + get tokenizations(): ContextTokenization[] { + return this._tokenizations; } /** @@ -127,13 +140,16 @@ export class ContextState { * @param tokenization Precomputed tokenization for the context, leveraging previous * correction-search progress and results */ - constructor(context: Context, model: LexicalModel, tokenization?: ContextTokenization); - constructor(param1: Context | ContextState, model?: LexicalModel, tokenization?: ContextTokenization) { + constructor(context: Context, model: LexicalModel, tokenization?: ContextTokenization, tokenizations?: ContextTokenization[]); + constructor(param1: Context | ContextState, model?: LexicalModel, tokenization?: ContextTokenization, tokenizations?: ContextTokenization[]) { if(!(param1 instanceof ContextState)) { this.context = param1; this.model = model; if(tokenization) { - this.tokenization = tokenization; + this._tokenizations = tokenizations ? tokenizations : [tokenization]; + this._displayTokenization = tokenization; + + this.inputTransforms = new Map(); } else { this.initFromReset(); } @@ -142,7 +158,8 @@ export class ContextState { Object.assign(this, stateToClone); this.inputTransforms = new Map(stateToClone.inputTransforms); - this.tokenization = new ContextTokenization(stateToClone.tokenization); + this._tokenizations = stateToClone.tokenizations.map((t) => new ContextTokenization(t)); + this._displayTokenization = this._tokenizations[stateToClone.tokenizations.indexOf(stateToClone._displayTokenization)] ?? new ContextTokenization(stateToClone._displayTokenization); // reuse the list's clone so the display entry stays aliased to _tokenizations; fall back to a separate clone if it was not a list member // A shallow copy of the array is fine, but we'd be best off // not aliasing the array itself. 
@@ -173,7 +190,8 @@ export class ContextState { if(baseTokens.length == 0) { baseTokens.push(ContextToken.fromRawText(this.model, '')); } - this.tokenization = new ContextTokenization(baseTokens); + this._displayTokenization = new ContextTokenization(baseTokens); + this._tokenizations = [this._displayTokenization]; this.inputTransforms = new Map(); } @@ -209,8 +227,7 @@ export class ContextState { const slideUpdateTransform = determineContextSlideTransform(this.context, context); - // Goal: allow multiple base tokenizations. - const startTokenizations = [this.tokenization].map((t) => { + const startTokenizations = this.tokenizations.map((t) => { return t.applyContextSlide(lexicalModel, slideUpdateTransform); }); @@ -222,14 +239,15 @@ export class ContextState { // If the tokenizations match, clone the ContextState; we want to preserve a post-application // context separately from pre-application contexts for predictions based on empty roots. const state = new ContextState(this); - state.tokenization = [...startTokenizations.values()][0]; + state._tokenizations = [...startTokenizations.values()]; + state._displayTokenization = startTokenizations[this.tokenizations.indexOf(this._displayTokenization)] ?? this._displayTokenization.applyContextSlide(lexicalModel, slideUpdateTransform); // reuse the already-slid list entry so display stays aliased to _tokenizations; slide separately only if it was not a list member transition.finalize(state, transformDistribution); return transition; } const { subsets, keyMatchingUserContext: trueInputSubsetKey } = precomputeTransitions(startTokenizations, transformDistribution); - const resultTokenization = transitionTokenizations(subsets, transformDistribution).get(trueInputSubsetKey); - + const possibleTokenizations = transitionTokenizations(subsets, transformDistribution); + const resultTokenization = possibleTokenizations.get(trueInputSubsetKey); // ------------ // So, if we have a suggestion transition ID at the end and didn't just apply... @@ -240,10 +258,11 @@ export class ContextState { // 'any'.) 
const state = new ContextState(applyTransform(trueInput, context), lexicalModel); - state.tokenization = resultTokenization; + state._tokenizations = [resultTokenization]; // TODO: [...possibleTokenizations.values()]; + state._displayTokenization = resultTokenization; state.appliedInput = transformDistribution?.[0].sample; transition.finalize(state, transformDistribution); - transition.revertableTransitionId = state.tokenization.tail.appliedTransitionId; + transition.revertableTransitionId = state._displayTokenization.tail.appliedTransitionId; return transition; } } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts index a75245908e8..1ffb277fc30 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts @@ -179,9 +179,13 @@ export class ContextTransition { // body and after any appended whitespace. 
resultingTokenization.tail.appliedTransitionId = suggestion.transformId; - const resultingState = new ContextState(applyTransform(transformToApply, baseState.context), lexicalModel); - resultingState.tokenization = resultingTokenization; // [resultingTokenization].concat(preservedVariations); - resultingState.appliedInput = baseState.appliedInput; + const resultingState = new ContextState( + applyTransform(transformToApply, baseState.context), + lexicalModel, + resultingTokenization, + [resultingTokenization] // [resultingTokenization].concat(preservedVariations); + ); + resultingState.appliedInput = transformToApply; resultingState.appliedSuggestionId = suggestion.id; resultingState.suggestions = this.final.suggestions; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index ede4aa36363..891dd923cc8 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -301,8 +301,7 @@ export function determineContextTransition( if(inputIsEmpty) { // Directly build a simple empty transition that duplicates the last seen state. // This should also clear the preservation transform if it exists! - const tokenization = new ContextTokenization(contextTracker.latest.final.tokenization.tokens); - const priorState = new ContextState(context, transition.final.model, tokenization); + const priorState = new ContextState(contextTracker.latest.final); transition = new ContextTransition(priorState, inputTransform.id); transition.finalize(priorState, transformDistribution); } else if( @@ -569,7 +568,7 @@ export async function correctAndEnumerate( // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. // The 'eventual' logic will be significantly more complex, though still manageable. 
- const tokenizations = [transition.final.tokenization]; + const tokenizations = transition.final.tokenizations; const searchModules = tokenizations.map(t => t.tail.searchModule); // Only run the correction search when corrections are enabled. diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts index 5e579004777..677b541181a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts @@ -36,7 +36,7 @@ describe('ContextState', () => { assert.equal(state.context, context); assert.equal(state.model, plainModel); - assert.isOk(state.tokenization); + assert.isOk(state.displayTokenization); assert.isUndefined(state.isManuallyApplied); assert.isNotOk(state.suggestions); assert.isNotOk(state.appliedSuggestionId); @@ -46,36 +46,36 @@ describe('ContextState', () => { it('creates one empty token for an empty context', () => { let context = { left: '', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 1); - assert.equal(state.tokenization.tail.exampleInput, ''); + assert.isOk(state.displayTokenization); + assert.equal(state.displayTokenization.tokens.length, 1); + assert.equal(state.displayTokenization.tail.exampleInput, ''); }); it('creates tokens for initial text (without ending whitespace)', () => { let context = { left: 'the quick brown fox', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 7); - assert.deepEqual(state.tokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox']); + 
assert.isOk(state.displayTokenization); + assert.equal(state.displayTokenization.tokens.length, 7); + assert.deepEqual(state.displayTokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox']); let context2 = { left: "an apple a day keeps the doctor", startOfBuffer: true, endOfBuffer: true }; let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; let state2 = new ContextState(context2, plainModel); - assert.deepEqual(state2.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(state2.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); }); it('creates tokens for initial text (with extra empty token for ending whitespace)', () => { let context = { left: 'the quick brown fox ', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 9); - assert.deepEqual(state.tokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', '']); + assert.isOk(state.displayTokenization); + assert.equal(state.displayTokenization.tokens.length, 9); + assert.deepEqual(state.displayTokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', '']); let context2 = { left: "an apple a day keeps the doctor ", startOfBuffer: true, endOfBuffer: true }; let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; let state2 = new ContextState(context2, plainModel); - assert.deepEqual(state2.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(state2.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); }); }); @@ -97,7 +97,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); 
assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // // Phrased this way to facilitate TS type-inference; assert.isTrue() does // // NOT do this for us! @@ -124,7 +124,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // // Phrased this way to facilitate TS type-inference; assert.isTrue() does // // NOT do this for us! @@ -151,7 +151,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -176,7 +176,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), 
rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -201,7 +201,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -223,7 +223,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -246,18 +246,18 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that follows after it. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch?.final; // space transform - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2].searchModule.inputCount, 1); // empty transform - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); - assert.isTrue(state.tokenization.tail.searchModule instanceof SearchQuotientSpur); - assert.deepEqual((state.tokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 1].searchModule.inputCount, 1); + assert.isTrue(state.displayTokenization.tail.searchModule instanceof SearchQuotientSpur); + assert.deepEqual((state.displayTokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); }); it("properly matches and aligns when whitespace before final empty token is extended", function() { @@ -273,19 +273,19 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that follows after it. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch?.final; // Two whitespaces, one of which is new! - const preTail = state.tokenization.tokens[state.tokenization.tokens.length - 2]; + const preTail = state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2]; assert.equal(preTail.searchModule.inputCount, 2); assert.deepEqual((preTail.searchModule.parents[0] as SearchQuotientSpur).lastInput, [{sample: transform, p: 1}]); - assert.equal(state.tokenization.tail.searchModule.inputCount, 1); - assert.isTrue(state.tokenization.tail.searchModule instanceof SearchQuotientSpur); - assert.deepEqual((state.tokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); + assert.equal(state.displayTokenization.tail.searchModule.inputCount, 1); + assert.isTrue(state.displayTokenization.tail.searchModule instanceof SearchQuotientSpur); + assert.deepEqual((state.displayTokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); }); it("properly matches and aligns when a 'wordbreak' is removed via backspace", function() { @@ -301,7 +301,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isOk(newContextMatch?.final); - assert.deepEqual(newContextMatch?.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch?.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); }); it("properly matches and aligns when an implied 'wordbreak' occurs (as when following \"'\")", function() { @@ -317,13 +317,13 @@ 
describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); - assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: '', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: '', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 1].searchModule.inputCount, 1); }) // Needs improved context-state management (due to 2x tokens) @@ -343,15 +343,15 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that follows after it. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2].searchModule.inputCount, 1); assert.equal( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1 + state.displayTokenization.tokens[state.displayTokenization.tokens.length - 1].searchModule.inputCount, 1 ); // if(!newContextMatch.final.tokenization.alignment.canAlign) { @@ -374,15 +374,15 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve all text preceding the new token when applying a suggestion. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: 'd ', deleteLeft: 0}); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: 'd ', deleteLeft: 0}); // The 'wordbreak' transform let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2].searchModule.inputCount, 1); assert.equal( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1 + state.displayTokenization.tokens[state.displayTokenization.tokens.length - 1].searchModule.inputCount, 1 ); }); @@ -399,14 +399,14 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve all text preceding the new token when applying a suggestion. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: 'tor ', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: 'tor ', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1); - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 2].searchModule.inputCount, 1); + assert.equal(state.displayTokenization.tokens[state.displayTokenization.tokens.length - 1].searchModule.inputCount, 1); }); it('handles case where tail token is split into three rather than two', function() { @@ -432,7 +432,7 @@ describe('ContextState', () => { let problemContextMatch = baseState.analyzeTransition({left: "text'", startOfBuffer: true, endOfBuffer: true}, [{sample: transform, p: 1}]); assert.isNotNull(problemContextMatch); - assert.deepEqual(problemContextMatch.final.tokenization.exampleInput, ['text', '\'', '"']); + assert.deepEqual(problemContextMatch.final.displayTokenization.exampleInput, ['text', '\'', '"']); }); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts index f27ebd72f7d..989e950b24b 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts @@ -76,10 +76,10 @@ describe('ContextTracker', function() { assert.equal(postContextMatch.final.appliedSuggestionId, baseSuggestion.id); // Penultimate token corresponds to whitespace, which does not have a 'raw' representation. 
- assert.equal(postContextMatch.final.tokenization.tokens[postContextMatch.final.tokenization.tokens.length - 2].exampleInput, ' '); + assert.equal(postContextMatch.final.displayTokenization.tokens[postContextMatch.final.displayTokenization.tokens.length - 2].exampleInput, ' '); // Final token is empty (follows a wordbreak) - assert.equal(postContextMatch.final.tokenization.tail.exampleInput, ''); + assert.equal(postContextMatch.final.displayTokenization.tail.exampleInput, ''); }); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts index 263a24aa471..d3cfd914469 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts @@ -25,9 +25,9 @@ var plainModel = new TrieModel(jsonFixture('models/tries/english-1000'), function assertClonedStateMatch(a: ContextState, b: ContextState) { assert.notEqual(a, b); - assert.notEqual(a.tokenization, b.tokenization); - assert.notSameOrderedMembers(a.tokenization.tokens, b.tokenization.tokens); - assert.sameOrderedMembers(a.tokenization.exampleInput, b.tokenization.exampleInput); + assert.notEqual(a.displayTokenization, b.displayTokenization); + assert.notSameOrderedMembers(a.displayTokenization.tokens, b.displayTokenization.tokens); + assert.sameOrderedMembers(a.displayTokenization.exampleInput, b.displayTokenization.exampleInput); assert.deepEqual(a.suggestions, b.suggestions); } @@ -50,7 +50,7 @@ describe('ContextTransition', () => { const transition = new ContextTransition(baseState, 1); assert.sameOrderedMembers( - transition.base.tokenization.tokens.map((t) => t.exampleInput), + transition.base.displayTokenization.tokens.map((t) => t.exampleInput), ['hello', ' ', 'world', ' 
', ''] ); assert.equal(transition.transitionId, 1); @@ -67,7 +67,7 @@ describe('ContextTransition', () => { const transition = new ContextTransition(baseState, 1); assert.sameOrderedMembers( - transition.base.tokenization.tokens.map((t) => t.exampleInput), + transition.base.displayTokenization.tokens.map((t) => t.exampleInput), ['hello', ' ', 'world', ' ', ''] ); @@ -143,17 +143,17 @@ describe('ContextTransition', () => { assert.notEqual(appliedTransition.base, transition); assert.isOk(appliedTransition.appended); assert.notEqual(appliedTransition.appended, transition); - assert.sameOrderedMembers(appliedTransition.base.final.tokenization.exampleInput, [ + assert.sameOrderedMembers(appliedTransition.base.final.displayTokenization.exampleInput, [ 'hello', ' ', 'world' ]); - assert.sameOrderedMembers(appliedTransition.appended.final.tokenization.exampleInput, [ + assert.sameOrderedMembers(appliedTransition.appended.final.displayTokenization.exampleInput, [ 'hello', ' ', 'world', ' ', '' ]); assert.equal(appliedTransition.base.final.appliedSuggestionId, suggestions[0].id); assert.equal(appliedTransition.appended.final.appliedSuggestionId, suggestions[0].id); // 3 long, only last token was edited. 
- appliedTransition.base.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.base.final.displayTokenization.tokens.forEach((token, index) => { if(index >= 2) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -161,7 +161,7 @@ describe('ContextTransition', () => { } }); - appliedTransition.appended.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.appended.final.displayTokenization.tokens.forEach((token, index) => { if(index >= 2) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -227,17 +227,17 @@ describe('ContextTransition', () => { assert.notEqual(appliedTransition.base, transition); assert.isOk(appliedTransition.appended); assert.notEqual(appliedTransition.appended, transition); - assert.sameOrderedMembers(appliedTransition.base.final.tokenization.exampleInput, [ + assert.sameOrderedMembers(appliedTransition.base.final.displayTokenization.exampleInput, [ 'hello', ' ', 'world', ' ', 'the' ]); - assert.sameOrderedMembers(appliedTransition.appended.final.tokenization.exampleInput, [ + assert.sameOrderedMembers(appliedTransition.appended.final.displayTokenization.exampleInput, [ 'hello', ' ', 'world', ' ', 'the', ' ', '' ]); assert.equal(appliedTransition.base.final.appliedSuggestionId, suggestions[0].id); assert.equal(appliedTransition.appended.final.appliedSuggestionId, suggestions[0].id); // 3 long, only last token was edited. 
- appliedTransition.base.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.base.final.displayTokenization.tokens.forEach((token, index) => { if(index >= 4) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -245,7 +245,7 @@ describe('ContextTransition', () => { } }); - appliedTransition.appended.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.appended.final.displayTokenization.tokens.forEach((token, index) => { if(index >= 4) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts index be81e711610..be2d1177571 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts @@ -48,7 +48,7 @@ describe('determineSuggestionAlignment', () => { transition.finalize(transition.base, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "techn".length); @@ -65,7 +65,7 @@ describe('determineSuggestionAlignment', () => { const transition = baseState.analyzeTransition(context, [{sample: { insert: '', deleteLeft: 1 }, p: 1}]) // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = 
determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "tech".length + 1 /* for the deleted whitespace */); @@ -82,7 +82,7 @@ describe('determineSuggestionAlignment', () => { const transition = baseState.analyzeTransition(context, [{sample: { insert: 'n', deleteLeft: 1 }, p: 1}]) // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "techn".length + 1 /* for the deleted whitespace */); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts index 218f18313ad..d232d90b1a7 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts @@ -103,12 +103,12 @@ describe('determineContextTransition', () => { assert.isOk(transition); assert.equal(transition, tracker.latest); assert.isFalse(warningEmitterSpy.called); - assert.sameOrderedMembers(transition.final.tokenization.exampleInput, ['this', ' ', 'is', ' ', 'for', ' ', 'techn']); - assert.isOk(transition.final.tokenization.transitionEdits); + assert.sameOrderedMembers(transition.final.displayTokenization.exampleInput, ['this', ' ', 'is', ' ', 'for', ' ', 'techn']); + assert.isOk(transition.final.displayTokenization.transitionEdits); assert.equal(transition.final.context.left, 
targetContext.left); assert.equal(transition.final.context.right ?? "", targetContext.right ?? ""); assert.sameDeepOrderedMembers(transition.inputDistribution, inputDistribution); - assert.isNotOk(transition.final.tokenization.taillessTrueKeystroke); + assert.isNotOk(transition.final.displayTokenization.taillessTrueKeystroke); assert.equal(transition.transitionId, 1); } finally { warningEmitterSpy.restore(); @@ -226,8 +226,8 @@ describe('determineContextTransition', () => { assert.notEqual(extendingTransition, baseTransition); // These values support delayed reversions. - assert.equal(extendingTransition.final.tokenization.tokens[6].appliedTransitionId, pred_testing.transformId); - assert.equal(extendingTransition.final.tokenization.tokens[7].appliedTransitionId, pred_testing.transformId); + assert.equal(extendingTransition.final.displayTokenization.tokens[6].appliedTransitionId, pred_testing.transformId); + assert.equal(extendingTransition.final.displayTokenization.tokens[7].appliedTransitionId, pred_testing.transformId); // We start a new token here, rather than continue (and/or replace) an old one; // this shouldn't be set here yet.