diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 942773fa57a..0c492c76255 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -15,7 +15,6 @@ import { LegacyQuotientSpur } from "./legacy-quotient-spur.js"; import { LegacyQuotientRoot } from "./legacy-quotient-root.js"; import { generateSubsetId } from './tokenization-subsets.js'; -import Distribution = LexicalModelTypes.Distribution; import LexicalModel = LexicalModelTypes.LexicalModel; import Transform = LexicalModelTypes.Transform; @@ -119,14 +118,6 @@ export class ContextToken { return new ContextToken(searchModule, isPartial); } - /** - * Call this to record the original keystroke Transforms for the context range - * corresponding to this token. - */ - addInput(inputSource: PathInputProperties, distribution: Distribution) { - this._searchModule = new LegacyQuotientSpur(this._searchModule, distribution, inputSource); - } - get inputCount() { return this._searchModule.inputCount; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index 6ccaba1ead7..fc2f81615c1 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -15,12 +15,13 @@ import TransformUtils from '../transformUtils.js'; import { computeDistance, EditOperation, EditTuple } from './classical-calculation.js'; import { determineModelTokenizer } from '../model-helpers.js'; import { ExtendedEditOperation, SegmentableDistanceCalculation } from './segmentable-calculation.js'; +import { LegacyQuotientRoot } from './legacy-quotient-root.js'; +import { LegacyQuotientSpur } from './legacy-quotient-spur.js'; import { PathInputProperties } from './search-quotient-node.js'; import { TransitionEdge } from './tokenization-subsets.js'; import LexicalModel = LexicalModelTypes.LexicalModel; import Transform = LexicalModelTypes.Transform; -import { LegacyQuotientRoot } from './legacy-quotient-root.js'; // May be able to "get away" with 2 & 5 or so, but having extra will likely help // with edit path stability. @@ -637,8 +638,10 @@ export class ContextTokenization { inputSource.segment.end = appliedLength; } - affectedToken = new ContextToken(affectedToken); - affectedToken.addInput(inputSource, distribution); + affectedToken = new ContextToken( + new LegacyQuotientSpur(affectedToken.searchModule, distribution, inputSource), + affectedToken.isPartial + ); const tokenize = determineModelTokenizer(lexicalModel); affectedToken.isWhitespace = tokenize({left: affectedToken.exampleInput, startOfBuffer: false, endOfBuffer: false}).left[0]?.isWhitespace ?? false; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index 6a308264461..4b8169264c5 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -15,7 +15,7 @@ import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs' import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; -import { ContextToken, correction, generateSubsetId, getBestMatches, InputSegment, LegacyQuotientRoot, models, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; +import { ContextToken, correction, generateSubsetId, getBestMatches, InputSegment, LegacyQuotientRoot, LegacyQuotientSpur, models, SearchQuotientSpur } from '@keymanapp/lm-worker/test-index'; import { quotientPathHasInputs } from "../../helpers/quotientPathHasInputs.js"; @@ -127,36 +127,45 @@ describe('ContextToken', function() { const srcTransform = { insert: "can't", deleteLeft: 0, deleteRight: 0, id: 1 }; const srcSubsetId = generateSubsetId(); - const token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); - const token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); - const token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); - token1.addInput({ - segment: { - transitionId: srcTransform.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransform.id, - start: 3 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransform.id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); + token1 = new ContextToken(new LegacyQuotientSpur( + token1.searchModule, + [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); + + token2 = new ContextToken(new LegacyQuotientSpur( + token2.searchModule, + [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 3 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); + + token3 = new ContextToken(new LegacyQuotientSpur( + token3.searchModule, + [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); const merged = ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); @@ -185,67 +194,85 @@ describe('ContextToken', function() { ]; // apples - const token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); // and - const token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); // sour - const token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); // grapes - const token4 = new ContextToken(new LegacyQuotientRoot(plainModel)); - const tokensToMerge = [token1, token2, token3, token4] + let token4 = new ContextToken(new LegacyQuotientRoot(plainModel)); - token1.addInput({ - segment: { - transitionId: srcTransforms[0].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[0] - }, [{sample: srcTransforms[0], p: 1}]); - token1.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 1 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - token3.addInput({ - segment: { - transitionId: srcTransforms[2].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[2] - }, [{sample: srcTransforms[2], p: 1}]); + token1 = new ContextToken(new LegacyQuotientSpur( + token1.searchModule, + [{sample: srcTransforms[0], p: 1}], { + segment: { + transitionId: srcTransforms[0].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[0] + } + )); + token1 = new ContextToken(new LegacyQuotientSpur( + token1.searchModule, + [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); - token4.addInput({ - segment: { - transitionId: srcTransforms[3].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[3] - }, [{sample: srcTransforms[3], p: 1}]); + token2 = new ContextToken(new LegacyQuotientSpur( + token2.searchModule, + [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 1 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new LegacyQuotientSpur( + token3.searchModule, + [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new LegacyQuotientSpur( + token3.searchModule, + [{sample: srcTransforms[2], p: 1}], { + segment: { + transitionId: srcTransforms[2].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[2] + } + )); + + token4 = new ContextToken(new LegacyQuotientSpur( + token4.searchModule, + [{sample: srcTransforms[3], p: 1}], { + segment: { + transitionId: srcTransforms[3].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[3] + } + )); + + const tokensToMerge = [token1, token2, token3, token4]; const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ @@ -274,68 +301,86 @@ describe('ContextToken', function() { generateSubsetId() ]; - // apples - const token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); + // apples + let token1 = new ContextToken(new LegacyQuotientRoot(plainModel)); // and - const token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token2 = new ContextToken(new LegacyQuotientRoot(plainModel)); // sour - const token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); + let token3 = new ContextToken(new LegacyQuotientRoot(plainModel)); // grapes - const token4 = new ContextToken(new LegacyQuotientRoot(plainModel)); - const tokensToMerge = [token1, token2, token3, token4] + let token4 = new ContextToken(new LegacyQuotientRoot(plainModel)); - token1.addInput({ - segment: { - transitionId: srcTransforms[0].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[0] - }, [{sample: srcTransforms[0], p: 1}]); - token1.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 1 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - token3.addInput({ - segment: { - transitionId: srcTransforms[2].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[2] - }, [{sample: srcTransforms[2], p: 1}]); + token1 = new ContextToken(new LegacyQuotientSpur( + token1.searchModule, + [{sample: srcTransforms[0], p: 1}], { + segment: { + transitionId: srcTransforms[0].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[0] + } + )); + token1 = new ContextToken(new LegacyQuotientSpur( + token1.searchModule, + [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); - token4.addInput({ - segment: { - transitionId: srcTransforms[3].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[3] - }, [{sample: srcTransforms[3], p: 1}]); + token2 = new ContextToken(new LegacyQuotientSpur( + token2.searchModule, + [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 1 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + + token3 = new ContextToken(new LegacyQuotientSpur( + token3.searchModule, + [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new LegacyQuotientSpur( + token3.searchModule, + [{sample: srcTransforms[2], p: 1}], { + segment: { + transitionId: srcTransforms[2].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[2] + } + )); + + token4 = new ContextToken(new LegacyQuotientSpur( + token4.searchModule, + [{sample: srcTransforms[3], p: 1}], { + segment: { + transitionId: srcTransforms[3].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[3] + } + )); + const tokensToMerge = [token1, token2, token3, token4]; const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ @@ -371,15 +416,18 @@ describe('ContextToken', function() { ] ] - const tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); + let tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, bestProbFromSet: .75, - subsetId: generateSubsetId() - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new LegacyQuotientSpur( + tokenToSplit.searchModule, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, bestProbFromSet: .75, + subsetId: generateSubsetId() + } + )); }; assert.equal(tokenToSplit.sourceRangeKey, 'T11+T12+T13+T14'); @@ -414,16 +462,19 @@ describe('ContextToken', function() { const splitTextArray = ['big', 'large', 'transform']; const subsetId = generateSubsetId(); - const tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); + let tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new LegacyQuotientSpur( + tokenToSplit.searchModule, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId + } + )); }; assert.equal(tokenToSplit.sourceRangeKey, `T${keystrokeDistributions[0][0].sample.id}`); @@ -485,16 +536,19 @@ describe('ContextToken', function() { generateSubsetId() ]; - const tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); + let tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: subsetIds[i] - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new LegacyQuotientSpur( + tokenToSplit.searchModule, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: subsetIds[i] + } + )); }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); @@ -612,16 +666,19 @@ describe('ContextToken', function() { generateSubsetId() ]; - const tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); + let tokenToSplit = new ContextToken(new LegacyQuotientRoot(plainModel)); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: subsetIds[i] - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new LegacyQuotientSpur( + tokenToSplit.searchModule, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: subsetIds[i] + } + )); }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index 60e87317552..ebb33a66b59 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -28,7 +28,8 @@ import { models, TransitionEdge, SearchQuotientSpur, - traceInsertEdits + traceInsertEdits, + LegacyQuotientSpur } from '@keymanapp/lm-worker/test-index'; import Transform = LexicalModelTypes.Transform; @@ -50,13 +51,15 @@ function toTransformToken(text: string, transformId?: number) { let isWhitespace = text == ' '; let token = ContextToken.fromRawText(plainModel, ''); const textAsTransform = { insert: text, deleteLeft: 0, id: idSeed }; - token.addInput({ + token = new ContextToken(new LegacyQuotientSpur( + token.searchModule, + [ { sample: textAsTransform, p: 1 } ], { segment: { transitionId: textAsTransform.id, start: 0 }, bestProbFromSet: 1, subsetId: generateSubsetId() - }, [ { sample: textAsTransform, p: 1 } ]); + })); token.isWhitespace = isWhitespace; return token; } diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts index 48304cc667c..70bdf2e687a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts @@ -20,7 +20,7 @@ import { buildEdgeWindow, ContextToken, ContextTokenization, - generateSubsetId, + LegacyQuotientSpur, models, precomputationSubsetKeyer, TokenizationTransitionEdits, @@ -181,16 +181,11 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = ContextToken.fromRawText(plainModel, 'da'); // source text: 'date' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + const dist = [ {sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + ]; + const space = new LegacyQuotientSpur(token.searchModule, dist, dist[0]); + return new ContextToken(space); })()], { insert: 's', deleteLeft: 0, deleteRight: 0 }, false @@ -213,16 +208,11 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = ContextToken.fromRawText(plainModel, 'da'); // source text: 'date' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ - {sample: {insert: 't', deleteLeft: 0}, p: 1} - ]); - return token; + const dist = [ + {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} + ]; + const space = new LegacyQuotientSpur(token.searchModule, dist, dist[0]); + return new ContextToken(space); })()], { insert: 'es', deleteLeft: 0, deleteRight: 0, id: 14 }, false @@ -257,17 +247,15 @@ describe('precomputationSubsetKeyer', function() { ...buildEdgeWindow( [...tokenization.tokens, (() => { const token = ContextToken.fromRawText(plainModel, 'da'); - token.isPartial = true; // source text: 'dat' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + const dist = [ + {sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} + ]; + const space = new LegacyQuotientSpur(token.searchModule, dist, dist[0]); + let token2 = new ContextToken(space); + token2.isPartial = true; + + return token2; })()], { insert: 'e', deleteLeft: 1, deleteRight: 0, id: 14 }, false @@ -289,18 +277,15 @@ describe('precomputationSubsetKeyer', function() { ...buildEdgeWindow( [...tokenization.tokens, (() => { const token = ContextToken.fromRawText(plainModel, 'da'); - token.isPartial = true; // source text: 'dat' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + const dist = [ {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + ]; + const space = new LegacyQuotientSpur(token.searchModule, dist, dist[0]); + let token2 = new ContextToken(space); + token2.isPartial = true; + + return token2; })()], { insert: 'e', deleteLeft: 0, deleteRight: 0, id: 14 }, false @@ -747,27 +732,25 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; - const fourCharTailToken = new ContextToken(baseTokenization.tail); - fourCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ - { sample: trueSourceTransform, p: .6 } - ]); - - const fiveCharTailToken = new ContextToken(baseTokenization.tail); - fiveCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + let fourCharTailToken = new ContextToken(baseTokenization.tail); + let fourCharTailDist = [{sample: trueSourceTransform, p: .6}]; + let fourCharTailSpace = new LegacyQuotientSpur( + fourCharTailToken.searchModule, + fourCharTailDist, + fourCharTailDist[0] + ); + fourCharTailToken = new ContextToken(fourCharTailSpace); + + let fiveCharTailToken = new ContextToken(baseTokenization.tail); + let fiveCharTailDist = [ { sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 } - ]); + ]; + let fiveCharTailSpace = new LegacyQuotientSpur( + fiveCharTailToken.searchModule, + fiveCharTailDist, + fiveCharTailDist[0] + ); + fiveCharTailToken = new ContextToken(fiveCharTailSpace); const subsetBuilder = new TokenizationSubsetBuilder(); const fourCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), fourCharTailToken]); @@ -796,27 +779,25 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; - const twoCharTailToken = new ContextToken(baseTokenization.tail); - twoCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: .6, - subsetId: generateSubsetId() - }, [ - { sample: trueSourceTransform, p: .6 } - ]); - - const threeCharTailToken = new ContextToken(baseTokenization.tail); - threeCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: .6, - subsetId: generateSubsetId() - }, [ - { sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 } - ]); + let twoCharTailToken = new ContextToken(baseTokenization.tail); + let twoCharTailDist = [{sample: trueSourceTransform, p: .6}]; + let twoCharTailSpace = new LegacyQuotientSpur( + twoCharTailToken.searchModule, + twoCharTailDist, + twoCharTailDist[0] + ); + twoCharTailToken = new ContextToken(twoCharTailSpace); + + let threeCharTailToken = new ContextToken(baseTokenization.tail); + let threeCharTailDist = [ + { sample: { insert: 'a', deleteLeft: 0, id: 13 }, p: .4 } + ]; + let threeCharTailSpace = new LegacyQuotientSpur( + threeCharTailToken.searchModule, + threeCharTailDist, + threeCharTailDist[0] + ); + threeCharTailToken = new ContextToken(threeCharTailSpace); const subsetBuilder = new TokenizationSubsetBuilder(); const twoCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), twoCharTailToken]);