diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts index 2bbcc778b4b..5e37a7d5a72 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts @@ -15,6 +15,8 @@ import { PathResult } from './correction-searchable.js'; import { SearchNode } from './distance-modeler.js'; import { SearchQuotientNode, PathInputProperties } from './search-quotient-node.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; +import { SearchQuotientRoot } from './search-quotient-root.js'; +import { LegacyQuotientRoot } from './legacy-quotient-root.js'; import { TokenResultMapping } from './token-result-mapping.js'; import Distribution = LexicalModelTypes.Distribution; @@ -51,6 +53,10 @@ export class LegacyQuotientSpur extends SearchQuotientSpur { return new LegacyQuotientSpur(parentNode, inputs, inputSource) as this; } + constructRoot(): SearchQuotientRoot { + return new LegacyQuotientRoot(this.model); + } + protected buildEdgesFromResults(priorResults: ReadonlyArray): SearchNode[] { // With a newly-available input, we can extend new input-dependent paths from // our previously-reached 'extractedResults' nodes. 
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts index f9ddedfd5e6..e892a100f75 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts @@ -12,8 +12,8 @@ import { PriorityQueue } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; import { CORRECTION_QUEUE_COMPARATOR, PathResult } from './correction-searchable.js'; -import { LegacyQuotientRoot } from './legacy-quotient-root.js'; import { generateSpaceSeed, InputSegment, SearchQuotientNode } from './search-quotient-node.js'; +import { SearchQuotientRoot } from './search-quotient-root.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; import { TokenResultMapping } from './token-result-mapping.js'; @@ -70,6 +70,10 @@ export class SearchQuotientCluster extends SearchQuotientNode { throw new Error(`SearchQuotientNode does not share the same source identifiers as others in the cluster`); } + if(path instanceof SearchQuotientRoot) { + throw new Error(`SearchQuotientRoot instances may not be part of clusters`); + } + lowestPossibleSingleCost = Math.min(lowestPossibleSingleCost, path.lowestPossibleSingleCost); } @@ -217,7 +221,9 @@ export class SearchQuotientCluster extends SearchQuotientNode { split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { // Don't rebuild if this is already a perfect split point! if(this.codepointLength <= charIndex) { - return [[this, new LegacyQuotientRoot(this.model)]]; + // We'll assume that the search path is either using legacy nodes or is not; + // we shouldn't see mixed-use cases. 
+ return [[this, (this.parents[0] as SearchQuotientSpur).constructRoot()]]; } const results = this.parents.flatMap((p) => p.split(charIndex)); diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index a7dff8776a2..967ffc23e7b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -17,7 +17,6 @@ import { EDIT_DISTANCE_COST_SCALE, SearchNode } from './distance-modeler.js'; import { generateSpaceSeed, InputSegment, PathInputProperties, SearchQuotientNode } from './search-quotient-node.js'; import { generateSubsetId } from './tokenization-subsets.js'; import { SearchQuotientRoot } from './search-quotient-root.js'; -import { LegacyQuotientRoot } from './legacy-quotient-root.js'; import { TokenResultMapping } from './token-result-mapping.js'; import Distribution = LexicalModelTypes.Distribution; @@ -45,7 +44,7 @@ export abstract class SearchQuotientSpur extends SearchQuotientNode { readonly inputs?: Distribution; readonly inputSource?: PathInputProperties; - private parentNode: SearchQuotientNode; + protected readonly parentNode: SearchQuotientNode; readonly spaceId: number; readonly inputCount: number; @@ -177,6 +176,11 @@ export abstract class SearchQuotientSpur extends SearchQuotientNode { inputSource: PathInputProperties ): this; + // TODO: Remove once LegacyQuotientRoot + LegacyQuotientSpur are removed! + constructRoot(): SearchQuotientRoot { + return new SearchQuotientRoot(this.model); + } + // spaces are in sequence here. // `this` = head 'space'. public merge(space: SearchQuotientNode): SearchQuotientNode { @@ -267,7 +271,7 @@ export abstract class SearchQuotientSpur extends SearchQuotientNode { // // stopgap: maybe go ahead and check each input for any that are longer? 
// won't matter shortly, though. - return [[this, new LegacyQuotientRoot(this.model)]]; + return [[this, this.constructRoot()]]; } else { const firstSet: Distribution = this.inputs.map((input) => ({ // keep insert head @@ -304,7 +308,7 @@ export abstract class SearchQuotientSpur extends SearchQuotientNode { // construct two SearchPath instances based on the two sets! return [[ parent, - this.construct(new LegacyQuotientRoot(this.model), secondSet, { + this.construct(this.constructRoot(), secondSet, { ...this.inputSource, segment: { ...this.inputSource.segment, diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/substitution-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/substitution-quotient-spur.ts new file mode 100644 index 00000000000..1b34afcb507 --- /dev/null +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/substitution-quotient-spur.ts @@ -0,0 +1,48 @@ +/** + * Keyman is copyright (C) SIL Global. MIT License. + * + * Created by jahorton on 2026-02-03 + * + * This file adds a SearchQuotientSpur variant modeling match & substitute edit + * operations in regard to the corresponding keystroke. 
+ */ + +import { LexicalModelTypes } from "@keymanapp/common-types"; +import { KMWString } from "@keymanapp/web-utils"; + +import { SearchNode } from "./distance-modeler.js"; +import { PathInputProperties, SearchQuotientNode } from "./search-quotient-node.js"; +import { SearchQuotientSpur } from "./search-quotient-spur.js"; +import { TokenResultMapping } from "./token-result-mapping.js"; + +import Distribution = LexicalModelTypes.Distribution; +import ProbabilityMass = LexicalModelTypes.ProbabilityMass; +import Transform = LexicalModelTypes.Transform; + +export class SubstitutionQuotientSpur extends SearchQuotientSpur { + public readonly insertLength: number; + public readonly leftDeleteLength: number; + + constructor( + parentNode: SearchQuotientNode, + inputs: Distribution>, + inputSource: PathInputProperties | ProbabilityMass + ) { + // Compute this SearchPath's codepoint length & edge length. + const inputSample = inputs?.[0].sample ?? { insert: '', deleteLeft: 0 }; + const insertLength = KMWString.length(inputSample.insert); + super(parentNode, inputs, inputSource, parentNode.codepointLength + insertLength - inputSample.deleteLeft); + + // Compute this SearchPath's codepoint length & edge length. 
+ this.insertLength = insertLength; + this.leftDeleteLength = inputSample.deleteLeft; + } + + construct(parentNode: SearchQuotientNode, inputs: ProbabilityMass>[], inputSource: PathInputProperties): this { + return new SubstitutionQuotientSpur(parentNode, inputs, inputSource) as this; + } + + protected buildEdgesFromResults(baseResults: ReadonlyArray): SearchNode[] { + return baseResults.flatMap((n) => n.buildSubstitutionEdges(this.inputs, this.spaceId)); + } +} \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts index da9cef8adb4..2f28b93bda0 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts @@ -7,6 +7,7 @@ export { ContextTransition } from './correction/context-transition.js'; export * from './correction/correction-searchable.js'; export * from './correction/correction-result-mapping.js'; export * from './correction/distance-modeler.js'; +export * from './correction/substitution-quotient-spur.js'; export * from './correction/search-quotient-cluster.js'; export * from './correction/search-quotient-spur.js'; export * from './correction/search-quotient-node.js'; diff --git a/web/src/test/auto/headless/engine/predictive-text/helpers/buildAlphabeticClusteredFixture.ts b/web/src/test/auto/headless/engine/predictive-text/helpers/buildAlphabeticClusteredFixture.ts index 086193a387a..d45cffc7e4c 100644 --- a/web/src/test/auto/headless/engine/predictive-text/helpers/buildAlphabeticClusteredFixture.ts +++ b/web/src/test/auto/headless/engine/predictive-text/helpers/buildAlphabeticClusteredFixture.ts @@ -13,9 +13,9 @@ import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs' import { models, - LegacyQuotientSpur, SearchQuotientCluster, - LegacyQuotientRoot + SearchQuotientRoot, + SubstitutionQuotientSpur } from 
'@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; @@ -31,7 +31,7 @@ const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); * @returns */ export const buildAlphabeticClusterFixtures = () => { - const rootPath = new LegacyQuotientRoot(testModel); + const rootPath = new SearchQuotientRoot(testModel); // consonant-cluster 1, insert 1, delete 0 const distrib_c1_i1d0: Distribution = [ @@ -48,9 +48,9 @@ export const buildAlphabeticClusterFixtures = () => { ]; // keystrokes 1, codepoints 1, total inserts 1, delete 0 - const path_k1c1_i1d0 = new LegacyQuotientSpur(rootPath, distrib_c1_i1d0, distrib_c1_i1d0[0]); + const path_k1c1_i1d0 = new SubstitutionQuotientSpur(rootPath, distrib_c1_i1d0, distrib_c1_i1d0[0]); // keystrokes 1, codepoints 2, total inserts 2, delete 0 - const path_k1c2_i2d0 = new LegacyQuotientSpur(rootPath, distrib_c1_i2d0, distrib_c1_i1d0[0]); + const path_k1c2_i2d0 = new SubstitutionQuotientSpur(rootPath, distrib_c1_i2d0, distrib_c1_i1d0[0]); // Second input @@ -62,8 +62,8 @@ export const buildAlphabeticClusterFixtures = () => { { sample: { insert: 'u', deleteLeft: 0, deleteRight: 0, id: 12 }, p: 0.1 }, ]; - const path_k2c2_i2d0 = new LegacyQuotientSpur(path_k1c1_i1d0, distrib_v1_i1d0, distrib_v1_i1d0[0]); - const path_k2c3_i3d0 = new LegacyQuotientSpur(path_k1c2_i2d0, distrib_v1_i1d0, distrib_v1_i1d0[0]); + const path_k2c2_i2d0 = new SubstitutionQuotientSpur(path_k1c1_i1d0, distrib_v1_i1d0, distrib_v1_i1d0[0]); + const path_k2c3_i3d0 = new SubstitutionQuotientSpur(path_k1c2_i2d0, distrib_v1_i1d0, distrib_v1_i1d0[0]); // Third input const distrib_v2_i1d0: Distribution = [ @@ -90,15 +90,15 @@ export const buildAlphabeticClusterFixtures = () => { { sample: { insert: 'úú', deleteLeft: 1, deleteRight: 0, id: 13 }, p: 0.02 }, ]; // 0.2 total - const path_k3c2_i3d1 = new LegacyQuotientSpur(path_k2c2_i2d0, distrib_v2_i1d1, distrib_v2_i1d0[0]); + const path_k3c2_i3d1 = new 
SubstitutionQuotientSpur(path_k2c2_i2d0, distrib_v2_i1d1, distrib_v2_i1d0[0]); - const path_k3c3_i3d0 = new LegacyQuotientSpur(path_k2c2_i2d0, distrib_v2_i1d0, distrib_v2_i1d0[0]); - const path_k3c3_i4d1a = new LegacyQuotientSpur(path_k2c2_i2d0, distrib_v2_i2d1, distrib_v2_i1d0[0]); - const path_k3c3_i4d1b = new LegacyQuotientSpur(path_k2c3_i3d0, distrib_v2_i1d1, distrib_v2_i1d0[0]); + const path_k3c3_i3d0 = new SubstitutionQuotientSpur(path_k2c2_i2d0, distrib_v2_i1d0, distrib_v2_i1d0[0]); + const path_k3c3_i4d1a = new SubstitutionQuotientSpur(path_k2c2_i2d0, distrib_v2_i2d1, distrib_v2_i1d0[0]); + const path_k3c3_i4d1b = new SubstitutionQuotientSpur(path_k2c3_i3d0, distrib_v2_i1d1, distrib_v2_i1d0[0]); // both are built on path k1c2 (splits at index 1) - const path_k3c4_i4d0 = new LegacyQuotientSpur(path_k2c3_i3d0, distrib_v2_i1d0, distrib_v2_i1d0[0]); - const path_k3c4_i5d1 = new LegacyQuotientSpur(path_k2c3_i3d0, distrib_v2_i2d1, distrib_v2_i1d0[0]); + const path_k3c4_i4d0 = new SubstitutionQuotientSpur(path_k2c3_i3d0, distrib_v2_i1d0, distrib_v2_i1d0[0]); + const path_k3c4_i5d1 = new SubstitutionQuotientSpur(path_k2c3_i3d0, distrib_v2_i2d1, distrib_v2_i1d0[0]); const cluster_k3c3 = new SearchQuotientCluster([path_k3c3_i3d0, path_k3c3_i4d1a, path_k3c3_i4d1b]); // both are built on path k1c2. 
@@ -116,13 +116,13 @@ export const buildAlphabeticClusterFixtures = () => { { sample: { insert: 'vw', deleteLeft: 0, deleteRight: 0, id: 14 }, p: 0.1 } ]; - const path_k4c4_i2 = new LegacyQuotientSpur(path_k3c2_i3d1, distrib_c2_i2d0, distrib_c2_i2d0[0]); - const path_k4c4_i1 = new LegacyQuotientSpur(cluster_k3c3, distrib_c2_i1d0, distrib_c2_i2d0[0]); + const path_k4c4_i2 = new SubstitutionQuotientSpur(path_k3c2_i3d1, distrib_c2_i2d0, distrib_c2_i2d0[0]); + const path_k4c4_i1 = new SubstitutionQuotientSpur(cluster_k3c3, distrib_c2_i1d0, distrib_c2_i2d0[0]); - const path_k4c5_i2 = new LegacyQuotientSpur(cluster_k3c3, distrib_c2_i2d0, distrib_c2_i2d0[0]); - const path_k4c5_i1 = new LegacyQuotientSpur(cluster_k3c4, distrib_c2_i1d0, distrib_c2_i2d0[0]); + const path_k4c5_i2 = new SubstitutionQuotientSpur(cluster_k3c3, distrib_c2_i2d0, distrib_c2_i2d0[0]); + const path_k4c5_i1 = new SubstitutionQuotientSpur(cluster_k3c4, distrib_c2_i1d0, distrib_c2_i2d0[0]); - const path_k4c6 = new LegacyQuotientSpur(cluster_k3c4, distrib_c2_i2d0, distrib_c2_i2d0[0]); + const path_k4c6 = new SubstitutionQuotientSpur(cluster_k3c4, distrib_c2_i2d0, distrib_c2_i2d0[0]); const cluster_k4c4 = new SearchQuotientCluster([path_k4c4_i2, path_k4c4_i1]); const cluster_k4c5 = new SearchQuotientCluster([path_k4c5_i2, path_k4c5_i1]); @@ -135,8 +135,8 @@ export const buildAlphabeticClusterFixtures = () => { { sample: { insert: 'z', deleteLeft: 0, deleteRight: 0, id: 15 }, p: 0.4 } ]; - const path_k5c6_a = new LegacyQuotientSpur(cluster_k4c4, distrib_c3_i2d0, distrib_c3_i2d0[0]); - const path_k5c6_b = new LegacyQuotientSpur(cluster_k4c5, distrib_c3_i1d0, distrib_c3_i2d0[0]); + const path_k5c6_a = new SubstitutionQuotientSpur(cluster_k4c4, distrib_c3_i2d0, distrib_c3_i2d0[0]); + const path_k5c6_b = new SubstitutionQuotientSpur(cluster_k4c5, distrib_c3_i1d0, distrib_c3_i2d0[0]); const cluster_k5c6 = new SearchQuotientCluster([path_k5c6_a, path_k5c6_b]); diff --git 
a/web/src/test/auto/headless/engine/predictive-text/helpers/buildCantLinearFixture.ts b/web/src/test/auto/headless/engine/predictive-text/helpers/buildCantLinearFixture.ts index 3de7986562c..640c6c206e6 100644 --- a/web/src/test/auto/headless/engine/predictive-text/helpers/buildCantLinearFixture.ts +++ b/web/src/test/auto/headless/engine/predictive-text/helpers/buildCantLinearFixture.ts @@ -8,7 +8,7 @@ * divergence occurring within the fixture. */ -import { LegacyQuotientRoot, LegacyQuotientSpur, models } from "@keymanapp/lm-worker/test-index"; +import { SearchQuotientRoot, SubstitutionQuotientSpur, models } from "@keymanapp/lm-worker/test-index"; import { jsonFixture } from "@keymanapp/common-test-resources/model-helpers.mjs"; import TrieModel = models.TrieModel; @@ -19,31 +19,31 @@ const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); * Build a linear fixture that models the word 'cant' and words close to that. */ export function buildCantLinearFixture() { - const rootPath = new LegacyQuotientRoot(testModel); + const rootPath = new SearchQuotientRoot(testModel); const distrib1 = [ { sample: {insert: 'c', deleteLeft: 0, id: 11}, p: 0.5 }, { sample: {insert: 'r', deleteLeft: 0, id: 11}, p: 0.4 }, { sample: {insert: 't', deleteLeft: 0, id: 11}, p: 0.1 } ]; - const path1 = new LegacyQuotientSpur(rootPath, distrib1, distrib1[0]); + const path1 = new SubstitutionQuotientSpur(rootPath, distrib1, distrib1[0]); const distrib2 = [ { sample: {insert: 'a', deleteLeft: 0, id: 12}, p: 0.7 }, { sample: {insert: 'e', deleteLeft: 0, id: 12}, p: 0.3 } ]; - const path2 = new LegacyQuotientSpur(path1, distrib2, distrib2[0]); + const path2 = new SubstitutionQuotientSpur(path1, distrib2, distrib2[0]); const distrib3 = [ { sample: {insert: 'n', deleteLeft: 0, id: 13}, p: 0.8 }, { sample: {insert: 'r', deleteLeft: 0, id: 13}, p: 0.2 } ]; - const path3 = new LegacyQuotientSpur(path2, distrib3, distrib3[0]); + const path3 = new SubstitutionQuotientSpur(path2, 
distrib3, distrib3[0]); const distrib4 = [ { sample: {insert: 't', deleteLeft: 0, id: 14}, p: 1 } ]; - const path4 = new LegacyQuotientSpur(path3, distrib4, distrib4[0]); + const path4 = new SubstitutionQuotientSpur(path3, distrib4, distrib4[0]); return { paths: [null, path1, path2, path3, path4], diff --git a/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.tests.ts b/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.tests.ts new file mode 100644 index 00000000000..6bf9f0dea7b --- /dev/null +++ b/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.tests.ts @@ -0,0 +1,32 @@ +import { assert } from "chai"; + +import { buildQuotientDocFixture } from "./buildQuotientDocFixture.js"; + +describe('buildQuotientDocFixture() fixture', () => { + it('constructs paths properly', () => { + const {searchRoot, nodes} = buildQuotientDocFixture(); + + [searchRoot /*, nodes.sc1, nodes.sc2*/].forEach((n) => { + assert.equal(n.inputCount, 0); + }); + [/*nodes.k1c0,*/ nodes.k1c1, nodes.k1c2 /*, nodes.k1c3*/].forEach((n) => { + assert.equal(n.inputCount, 1); + }); + [/*nodes.k2c0, nodes.k2c1,*/ nodes.k2c2, nodes.k2c3].forEach((n) => { + assert.equal(n.inputCount, 2); + }); + + [searchRoot/*, nodes.k1c0, nodes.k2c0*/].forEach((n) => { + assert.equal(n.codepointLength, 0); + }); + [/*nodes.sc1, */nodes.k1c1/*, nodes.k2c1*/].forEach((n) => { + assert.equal(n.codepointLength, 1); + }); + [/*nodes.sc2,*/ nodes.k1c2, nodes.k2c2].forEach((n) => { + assert.equal(n.codepointLength, 2); + }); + [/*nodes.k1c3,*/ nodes.k2c3].forEach((n) => { + assert.equal(n.codepointLength, 3); + }); + }); +}); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.ts b/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.ts new file mode 100644 index 00000000000..66ee12fe631 --- /dev/null +++ 
b/web/src/test/auto/headless/engine/predictive-text/helpers/buildQuotientDocFixture.ts @@ -0,0 +1,246 @@ +/* + * Keyman is copyright (C) SIL Global. MIT License. + * + * Created by jahorton on 2026-03-09 + * + * This file defines a unit-test fixture designed for testing + * the internal mechanisms of a search quotient graph built from + * quotient-spurs specialized for each of the three main edit-distance + * operation types. + */ + +import { LexicalModelTypes } from '@keymanapp/common-types'; + +import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; + +import { + generateSubsetId, + models, + SearchQuotientCluster, + SearchQuotientRoot, + SubstitutionQuotientSpur +} from '@keymanapp/lm-worker/test-index'; + +import Distribution = LexicalModelTypes.Distribution; +import Transform = LexicalModelTypes.Transform; +import TrieModel = models.TrieModel; + +const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); + + +/** + * Builds a test fixture matching the [final quotient-graph example]( + * ../../../../../../../engine/predictive-text/worker-thread/docs/correction-search-graph.md) + * documenting the internal SearchQuotientNode design. + * @returns + */ +export function buildQuotientDocFixture() { + const searchRoot = new SearchQuotientRoot(testModel); + let idSeed = 0; + + const key1Id = idSeed++; + const abDistrib: Distribution = [ + { sample: { insert: 'a', deleteLeft: 0, id: key1Id }, p: .45 }, + { sample: { insert: 'b', deleteLeft: 0, id: key1Id }, p: .35 } + ]; + + const cdDistrib: Distribution = [ + { sample: { insert: 'cd', deleteLeft: 0, id: key1Id }, p: .2 } + ]; + + // const sc1 = new InsertionQuotientSpur(searchRoot); + // const sc2 = new InsertionQuotientSpur(sc1); + + // // K1C0 + // const k1c0 = new DeletionQuotientSpur(searchRoot, abDistrib.concat(cdDistrib), { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. 
+ // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + + // // K1C1 + // const k1c1_del = new DeletionQuotientSpur(sc1, abDistrib.concat(cdDistrib), { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + const k1c1_ab = new SubstitutionQuotientSpur(searchRoot, abDistrib, { + segment: { + transitionId: key1Id, + start: 0 + }, + // Deletions always get their own unique subset ID. + subsetId: generateSubsetId(), + bestProbFromSet: abDistrib[0].p + }); + // const k1c1_ins = new InsertionQuotientSpur(k1c0); + const k1c1 = new SearchQuotientCluster([/*k1c1_del,*/ k1c1_ab, /*k1c1_ins*/]); + + // const k1c2_del = new DeletionQuotientSpur(sc2, abDistrib.concat(cdDistrib), { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + // const k1c2_ab = new SubstitutionQuotientSpur(sc1, abDistrib, { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + const k1c2_cd = new SubstitutionQuotientSpur(searchRoot, cdDistrib, { + segment: { + transitionId: key1Id, + start: 0 + }, + // Deletions always get their own unique subset ID. + subsetId: generateSubsetId(), + bestProbFromSet: abDistrib[0].p + }); + // const k1c2_ins = new InsertionQuotientSpur(k1c1); + const k1c2 = new SearchQuotientCluster([/*k1c2_del, k1c2_ab, */ k1c2_cd, /*k1c2_ins*/]); + + // const k1c3_ab = new SubstitutionQuotientSpur(sc2, abDistrib, { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. 
+ // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + // const k1c3_cd = new SubstitutionQuotientSpur(sc1, cdDistrib, { + // segment: { + // transitionId: key1Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: abDistrib[0].p + // }); + // const k1c3_ins = new InsertionQuotientSpur(k1c2); + // const k1c3 = new SearchQuotientCluster([k1c3_ab, k1c3_cd, k1c3_ins]); + + // Onto keystroke 2. + + const key2Id = idSeed++; + const efDistrib: Distribution = [ + { sample: { insert: 'e', deleteLeft: 0, id: key2Id }, p: .4 }, + { sample: { insert: 'f', deleteLeft: 0, id: key2Id }, p: .3 } + ]; + + const ghDistrib: Distribution = [ + { sample: { insert: 'gh', deleteLeft: 0, id: key2Id }, p: .3 } + ]; + + // const k2c0 = new DeletionQuotientSpur(k1c0, efDistrib.concat(ghDistrib), { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + + // const k2c1_del = new DeletionQuotientSpur(k1c1, efDistrib.concat(ghDistrib), { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + // const k2c1_ef = new SubstitutionQuotientSpur(k1c0, efDistrib, { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + // const k2c1_ins = new InsertionQuotientSpur(k2c0); + // const k2c1 = new SearchQuotientCluster([k2c1_del, k2c1_ef, k2c1_ins]); + + // const k2c2_del = new DeletionQuotientSpur(k1c2, efDistrib.concat(ghDistrib), { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. 
+ // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + const k2c2_ef = new SubstitutionQuotientSpur(k1c1, efDistrib, { + segment: { + transitionId: key2Id, + start: 0 + }, + // Deletions always get their own unique subset ID. + subsetId: generateSubsetId(), + bestProbFromSet: efDistrib[0].p + }); + // const k2c2_gh = new SubstitutionQuotientSpur(k1c0, ghDistrib, { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + // const k2c2_ins = new InsertionQuotientSpur(k2c1); + const k2c2 = new SearchQuotientCluster([/*k2c2_del,*/ k2c2_ef /*, k2c2_gh, k2c2_ins*/]); + + // const k2c3_del = new DeletionQuotientSpur(k1c3, efDistrib.concat(ghDistrib), { + // segment: { + // transitionId: key2Id, + // start: 0 + // }, + // // Deletions always get their own unique subset ID. + // subsetId: generateSubsetId(), + // bestProbFromSet: efDistrib[0].p + // }); + const k2c3_ef = new SubstitutionQuotientSpur(k1c2, efDistrib, { + segment: { + transitionId: key2Id, + start: 0 + }, + // Deletions always get their own unique subset ID. + subsetId: generateSubsetId(), + bestProbFromSet: efDistrib[0].p + }); + const k2c3_gh = new SubstitutionQuotientSpur(k1c1, ghDistrib, { + segment: { + transitionId: key2Id, + start: 0 + }, + // Deletions always get their own unique subset ID. 
+ subsetId: generateSubsetId(), + bestProbFromSet: efDistrib[0].p + }); + // const k2c3_ins = new InsertionQuotientSpur(k2c2); + const k2c3 = new SearchQuotientCluster([/*k2c3_del, */ k2c3_ef, k2c3_gh, /*k2c3_ins*/]); + + return { + searchRoot, + spurs: {/*sc1, sc2,*/ k1c1_ab, /*k1c2_ab,*/ k1c2_cd, /*k1c2_ins, k1c3_ab, k1c3_cd, k1c3_ins,*/ k2c2_ef, k2c3_ef, k2c3_gh /*, k2c3_ins*/}, + nodes: {/* sc1, sc2, k1c0, */ k1c1, k1c2, /* k1c3, k2c0, k2c1, */ k2c2, k2c3} + }; +} diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/legacy-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/legacy-quotient-spur.tests.ts index 0989ab9d64e..21bdd5026e6 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/legacy-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/legacy-quotient-spur.tests.ts @@ -17,13 +17,46 @@ import { models } from '@keymanapp/lm-worker/test-index'; -import { buildCantLinearFixture } from '../../helpers/buildCantLinearFixture.js'; import { analyzeQuotientNodeResults } from '../../helpers/analyzeQuotientNodeResults.js'; import TrieModel = models.TrieModel; const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); +// Similar to the fixture version, but with the LegacyQuotientRoot & LegacyQuotientSpur classes. 
+export function buildCantLinearFixture() { + const rootPath = new LegacyQuotientRoot(testModel); + + const distrib1 = [ + { sample: {insert: 'c', deleteLeft: 0, id: 11}, p: 0.5 }, + { sample: {insert: 'r', deleteLeft: 0, id: 11}, p: 0.4 }, + { sample: {insert: 't', deleteLeft: 0, id: 11}, p: 0.1 } + ]; + const path1 = new LegacyQuotientSpur(rootPath, distrib1, distrib1[0]); + + const distrib2 = [ + { sample: {insert: 'a', deleteLeft: 0, id: 12}, p: 0.7 }, + { sample: {insert: 'e', deleteLeft: 0, id: 12}, p: 0.3 } + ]; + const path2 = new LegacyQuotientSpur(path1, distrib2, distrib2[0]); + + const distrib3 = [ + { sample: {insert: 'n', deleteLeft: 0, id: 13}, p: 0.8 }, + { sample: {insert: 'r', deleteLeft: 0, id: 13}, p: 0.2 } + ]; + const path3 = new LegacyQuotientSpur(path2, distrib3, distrib3[0]); + + const distrib4 = [ + { sample: {insert: 't', deleteLeft: 0, id: 14}, p: 1 } + ]; + const path4 = new LegacyQuotientSpur(path3, distrib4, distrib4[0]); + + return { + paths: [null, path1, path2, path3, path4], + distributions: [distrib1, distrib2, distrib3, distrib4] + }; +} + describe('LegacyQuotientSpur', () => { describe('constructor', () => { it('initializes from a lexical model', () => { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/quotient-node-finalizer.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/quotient-node-finalizer.tests.ts index ee6ee088e65..9dec509ad5b 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/quotient-node-finalizer.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/quotient-node-finalizer.tests.ts @@ -15,14 +15,15 @@ import { default as defaultBreaker } from '@keymanapp/models-wordbreakers'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; import { + EDIT_DISTANCE_COST_SCALE, generateSubsetId, - LegacyQuotientSpur, models, 
PathInputProperties, PathResult, QuotientNodeFinalizer, SearchQuotientNode, SearchQuotientRoot, + SubstitutionQuotientSpur, TokenResultMapping } from '@keymanapp/lm-worker/test-index'; @@ -76,10 +77,9 @@ function buildFixture_therefore(): SearchQuotientNode[] { }; }); - // TODO: Use SubstitutionQuotientSpur instead! let quotientNodes: SearchQuotientNode[] = [new SearchQuotientRoot(plainModel)]; for(let i=0; i < 9; i++) { - quotientNodes.push(new LegacyQuotientSpur(quotientNodes[i], distributions[i], inputSources[i])); + quotientNodes.push(new SubstitutionQuotientSpur(quotientNodes[i], distributions[i], inputSources[i])); } return quotientNodes; @@ -99,7 +99,7 @@ describe('QuotientNodeFinalizer', () => { assert.equal(searchResult.type, 'complete'); if(searchResult.type == 'complete') { - assert.equal(searchResult.mapping.totalCost, -Math.log(therefo.bestExample.p)); + assert.approximately(searchResult.mapping.totalCost, -Math.log(therefo.bestExample.p), Number.EPSILON * 1000); assert.isNotNaN(searchResult.cost); assert.equal(searchResult.cost, searchResult.mapping.totalCost); } else { @@ -107,14 +107,16 @@ describe('QuotientNodeFinalizer', () => { } searchResult = therefo.handleNextNode(); - // There should be more results that may be found. + // There should be more searching to perform before aborting. assert.notEqual(searchResult.type, 'none'); + // However, no other valid results are within correction range + // while rooted on 6 input transforms. 
do { searchResult = therefo.handleNextNode(); } while(searchResult.type == 'intermediate'); - assert.notEqual(searchResult.type, 'none'); + assert.equal(searchResult.type, 'none'); }); it('finds only corrections when predictions are forbidden', () => { @@ -130,6 +132,11 @@ describe('QuotientNodeFinalizer', () => { assert.equal(searchResult.type, 'complete'); if(searchResult.type == 'complete') { assert.isAbove(searchResult.mapping.totalCost, -Math.log(therefo.bestExample.p)); + + // There are two codepoints missing that are necessary to complete a + // full word with the represented prefix. Check that the penalty is set + // appropriately, accounting for floating-point precision issues. + assert.isAtLeast(searchResult.mapping.totalCost, -Math.log(therefo.bestExample.p) + EDIT_DISTANCE_COST_SCALE * 1.99); assert.isNotNaN(searchResult.cost); assert.equal(searchResult.cost, searchResult.mapping.totalCost); } else { @@ -137,14 +144,16 @@ describe('QuotientNodeFinalizer', () => { } searchResult = therefo.handleNextNode(); - // There should be more results that may be found. + // There should be more searching to perform before aborting. assert.notEqual(searchResult.type, 'none'); do { searchResult = therefo.handleNextNode(); } while(searchResult.type == 'intermediate'); - assert.notEqual(searchResult.type, 'none'); + // However, no other valid results are within correction range + // while rooted on 6 input transforms. 
+ assert.equal(searchResult.type, 'none'); }); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-cluster.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-cluster.tests.ts index 7b12d8a8577..029aa6bed31 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-cluster.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-cluster.tests.ts @@ -118,16 +118,6 @@ const determineTargetSplitSequences = (constituentPaths: SearchQuotientSpur[][], describe('SearchQuotientCluster', () => { describe('constructor()', () => { - it('initializes from LegacySearchRoot', () => { - const path = new LegacyQuotientRoot(testModel); - const cluster = new SearchQuotientCluster([path]); - assert.equal(cluster.inputCount, 0); - assert.equal(cluster.codepointLength, 0); - assert.isNumber(cluster.spaceId); - assert.deepEqual(cluster.bestExample, {text: '', p: 1}); - assert.deepEqual(cluster.parents, [path]); - }); - it('initializes from arbitrary SearchQuotientSpur', () => { const rootPath = new LegacyQuotientRoot(testModel); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/substitution-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/substitution-quotient-spur.tests.ts new file mode 100644 index 00000000000..a8401aa3b75 --- /dev/null +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/substitution-quotient-spur.tests.ts @@ -0,0 +1,305 @@ +/* + * Keyman is copyright (C) SIL Global. MIT License. + * + * Created by jahorton on 2026-03-04 + * + * This file defines tests for the SubstitutionQuotientSpur class of the + * predictive-text correction-search engine's search graph. 
+ */ + +import { assert } from 'chai'; + +import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; +import { + generateSubsetId, + models, + SearchQuotientRoot, + SubstitutionQuotientSpur +} from '@keymanapp/lm-worker/test-index'; + +import { buildCantLinearFixture } from '../../helpers/buildCantLinearFixture.js'; +import { analyzeQuotientNodeResults } from '../../helpers/analyzeQuotientNodeResults.js'; + +import TrieModel = models.TrieModel; + +const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); + +describe('SubstitutionQuotientSpur', () => { + describe('constructor', () => { + it('may be extended from root path', () => { + const rootPath = new SearchQuotientRoot(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + {sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + + const extendedPath = new SubstitutionQuotientSpur(rootPath, leadEdgeDistribution, leadEdgeDistribution[0]); + + assert.equal(extendedPath.inputCount, 1); + assert.equal(extendedPath.codepointLength, 1); + assert.isNumber(extendedPath.spaceId); + assert.notEqual(extendedPath.spaceId, rootPath.spaceId); + assert.deepEqual(extendedPath.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(extendedPath.parents, [rootPath]); + assert.deepEqual(extendedPath.inputs, leadEdgeDistribution); + assert.deepEqual(extendedPath.inputSegments, [ + { + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + } + ]); + + // Assert the root is unchanged. + assert.equal(rootPath.inputCount, 0); + // Should (still) have codepointLength == 0 once it's defined. + assert.deepEqual(rootPath.bestExample, {text: '', p: 1}); + assert.deepEqual(rootPath.parents, []); + }); + + // TODO: for each of the possible ancestor Spur types, + the root! + // Properties should have proper relationship with each test's ancestor. 
+ it('may be built from arbitrary prior SearchQuotientSpur', () => { + const rootPath = new SearchQuotientRoot(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + {sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + const inputClone = leadEdgeDistribution.map(e => ({...e})); + + const length1Path = new SubstitutionQuotientSpur( + rootPath, + leadEdgeDistribution, + leadEdgeDistribution[0] + ); + + const tailEdgeDistribution = [ + {sample: {insert: 'r', deleteLeft: 0, id: 17 }, p: 0.6}, + {sample: {insert: 'e', deleteLeft: 0, id: 17 }, p: 0.25}, + {sample: {insert: 'h', deleteLeft: 0, id: 17 }, p: 0.15} + ]; + + const length2Path = new SubstitutionQuotientSpur( + length1Path, + tailEdgeDistribution, + tailEdgeDistribution[0] + ); + + // Verify that the prior distribution remains fully unaltered. + assert.deepEqual(leadEdgeDistribution, inputClone); + + assert.equal(length2Path.inputCount, 2); + assert.equal(length2Path.codepointLength, 2); + assert.isNumber(length2Path.spaceId); + assert.notEqual(length2Path.spaceId, length1Path.spaceId); + assert.deepEqual(length2Path.bestExample, {text: 'tr', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); + assert.deepEqual(length2Path.parents, [length1Path]); + assert.deepEqual(length2Path.inputs, tailEdgeDistribution); + assert.deepEqual(length2Path.inputSegments, [ + { + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, { + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + } + ]); + + assert.equal(length1Path.inputCount, 1); + assert.equal(length1Path.codepointLength, 1); + assert.isNumber(length1Path.spaceId); + assert.notEqual(length1Path.spaceId, rootPath.spaceId); + assert.deepEqual(length1Path.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(length1Path.parents, [rootPath]); + assert.deepEqual(length1Path.inputs, leadEdgeDistribution); + }); + + it('may 
extend with a Transform inserting multiple codepoints', () => { + const rootPath = new SearchQuotientRoot(testModel); + + const leadEdgeDistribution = [ + {sample: {insert: 't', deleteLeft: 0, id: 13 }, p: 0.5}, + {sample: {insert: 'a', deleteLeft: 0, id: 13 }, p: 0.3}, + {sample: {insert: 'o', deleteLeft: 0, id: 13 }, p: 0.2} + ]; + const inputClone = leadEdgeDistribution.map(e => ({...e})); + + const length1Path = new SubstitutionQuotientSpur( + rootPath, + leadEdgeDistribution, + leadEdgeDistribution[0] + ); + + const tailEdgeDistribution = [ + {sample: {insert: 'ri', deleteLeft: 0, id: 17 }, p: 0.6}, + {sample: {insert: 'er', deleteLeft: 0, id: 17 }, p: 0.25}, + {sample: {insert: 'hi', deleteLeft: 0, id: 17 }, p: 0.15} + ]; + + const length2Path = new SubstitutionQuotientSpur( + length1Path, + tailEdgeDistribution, + tailEdgeDistribution[0] + ); + + // Verify that the prior distribution remains fully unaltered. + assert.deepEqual(leadEdgeDistribution, inputClone); + + assert.equal(length2Path.inputCount, 2); + assert.equal(length2Path.codepointLength, 3); + assert.isNumber(length2Path.spaceId); + assert.notEqual(length2Path.spaceId, length1Path.spaceId); + assert.deepEqual(length2Path.bestExample, {text: 'tri', p: leadEdgeDistribution[0].p * tailEdgeDistribution[0].p}); + assert.deepEqual(length2Path.parents, [length1Path]); + assert.deepEqual(length2Path.inputs, tailEdgeDistribution); + assert.deepEqual(length2Path.inputSegments, [ + { + transitionId: leadEdgeDistribution[0].sample.id, + start: 0 + }, { + transitionId: tailEdgeDistribution[0].sample.id, + start: 0 + } + ]); + + assert.equal(length1Path.inputCount, 1); + assert.equal(length1Path.codepointLength, 1); + assert.isNumber(length1Path.spaceId); + assert.notEqual(length1Path.spaceId, rootPath.spaceId); + assert.deepEqual(length1Path.bestExample, {text: 't', p: 0.5}); + assert.deepEqual(length1Path.parents, [rootPath]); + assert.deepEqual(length1Path.inputs, leadEdgeDistribution); + }); + }); + + 
describe('.edgeKey', () => { + it('changes when input source subset IDs differ', () => { + const root = new SearchQuotientRoot(testModel); + + const {distributions} = buildCantLinearFixture(); + const inputSrc = { + segment: { + transitionId: distributions[0][0].sample.id, + start: 0 + }, + subsetId: generateSubsetId(), + bestProbFromSet: distributions[0][0].p + }; + + const spur1 = new SubstitutionQuotientSpur(root, distributions[0], { + ...inputSrc, + subsetId: generateSubsetId() + }); + const spur2 = new SubstitutionQuotientSpur(root, distributions[0], { + ...inputSrc, + subsetId: generateSubsetId() + }); + + assert.notEqual(spur1.edgeKey, spur2.edgeKey); + }); + + it('changes when different parts of the same input source are used', () => { + const root = new SearchQuotientRoot(testModel); + + const {distributions} = buildCantLinearFixture(); + const inputSrc = { + segment: { + transitionId: distributions[0][0].sample.id, + start: 0 + }, + subsetId: generateSubsetId(), + bestProbFromSet: distributions[0][0].p + }; + + const spur1 = new SubstitutionQuotientSpur(root, distributions[0], inputSrc); + const spur2 = new SubstitutionQuotientSpur(root, distributions[0], { + ...inputSrc, + segment: { + ...inputSrc.segment, + end: 1 + } + }); + const spur3 = new SubstitutionQuotientSpur(root, distributions[0], { + ...inputSrc, + segment: { + ...inputSrc.segment, + start: inputSrc.segment.start + 1 + } + }); + + assert.notEqual(spur1.edgeKey, spur2.edgeKey); + assert.notEqual(spur2.edgeKey, spur3.edgeKey); + assert.notEqual(spur3.edgeKey, spur1.edgeKey); + }); + }); + + describe('handleNextNode()', () => { + it('outputs results that directly match inputs', () => { + const canPath = buildCantLinearFixture().paths[3]; + + const matchTargets = [ + 'can', + 'car', + 'cen', // 'cent' and 'center' are supported in this test model. 
+ ]; + const analysis = analyzeQuotientNodeResults(canPath, matchTargets); + + assert.sameMembers(analysis.found, matchTargets); + assert.isEmpty(analysis.foundWithDuplicates); + }); + + it('outputs results that substitute inputs', () => { + const canPath = buildCantLinearFixture().paths[3]; + + const matchTargets = [ + // Replacement of first char + 'man', + 'far', + // Replacement of second char + 'con', // 'consider' and variants thereof are also supported. + 'cor', // 'corner' + // Replacement of third char + 'cal', // 'call' + ]; + const analysis = analyzeQuotientNodeResults(canPath, matchTargets); + + assert.sameMembers(analysis.found, matchTargets); + assert.isEmpty(analysis.foundWithDuplicates); + }); + + it('does not output results that insert characters as needed', () => { + const canPath = buildCantLinearFixture().paths[3]; + + const matchTargets = [ + 'char', // 'c' + (insert) 'h' + 'ar' + 'than', // 't' + (insert) 'h' + 'an' + ]; + const analysis = analyzeQuotientNodeResults(canPath, matchTargets); + + assert.sameMembers(analysis.found, []); + assert.sameMembers(analysis.missing, matchTargets); + assert.isEmpty(analysis.foundWithDuplicates); + }); + + it('does not output results that delete incoming keystrokes as needed', () => { + const canPath = buildCantLinearFixture().paths[3]; + + const matchTargets = [ + // Delete only first + 'an', // (delete) 'c'/'r'/'t' + 'an', for 'and' and 'any' + 'en', // (delete) 'c'/'r'/'t' + 'en', for 'end', + // Even delete second + 'n', // model possesses words starting with just 'n' + 't' // ... and 't'. 
+ ]; + const analysis = analyzeQuotientNodeResults(canPath, matchTargets); + + assert.sameMembers(analysis.found, []); + assert.sameMembers(analysis.missing, matchTargets); + assert.isEmpty(analysis.foundWithDuplicates); + }); + }); +}); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts index 5d7feb362b8..c9900fc96fe 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts @@ -21,12 +21,12 @@ import { correctionValidForAutoSelect, generateSubsetId, getBestMatches, - LegacyQuotientSpur, models, PathInputProperties, PathResult, SearchQuotientNode, SearchQuotientRoot, + SubstitutionQuotientSpur, TokenizationCorrector, TokenResult, TokenizationResultMapping @@ -86,22 +86,21 @@ function buildFixture_therefore() { const therefTokens: ContextToken[] = []; // as in "therefore" const the_efTokens: ContextToken[] = []; // as in "the effect" - // TODO: Use SubstitutionQuotientSpur instead! let firstTokenNode: SearchQuotientNode = new SearchQuotientRoot(plainModel); for(let i=0; i < 3; i++) { - firstTokenNode = new LegacyQuotientSpur(firstTokenNode, distributions[i], inputSources[i]); + firstTokenNode = new SubstitutionQuotientSpur(firstTokenNode, distributions[i], inputSources[i]); } the_efTokens.push(new ContextToken(firstTokenNode, false)); - firstTokenNode = new LegacyQuotientSpur(firstTokenNode, [distributions[3][1]], { + firstTokenNode = new SubstitutionQuotientSpur(firstTokenNode, [distributions[3][1]], { ...inputSources[3], subsetId: generateSubsetId() }); // whitespace token alternate - using the ' ' input instead. 
const whitespaceToken = new ContextToken( - new LegacyQuotientSpur( + new SubstitutionQuotientSpur( new SearchQuotientRoot(plainModel), [distributions[3][0]], { ...inputSources[3], subsetId: generateSubsetId() } @@ -112,11 +111,11 @@ function buildFixture_therefore() { let secondTokenNode: SearchQuotientNode = new SearchQuotientRoot(plainModel); for(let i=4; i < distributions.length; i++) { - firstTokenNode = new LegacyQuotientSpur(firstTokenNode, distributions[i], { + firstTokenNode = new SubstitutionQuotientSpur(firstTokenNode, distributions[i], { ...inputSources[i], subsetId: generateSubsetId() }); - secondTokenNode = new LegacyQuotientSpur(secondTokenNode, distributions[i], { + secondTokenNode = new SubstitutionQuotientSpur(secondTokenNode, distributions[i], { ...inputSources[i], subsetId: generateSubsetId() }) @@ -171,17 +170,16 @@ function buildFixture_terminalWhitespace() { const fullTokens: ContextToken[] = []; const lastToken: ContextToken[] = []; - // TODO: Use SubstitutionQuotientSpur instead! let firstTokenNode: SearchQuotientNode = new SearchQuotientRoot(plainModel); for(let i=0; i < 5; i++) { - firstTokenNode = new LegacyQuotientSpur(firstTokenNode, distributions[i], inputSources[i]); + firstTokenNode = new SubstitutionQuotientSpur(firstTokenNode, distributions[i], inputSources[i]); } fullTokens.push(new ContextToken(firstTokenNode, false)); // whitespace token alternate - using the ' ' input instead. const whitespaceToken = new ContextToken( - new LegacyQuotientSpur( + new SubstitutionQuotientSpur( new SearchQuotientRoot(plainModel), distributions[5], inputSources[5], @@ -316,14 +314,16 @@ describe('TokenizationCorrector', () => { } searchResult = instance.handleNextNode(); - // There should be more results that may be found. + // There should be more searching to perform before aborting. 
assert.notEqual(searchResult.type, 'none'); do { searchResult = instance.handleNextNode(); } while(searchResult.type == 'intermediate'); - assert.notEqual(searchResult.type, 'none'); + // However, no other valid results are within correction range + // while rooted on 6 input transforms. + assert.equal(searchResult.type, 'none'); }); it('finds corrections for a group of tokens with two correctable', () => {