Skip to content

Commit 8d684b7

Browse files
More UnicodeSet and CharMap method (#76)
1 parent ceb928d commit 8d684b7

8 files changed

Lines changed: 1756 additions & 97 deletions

File tree

src/char-map.ts

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,27 @@ import { CharRange, CharSet } from "./char-set";
33
import { filterMut } from "./util";
44

55
export interface ReadonlyCharMap<T> extends Iterable<[CharRange, T]> {
6+
/**
7+
* Returns whether this map is empty.
8+
*
9+
* This is equivalent to `this.size === 0` and `this.entryCount === 0`.
10+
*/
611
readonly isEmpty: boolean;
12+
/**
13+
* The number of characters in this map. This is different from {@link entryCount}.
14+
*
15+
* This is equivalent to `[...this.keys()].reduce((count, range) => count + range.max - range.min + 1, 0)`.
16+
*/
17+
readonly size: number;
18+
/**
19+
* The number of entries in this map.
20+
*
21+
* This is different from {@link size}. In general, you should use {@link size}, because it has the same semantics
22+
* as `Set#size` and `Map#size`.
23+
*
24+
* This is equivalent to `[...this.entries()].length`.
25+
*/
26+
readonly entryCount: number;
727

828
/**
929
* Returns whether the given character is a key in the map.
@@ -44,14 +64,37 @@ export interface ReadonlyCharMap<T> extends Iterable<[CharRange, T]> {
4464
* @param callback
4565
*/
4666
forEach(callback: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => void): void;
67+
/**
68+
* Returns all ranges of characters that are keys in the map.
69+
*
70+
* Keys will be returned in the same order as `this.entries()`.
71+
*/
4772
keys(): Iterable<CharRange>;
73+
/**
74+
* Returns all values in the map. Values might not be unique if more than one range maps to the same value.
75+
*
76+
* Values will be returned in the same order as `this.entries()`.
77+
*/
4878
values(): Iterable<T>;
49-
entries(range?: CharRange): Iterable<[CharRange, T]>;
79+
/**
80+
* Returns all key-value pairs in the map.
81+
*
82+
* Entries will be returned in the order of ascending ranges.
83+
*/
84+
entries(): Iterable<[CharRange, T]>;
5085

5186
/**
5287
* Returns a mapping from the values of this map to its keys.
5388
*/
5489
invert(maxCharacter: Char): Map<T, CharSet>;
90+
91+
/**
92+
* Returns a new map with all values mapped by the given function.
93+
*
94+
* If no function is given, the identity function is used.
95+
*/
96+
copy(): CharMap<T>;
97+
copy<U>(mapFn: (value: T) => U): CharMap<U>;
5598
}
5699

57100
interface Item<T> {
@@ -76,6 +119,16 @@ export class CharMap<T> implements ReadonlyCharMap<T> {
76119
/**
 * Whether this map has no entries at all.
 */
get isEmpty(): boolean {
	const entries = this._array;
	return entries.length === 0;
}
122+
/**
 * The total number of characters covered by the key ranges of this map.
 *
 * Each entry contributes the length of its inclusive range, i.e. `max - min + 1`.
 */
get size(): number {
	return this._array.reduce((total, { range }) => total + (range.max - range.min + 1), 0);
}
129+
/**
 * The number of entries (range-value pairs) stored in this map.
 */
get entryCount(): number {
	const { length } = this._array;
	return length;
}
79132

80133
private _indexOf(char: Char): number | undefined {
81134
let l = 0;
@@ -402,6 +455,34 @@ export class CharMap<T> implements ReadonlyCharMap<T> {
402455
this._array = [];
403456
}
404457

458+
/**
 * Creates a new map from the entries of this map.
 *
 * Without `mapFn`, every entry is copied as is. With `mapFn`, every value is passed through it, and
 * directly adjacent ranges whose mapped values are strictly equal (`===`) are merged into one entry.
 *
 * @param mapFn Optional function that computes the value stored in the new map for each entry.
 * @returns A new map; the entry objects of this map are not reused.
 */
copy(): CharMap<T>;
copy<U>(mapFn: (value: T) => U): CharMap<U>;
copy<U>(mapFn?: (value: T) => U): CharMap<U> {
	if (mapFn) {
		const mapped = new CharMap<U>();
		mapped._array = this._array.map(({ range, value }) => ({ range, value: mapFn(value) }));

		// Mapping may give neighbouring ranges the same value, so join those into a single entry.
		filterMut(mapped._array, (current, previous) => {
			if (!previous || previous.range.max + 1 !== current.range.min || previous.value !== current.value) {
				return true;
			}
			// Extend the previous entry over the current range and drop the current entry.
			previous.range = { min: previous.range.min, max: current.range.max };
			return false;
		});

		return mapped;
	}

	// No mapping function: plain copy of all entries. The values are unchanged, so `U` is `T` here.
	const clone = new CharMap<T>();
	clone._array = this._array.map(({ range, value }) => ({ range, value }));
	return clone as unknown as CharMap<U>;
}
485+
405486
map(mapFn: (value: T, chars: CharRange, map: ReadonlyCharMap<T>) => T): void {
406487
for (const item of this._array) {
407488
item.value = mapFn(item.value, item.range, this);

src/js/char-case-folding.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,23 @@ import { Maximum } from "./maximum";
55
import { UnicodeCaseFolding } from "./unicode";
66
import { UTF16CaseFolding } from "./utf16-case-folding";
77

8+
/**
9+
* A set of functions that can be used to perform case-insensitive matching.
10+
*
11+
* It must fulfill the following conditions:
12+
*
13+
* 1. `canonicalize` must be idempotent, i.e. `canonicalize(canonicalize(char)) === canonicalize(char)`.
14+
* 2. `toCharSet(canonicalize(a))` is the set of all characters `c` such that `canonicalize(a) === canonicalize(c)`.
15+
*/
816
export interface CharCaseFolding {
	/**
	 * Maps a character to its canonical representative. This is typically the lowercase form of the character.
	 *
	 * When omitted, the identity function is used, i.e. every character is its own canonical form. In that case,
	 * `toCharSet` must return a set containing only the given character.
	 *
	 * @default char => char
	 */
	readonly canonicalize?: (char: Char) => Char;
	/**
	 * Returns the set of characters associated with the given character.
	 *
	 * For a canonicalized character, this must be the set of all characters that canonicalize to it.
	 */
	readonly toCharSet: (char: Char) => CharSet;
}

src/js/parser.ts

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -730,27 +730,8 @@ export class Parser {
730730
return context.nc.newCharClass(element, chars.chars);
731731
}
732732

733-
// ECMAScript spec says that alternatives are sorted by descending length.
734-
// This isn't enough for uniqueness though, so we also sort by code point.
735-
const words = [...chars.accept.wordSets];
736-
if (!chars.chars.isEmpty) {
737-
words.push([chars.chars]);
738-
}
739-
words.sort((a, b) => {
740-
if (a.length !== b.length) {
741-
return b.length - a.length;
742-
}
743-
for (let i = 0; i < a.length; i++) {
744-
const diff = a[i].compare(b[i]);
745-
if (diff !== 0) {
746-
return diff;
747-
}
748-
}
749-
return 0;
750-
});
751-
752733
const alternation = context.nc.newAlt(element);
753-
for (const word of words) {
734+
for (const word of chars.wordSets) {
754735
const alternative = context.nc.newConcat(element);
755736
alternation.alternatives.push(alternative);
756737

0 commit comments

Comments
 (0)