22//! It is intended to be completely decoupled from the
33//! parser, so that the tree representation
44//! and the parser algorithm can evolve independently.
5- use std:: mem;
5+ use std:: { mem, num :: NonZeroU32 } ;
66
77use crate :: {
88 SyntaxKind :: { self , * } ,
@@ -12,6 +12,12 @@ use crate::{
1212/// `Parser` produces a flat list of `Event`s.
1313/// They are converted to a tree-structure in
1414/// a separate pass, via `TreeBuilder`.
15+ ///
16+ /// Kept to 8 bytes: error messages live in a side table on the `Parser`
17+ /// (the `errors` vec) and `Event::Error` only stores an index into it.
18+ /// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
19+ /// (the offset is always ≥ 1 because the forward parent sits later in the
20+ /// event stream).
1521#[ derive( Debug , PartialEq ) ]
1622pub ( crate ) enum Event {
1723 /// This event signifies the start of the node.
@@ -53,10 +59,7 @@ pub(crate) enum Event {
5359 /// ```
5460 ///
5561 /// See also `CompletedMarker::precede`.
56- Start {
57- kind : SyntaxKind ,
58- forward_parent : Option < u32 > ,
59- } ,
62+ Start { kind : SyntaxKind , forward_parent : Option < NonZeroU32 > } ,
6063
6164 /// Complete the previous `Start` event
6265 Finish ,
@@ -65,20 +68,14 @@ pub(crate) enum Event {
6568 /// `n_raw_tokens` is used to glue complex contextual tokens.
6669 /// For example, lexer tokenizes `>>` as `>`, `>`, and
6770 /// `n_raw_tokens = 2` is used to produce a single `>>`.
68- Token {
69- kind : SyntaxKind ,
70- n_raw_tokens : u8 ,
71- } ,
71+ Token { kind : SyntaxKind , n_raw_tokens : u8 } ,
7272 /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
7374 /// instead of an integer literal followed by a dot, as the lexer has no contextual knowledge.
7474 /// This event instructs whatever consumes the events to split the float literal into
7575 /// the corresponding parts.
76- FloatSplitHack {
77- ends_in_dot : bool ,
78- } ,
79- Error {
80- msg : String ,
81- } ,
76+ FloatSplitHack { ends_in_dot : bool } ,
77+ /// Index into the parser's side `errors` vec.
78+ Error { err : u32 } ,
8279}
8380
8481impl Event {
@@ -87,9 +84,12 @@ impl Event {
8784 }
8885}
8986
90- /// Generate the syntax tree with the control of events.
91- pub ( super ) fn process ( mut events : Vec < Event > ) -> Output {
92- let mut res = Output :: default ( ) ;
87+ /// Generate the syntax tree with the control of events. `errors` is the
88+ /// side table of error messages built up alongside the `events` stream.
89+ pub ( super ) fn process ( mut events : Vec < Event > , mut errors : Vec < String > ) -> Output {
90+ // Each event becomes roughly one u32 in Output, so preallocate to avoid
91+ // the amortized grow-one churn we used to see in Output::enter_node.
92+ let mut res = Output :: with_event_capacity ( events. len ( ) ) ;
9393 let mut forward_parents = Vec :: new ( ) ;
9494
9595 for i in 0 ..events. len ( ) {
@@ -104,7 +104,7 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
104104 let mut idx = i;
105105 let mut fp = forward_parent;
106106 while let Some ( fwd) = fp {
107- idx += fwd as usize ;
107+ idx += fwd. get ( ) as usize ;
108108 // append `A`'s forward_parent `B`
109109 fp = match mem:: replace ( & mut events[ idx] , Event :: tombstone ( ) ) {
110110 Event :: Start { kind, forward_parent } => {
@@ -131,7 +131,13 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
131131 let ev = mem:: replace ( & mut events[ i + 1 ] , Event :: tombstone ( ) ) ;
132132 assert ! ( matches!( ev, Event :: Finish ) , "{ev:?}" ) ;
133133 }
134- Event :: Error { msg } => res. error ( msg) ,
134+ Event :: Error { err } => {
135+ // Move the string out of the side table; each index is visited
136+ // exactly once, so swapping with an empty String is cheap and
137+ // avoids any clone.
138+ let msg = mem:: take ( & mut errors[ err as usize ] ) ;
139+ res. error ( msg) ;
140+ }
135141 }
136142 }
137143
0 commit comments