Skip to content

Commit 0e2d810

Browse files
Merge pull request #22078 from landaire-contrib/push-lnvzwpkxmypz
perf: optimize allocation strategies of output/parser/event
2 parents 4295875 + ec88d12 commit 0e2d810

4 files changed

Lines changed: 67 additions & 35 deletions

File tree

crates/parser/src/event.rs

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//! It is intended to be completely decoupled from the
33
//! parser, so that the tree representation
44
//! and the parser algorithm can evolve independently.
5-
use std::mem;
5+
use std::{mem, num::NonZeroU32};
66

77
use crate::{
88
SyntaxKind::{self, *},
@@ -12,6 +12,12 @@ use crate::{
1212
/// `Parser` produces a flat list of `Event`s.
1313
/// They are converted to a tree-structure in
1414
/// a separate pass, via `TreeBuilder`.
15+
///
16+
/// Kept to 8 bytes: error messages live in a side table on the `Parser`
17+
/// (the `errors` vec) and `Event::Error` only stores an index into it.
18+
/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
19+
/// (the offset is always ≥ 1 because the forward parent sits later in the
20+
/// event stream).
1521
#[derive(Debug, PartialEq)]
1622
pub(crate) enum Event {
1723
/// This event signifies the start of the node.
@@ -53,10 +59,7 @@ pub(crate) enum Event {
5359
/// ```
5460
///
5561
/// See also `CompletedMarker::precede`.
56-
Start {
57-
kind: SyntaxKind,
58-
forward_parent: Option<u32>,
59-
},
62+
Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },
6063

6164
/// Complete the previous `Start` event
6265
Finish,
@@ -65,20 +68,14 @@ pub(crate) enum Event {
6568
/// `n_raw_tokens` is used to glue complex contextual tokens.
6669
/// For example, lexer tokenizes `>>` as `>`, `>`, and
6770
/// `n_raw_tokens = 2` is used to produce a single `>>`.
68-
Token {
69-
kind: SyntaxKind,
70-
n_raw_tokens: u8,
71-
},
71+
Token { kind: SyntaxKind, n_raw_tokens: u8 },
7272
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
7373
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
7474
/// This event instructs whatever consumes the events to split the float literal into
7575
/// the corresponding parts.
76-
FloatSplitHack {
77-
ends_in_dot: bool,
78-
},
79-
Error {
80-
msg: String,
81-
},
76+
FloatSplitHack { ends_in_dot: bool },
77+
/// Index into the parser's side `errors` vec.
78+
Error { err: u32 },
8279
}
8380

8481
impl Event {
@@ -87,9 +84,12 @@ impl Event {
8784
}
8885
}
8986

90-
/// Generate the syntax tree with the control of events.
91-
pub(super) fn process(mut events: Vec<Event>) -> Output {
92-
let mut res = Output::default();
87+
/// Generate the syntax tree with the control of events. `errors` is the
88+
/// side table of error messages built up alongside the `events` stream.
89+
pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
90+
// Each event becomes roughly one u32 in Output, so preallocate to avoid
91+
// the amortized grow-one churn we used to see in Output::enter_node.
92+
let mut res = Output::with_event_capacity(events.len());
9393
let mut forward_parents = Vec::new();
9494

9595
for i in 0..events.len() {
@@ -104,7 +104,7 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
104104
let mut idx = i;
105105
let mut fp = forward_parent;
106106
while let Some(fwd) = fp {
107-
idx += fwd as usize;
107+
idx += fwd.get() as usize;
108108
// append `A`'s forward_parent `B`
109109
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
110110
Event::Start { kind, forward_parent } => {
@@ -131,7 +131,13 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
131131
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
132132
assert!(matches!(ev, Event::Finish), "{ev:?}");
133133
}
134-
Event::Error { msg } => res.error(msg),
134+
Event::Error { err } => {
135+
// Move the string out of the side table; each index is visited
136+
// exactly once, so swapping with an empty String is cheap and
137+
// avoids any clone.
138+
let msg = mem::take(&mut errors[err as usize]);
139+
res.error(msg);
140+
}
135141
}
136142
}
137143

crates/parser/src/lib.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ impl TopEntryPoint {
104104
};
105105
let mut p = parser::Parser::new(input);
106106
entry_point(&mut p);
107-
let events = p.finish();
108-
let res = event::process(events);
107+
let (events, errors) = p.finish();
108+
let res = event::process(events, errors);
109109

110110
if cfg!(debug_assertions) {
111111
let mut depth = 0;
@@ -169,8 +169,8 @@ impl PrefixEntryPoint {
169169
};
170170
let mut p = parser::Parser::new(input);
171171
entry_point(&mut p);
172-
let events = p.finish();
173-
event::process(events)
172+
let (events, errors) = p.finish();
173+
event::process(events, errors)
174174
}
175175
}
176176

@@ -195,7 +195,7 @@ impl Reparser {
195195
let Reparser(r) = self;
196196
let mut p = parser::Parser::new(tokens);
197197
r(&mut p);
198-
let events = p.finish();
199-
event::process(events)
198+
let (events, errors) = p.finish();
199+
event::process(events, errors)
200200
}
201201
}

crates/parser/src/output.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ pub enum Step<'a> {
3333
}
3434

3535
impl Output {
36+
/// Preallocate the event buffer. Each `Event` in the input stream maps to
37+
/// roughly one `u32` in the output, so passing the event count as the
38+
/// initial capacity avoids most of the reallocations caused by growing the `Vec`
39+
/// during `event::process`.
40+
pub(crate) fn with_event_capacity(cap: usize) -> Self {
41+
Output { event: Vec::with_capacity(cap), error: Vec::new() }
42+
}
43+
3644
const EVENT_MASK: u32 = 0b1;
3745
const TAG_MASK: u32 = 0x0000_00F0;
3846
const N_INPUT_TOKEN_MASK: u32 = 0x0000_FF00;

crates/parser/src/parser.rs

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! See [`Parser`].
22
3-
use std::cell::Cell;
3+
use std::{cell::Cell, num::NonZeroU32};
44

55
use drop_bomb::DropBomb;
66

@@ -12,6 +12,14 @@ use crate::{
1212
input::Input,
1313
};
1414

15+
/// Build a forward-parent offset. The offset is always ≥ 1 because the
16+
/// forward-parent event is created *after* the event that points to it, so
17+
/// `NonZeroU32` is always valid here. Panics only on a parser bug.
18+
#[inline]
19+
fn fwd_parent(offset: u32) -> NonZeroU32 {
20+
NonZeroU32::new(offset).expect("forward-parent offset must be non-zero")
21+
}
22+
1523
/// `Parser` struct provides the low-level API for
1624
/// navigating through the stream of tokens and
1725
/// constructing the parse tree. The actual parsing
@@ -25,18 +33,27 @@ pub(crate) struct Parser<'t> {
2533
inp: &'t Input,
2634
pos: usize,
2735
events: Vec<Event>,
36+
/// Side table of error messages. `Event::Error { err }` carries an index
37+
/// into this vec, keeping `Event` itself a flat 8-byte enum.
38+
errors: Vec<String>,
2839
steps: Cell<u32>,
2940
}
3041

3142
const PARSER_STEP_LIMIT: usize = if cfg!(debug_assertions) { 150_000 } else { 15_000_000 };
3243

3344
impl<'t> Parser<'t> {
3445
pub(super) fn new(inp: &'t Input) -> Parser<'t> {
35-
Parser { inp, pos: 0, events: Vec::with_capacity(2 * inp.len()), steps: Cell::new(0) }
46+
Parser {
47+
inp,
48+
pos: 0,
49+
events: Vec::with_capacity(2 * inp.len()),
50+
errors: Vec::new(),
51+
steps: Cell::new(0),
52+
}
3653
}
3754

38-
pub(crate) fn finish(self) -> Vec<Event> {
39-
self.events
55+
pub(crate) fn finish(self) -> (Vec<Event>, Vec<String>) {
56+
(self.events, self.errors)
4057
}
4158

4259
/// Returns the kind of the current token.
@@ -206,7 +223,7 @@ impl<'t> Parser<'t> {
206223
match &mut self.events[idx] {
207224
Event::Start { forward_parent, kind } => {
208225
*kind = SyntaxKind::FIELD_EXPR;
209-
*forward_parent = Some(new_marker.pos - marker.pos);
226+
*forward_parent = Some(fwd_parent(new_marker.pos - marker.pos));
210227
}
211228
_ => unreachable!(),
212229
}
@@ -237,8 +254,9 @@ impl<'t> Parser<'t> {
237254
/// structured errors with spans and notes, like rustc
238255
/// does.
239256
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
240-
let msg = message.into();
241-
self.push_event(Event::Error { msg });
257+
let err = self.errors.len() as u32;
258+
self.errors.push(message.into());
259+
self.push_event(Event::Error { err });
242260
}
243261

244262
/// Consume the next token if it is `kind` or emit an error
@@ -366,7 +384,7 @@ impl CompletedMarker {
366384
let idx = self.start_pos as usize;
367385
match &mut p.events[idx] {
368386
Event::Start { forward_parent, .. } => {
369-
*forward_parent = Some(new_pos.pos - self.start_pos);
387+
*forward_parent = Some(fwd_parent(new_pos.pos - self.start_pos));
370388
}
371389
_ => unreachable!(),
372390
}
@@ -379,7 +397,7 @@ impl CompletedMarker {
379397
let idx = m.pos as usize;
380398
match &mut p.events[idx] {
381399
Event::Start { forward_parent, .. } => {
382-
*forward_parent = Some(self.start_pos - m.pos);
400+
*forward_parent = Some(fwd_parent(self.start_pos - m.pos));
383401
}
384402
_ => unreachable!(),
385403
}

0 commit comments

Comments
 (0)