Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 26 additions & 20 deletions crates/parser/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
use std::mem;
use std::{mem, num::NonZeroU32};

use crate::{
SyntaxKind::{self, *},
Expand All @@ -12,6 +12,12 @@ use crate::{
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
///
/// Kept to 8 bytes: error messages live in a side table on the `Parser`
/// (the `errors` vec) and `Event::Error` only stores an index into it.
/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
/// (the offset is always ≥ 1 because the forward parent sits later in the
/// event stream).
#[derive(Debug, PartialEq)]
pub(crate) enum Event {
/// This event signifies the start of the node.
Expand Down Expand Up @@ -53,10 +59,7 @@ pub(crate) enum Event {
/// ```
///
/// See also `CompletedMarker::precede`.
Start {
kind: SyntaxKind,
forward_parent: Option<u32>,
},
Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },

/// Complete the previous `Start` event
Finish,
Expand All @@ -65,20 +68,14 @@ pub(crate) enum Event {
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
Token {
kind: SyntaxKind,
n_raw_tokens: u8,
},
Token { kind: SyntaxKind, n_raw_tokens: u8 },
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
/// This event instructs whatever consumes the events to split the float literal into
/// the corresponding parts.
FloatSplitHack {
ends_in_dot: bool,
},
Error {
msg: String,
},
FloatSplitHack { ends_in_dot: bool },
/// Index into the parser's side `errors` vec.
Error { err: u32 },
}

impl Event {
Expand All @@ -87,9 +84,12 @@ impl Event {
}
}

/// Generate the syntax tree with the control of events.
pub(super) fn process(mut events: Vec<Event>) -> Output {
let mut res = Output::default();
/// Generate the syntax tree with the control of events. `errors` is the
/// side table of error messages built up alongside the `events` stream.
pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
// Each event becomes roughly one u32 in Output, so preallocate to avoid
// the amortized grow-one churn we used to see in Output::enter_node.
let mut res = Output::with_event_capacity(events.len());
let mut forward_parents = Vec::new();

for i in 0..events.len() {
Expand All @@ -104,7 +104,7 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
idx += fwd.get() as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start { kind, forward_parent } => {
Expand All @@ -131,7 +131,13 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
assert!(matches!(ev, Event::Finish), "{ev:?}");
}
Event::Error { msg } => res.error(msg),
Event::Error { err } => {
// Move the string out of the side table; each index is visited
// exactly once, so swapping with an empty String is cheap and
// avoids any clone.
let msg = mem::take(&mut errors[err as usize]);
res.error(msg);
}
}
}

Expand Down
12 changes: 6 additions & 6 deletions crates/parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ impl TopEntryPoint {
};
let mut p = parser::Parser::new(input);
entry_point(&mut p);
let events = p.finish();
let res = event::process(events);
let (events, errors) = p.finish();
let res = event::process(events, errors);

if cfg!(debug_assertions) {
let mut depth = 0;
Expand Down Expand Up @@ -169,8 +169,8 @@ impl PrefixEntryPoint {
};
let mut p = parser::Parser::new(input);
entry_point(&mut p);
let events = p.finish();
event::process(events)
let (events, errors) = p.finish();
event::process(events, errors)
}
}

Expand All @@ -195,7 +195,7 @@ impl Reparser {
let Reparser(r) = self;
let mut p = parser::Parser::new(tokens);
r(&mut p);
let events = p.finish();
event::process(events)
let (events, errors) = p.finish();
event::process(events, errors)
}
}
8 changes: 8 additions & 0 deletions crates/parser/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ pub enum Step<'a> {
}

impl Output {
/// Preallocate the event buffer. Each `Event` in the input stream maps to
/// roughly one `u32` in the output, so passing the event count as the
/// initial capacity avoids most of the amortized `Vec::grow` allocations
/// during `event::process`.
pub(crate) fn with_event_capacity(cap: usize) -> Self {
Output { event: Vec::with_capacity(cap), error: Vec::new() }
}

const EVENT_MASK: u32 = 0b1;
const TAG_MASK: u32 = 0x0000_00F0;
const N_INPUT_TOKEN_MASK: u32 = 0x0000_FF00;
Expand Down
36 changes: 27 additions & 9 deletions crates/parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! See [`Parser`].

use std::cell::Cell;
use std::{cell::Cell, num::NonZeroU32};

use drop_bomb::DropBomb;

Expand All @@ -12,6 +12,14 @@ use crate::{
input::Input,
};

/// Build a forward-parent offset. The offset is always ≥ 1 because the
/// forward-parent event is created *after* the event it forwards to, so
/// `NonZeroU32` is always valid here. Panics only on a parser bug.
#[inline]
fn fwd_parent(offset: u32) -> NonZeroU32 {
NonZeroU32::new(offset).expect("forward-parent offset must be non-zero")
}

/// `Parser` struct provides the low-level API for
/// navigating through the stream of tokens and
/// constructing the parse tree. The actual parsing
Expand All @@ -25,18 +33,27 @@ pub(crate) struct Parser<'t> {
inp: &'t Input,
pos: usize,
events: Vec<Event>,
/// Side table of error messages. `Event::Error { err }` carries an index
/// into this vec, keeping `Event` itself a flat 8-byte enum.
errors: Vec<String>,
steps: Cell<u32>,
}

const PARSER_STEP_LIMIT: usize = if cfg!(debug_assertions) { 150_000 } else { 15_000_000 };

impl<'t> Parser<'t> {
pub(super) fn new(inp: &'t Input) -> Parser<'t> {
Parser { inp, pos: 0, events: Vec::with_capacity(2 * inp.len()), steps: Cell::new(0) }
Parser {
inp,
pos: 0,
events: Vec::with_capacity(2 * inp.len()),
errors: Vec::new(),
steps: Cell::new(0),
}
}

pub(crate) fn finish(self) -> Vec<Event> {
self.events
pub(crate) fn finish(self) -> (Vec<Event>, Vec<String>) {
(self.events, self.errors)
}

/// Returns the kind of the current token.
Expand Down Expand Up @@ -206,7 +223,7 @@ impl<'t> Parser<'t> {
match &mut self.events[idx] {
Event::Start { forward_parent, kind } => {
*kind = SyntaxKind::FIELD_EXPR;
*forward_parent = Some(new_marker.pos - marker.pos);
*forward_parent = Some(fwd_parent(new_marker.pos - marker.pos));
}
_ => unreachable!(),
}
Expand Down Expand Up @@ -237,8 +254,9 @@ impl<'t> Parser<'t> {
/// structured errors with spans and notes, like rustc
/// does.
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
let msg = message.into();
self.push_event(Event::Error { msg });
let err = self.errors.len() as u32;
self.errors.push(message.into());
self.push_event(Event::Error { err });
}

/// Consume the next token if it is `kind` or emit an error
Expand Down Expand Up @@ -366,7 +384,7 @@ impl CompletedMarker {
let idx = self.start_pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
*forward_parent = Some(new_pos.pos - self.start_pos);
*forward_parent = Some(fwd_parent(new_pos.pos - self.start_pos));
}
_ => unreachable!(),
}
Expand All @@ -379,7 +397,7 @@ impl CompletedMarker {
let idx = m.pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
*forward_parent = Some(self.start_pos - m.pos);
*forward_parent = Some(fwd_parent(self.start_pos - m.pos));
}
_ => unreachable!(),
}
Expand Down