From a23bc4e88fc7fb2aa62dae67dbec9228e0921ce2 Mon Sep 17 00:00:00 2001 From: Peter Travers Date: Mon, 24 Nov 2025 10:29:58 -0500 Subject: [PATCH 1/5] Add support for nested CSS in ts, js and tsx --- src/parse/syntax.rs | 63 +++++++++++ src/parse/tree_sitter_parser.rs | 193 +++++++++++++++++++++++++++++++- 2 files changed, 251 insertions(+), 5 deletions(-) diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index 33143e39e2..afc3ee36c1 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -117,6 +117,69 @@ pub(crate) enum Syntax<'a> { }, } +pub struct SyntaxTreeDisplay<'a>(Vec<&'a Syntax<'a>>); + +#[allow(dead_code)] +impl<'a> SyntaxTreeDisplay<'a> { + pub fn from(tree: Vec<&'a Syntax<'a>>) -> Self { + Self(tree) + } + + fn print_node( + f: &mut fmt::Formatter<'_>, + node: &Syntax, + prefix: &str, + is_last: bool, + ) -> fmt::Result { + let connector = if is_last { "└── " } else { "├── " }; + + match node { + Syntax::List { + open_position, + close_position, + children, + .. + } => { + writeln!( + f, + "{}{}List (open: {:?}, close: {:?})", + prefix, connector, open_position, close_position + )?; + + // Prepare prefix for children + // If this was the last node, children don't need the vertical bar │ + let child_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); + + for (i, child) in children.iter().enumerate() { + Self::print_node(f, child, &child_prefix, i == children.len() - 1)?; + } + } + Syntax::Atom { + content, + position, + kind, + .. + } => { + writeln!( + f, + "{}{}Atom: {:?} {:#?} ({:?})", + prefix, connector, content, kind, position + )?; + } + } + Ok(()) + } +} + +impl<'a> fmt::Display for SyntaxTreeDisplay<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, node) in self.0.iter().enumerate() { + SyntaxTreeDisplay::print_node(f, node, "", i == self.0.len() - 1)?; + } + Ok(()) + } +} + fn dbg_pos(pos: &[SingleLineSpan]) -> String { match pos { [] => "-".into(), diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index ffb2046c05..721fa08ef2 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -94,6 +94,23 @@ const OCAML_ATOM_NODES: [&str; 6] = [ "attribute_id", ]; +const TS_CSS_INJECTION_QUERY: &str = r#" +(_ + ; Capture the 'callee' or 'left' side. + ; We accept any named node here to act as the anchor. + (_) @callee + + ; Capture the template string contents. + (template_string) @contents + + ; Predicate: The 'callee' text must start with 'styled' or 'css'. + ; This matches "styled.div", "styled(C)", and even "styled.div<{}>" + ; (which might be parsed as a binary expression 'styled.div < {}' or + ; a call expression depending on the grammar version). + (#match? @callee "^(styled|css)") +) +"#; + pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { use guess::Language::*; match language { @@ -590,7 +607,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new(&language, tree_sitter_javascript::HIGHLIGHT_QUERY) .unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } Json => { @@ -1053,7 +1073,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { atom_nodes: ["string", "template_string"].into_iter().collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } TypeScript => { @@ -1070,7 +1093,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } Xml => { @@ -1198,8 +1224,39 @@ pub(crate) fn parse_subtrees( let mut query_matches = query_cursor.matches(&language.query, tree.root_node(), src.as_bytes()); + let content_idx = language + .query + .capture_index_for_name("contents") + .unwrap_or(0); + while let Some(m) = query_matches.next() { - let node = m.nodes_for_capture_index(0).next().unwrap(); + let node = match m.nodes_for_capture_index(content_idx).next() { + None => continue, + Some(node) => node, + }; + + let mut range = node.range(); + match (language.parse_as, node.grammar_name()) { + (guess::Language::Css, "template_string") => { + // If this is a template string (starts/ends with backtick), shrink the range. + // We check the text to be safe, or just assume based on the node kind. + let node_text = &src[node.start_byte()..node.end_byte()]; + if node_text.starts_with('`') + && node_text.ends_with('`') + && node_text.len() >= 2 + { + range.start_byte += 1; + range.end_byte -= 1; + + // We also need to update start_point and end_point for Tree-sitter to be happy. + // Since we know a backtick is 1 column wide and doesn't span lines: + range.start_point.column += 1; + range.end_point.column -= 1; + } + } + _ => {} + }; + if node.byte_range().is_empty() { continue; } @@ -1210,7 +1267,7 @@ pub(crate) fn parse_subtrees( .set_language(&subconfig.language) .expect("Incompatible tree-sitter version"); parser - .set_included_ranges(&[node.range()]) + .set_included_ranges(&[range]) .expect("Incompatible tree-sitter version"); let tree = parser.parse(src, None).unwrap(); @@ -1833,8 +1890,12 @@ fn atom_from_cursor<'a>( #[cfg(test)] mod tests { + use std::collections::VecDeque; + use strum::IntoEnumIterator as _; + use crate::parse::syntax::SyntaxTreeDisplay; + use super::*; /// Simple smoke test for tree-sitter parsing. Having a test also @@ -1880,6 +1941,128 @@ mod tests { }; } + fn assert_contains_atoms<'a>(nodes: &[&'a Syntax<'a>], expected_sequence: Vec>) { + let mut to_search = VecDeque::from(nodes.to_vec()); + let mut expected_iter = expected_sequence.into_iter(); + let mut current_expected = expected_iter.next(); + + while let Some(node) = to_search.pop_front() { + if let Some(expected) = ¤t_expected { + match node { + Syntax::List { children, .. } => { + // Extract just the normal atoms from this list to see if they match the line + let atom_texts: Vec<&str> = children + .iter() + .filter_map(|child| match child { + Syntax::Atom { content, .. } => Some(content.as_str()), + _ => None, + }) + .collect(); + + // If this list matches the current expected line, advance expectation + if !atom_texts.is_empty() && atom_texts == *expected { + current_expected = expected_iter.next(); + } + + for child in children.iter().rev() { + to_search.push_front(child); + } + } + _ => {} + } + } else { + // All expectations met + return; + } + } + + if let Some(remaining) = current_expected { + panic!( + "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{}", + remaining, + SyntaxTreeDisplay::from(nodes.to_vec()) + ); + } + } + + #[test] + fn test_typescript_css_injection_table() { + let arena = Arena::new(); + let configs = vec![ + from_language(guess::Language::TypeScript), + from_language(guess::Language::TypeScriptTsx), + from_language(guess::Language::JavaScript), + from_language(guess::Language::JavascriptJsx), + ]; + + let cases = vec![ + // Case 1: Standard styled.button + ( + r#" + const Button = styled.button` + background: #BF4F74; + border-radius: 3px; + border: none; + color: white; + ` + "#, + vec![ + vec!["background", ":", "#BF4F74", ";"], + vec!["border-radius", ":", "3px", ";"], + vec!["border", ":", "none", ";"], + vec!["color", ":", "white", ";"], + ], + ), + // Case 2: Component wrapping styled(C) + ( + r#" + const Button = styled(OtherButton)` + color: white; + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 3: Helper css`...` + ( + r#" + const Button = css` + color: white; + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 4: Nested Interpolation + ( + r#" + const Button = styled.button` + color: white; + ${props => props.$withSeparator && css`padding-top: 22px;`} + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 5: Generics + ( + r#" + export const Button = styled.button<{}>` color: white; `; + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 6: Multiline Edge Case + ( + "\nconst X = styled.div`\ncolor: white;\n`", + vec![vec!["color", ":", "white", ";"]], + ), + ]; + + for config in configs { + for (src, expected_atoms) in cases.iter() { + let nodes = parse(&arena, src, &config, false); + assert_contains_atoms(&nodes, expected_atoms.clone()); + } + } + } + /// Ensure that we don't crash when loading any of the /// configs. This can happen on bad highlighting/foo.scm files. #[test] From 3f741de281034b285f92f6e28fd034a7ffdaf503 Mon Sep 17 00:00:00 2001 From: Peter Travers Date: Mon, 24 Nov 2025 13:54:51 -0500 Subject: [PATCH 2/5] refactor to pretty print --- src/parse/syntax.rs | 63 --------------------------------- src/parse/tree_sitter_parser.rs | 8 +++-- 2 files changed, 6 insertions(+), 65 deletions(-) diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index afc3ee36c1..33143e39e2 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -117,69 +117,6 @@ pub(crate) enum Syntax<'a> { }, } -pub struct SyntaxTreeDisplay<'a>(Vec<&'a Syntax<'a>>); - -#[allow(dead_code)] -impl<'a> SyntaxTreeDisplay<'a> { - pub fn from(tree: Vec<&'a Syntax<'a>>) -> Self { - Self(tree) - } - - fn print_node( - f: &mut fmt::Formatter<'_>, - node: &Syntax, - prefix: &str, - is_last: bool, - ) -> fmt::Result { - let connector = if is_last { "└── " } else { "├── " }; - - match node { - Syntax::List { - open_position, - close_position, - children, - .. - } => { - writeln!( - f, - "{}{}List (open: {:?}, close: {:?})", - prefix, connector, open_position, close_position - )?; - - // Prepare prefix for children - // If this was the last node, children don't need the vertical bar │ - let child_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); - - for (i, child) in children.iter().enumerate() { - Self::print_node(f, child, &child_prefix, i == children.len() - 1)?; - } - } - Syntax::Atom { - content, - position, - kind, - .. - } => { - writeln!( - f, - "{}{}Atom: {:?} {:#?} ({:?})", - prefix, connector, content, kind, position - )?; - } - } - Ok(()) - } -} - -impl<'a> fmt::Display for SyntaxTreeDisplay<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (i, node) in self.0.iter().enumerate() { - SyntaxTreeDisplay::print_node(f, node, "", i == self.0.len() - 1)?; - } - Ok(()) - } -} - fn dbg_pos(pos: &[SingleLineSpan]) -> String { match pos { [] => "-".into(), diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index 721fa08ef2..1d7f3f11d1 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -1978,9 +1978,13 @@ mod tests { if let Some(remaining) = current_expected { panic!( - "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{}", + "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{:?}", remaining, - SyntaxTreeDisplay::from(nodes.to_vec()) + nodes + .iter() + .map(|node| node.dbg_content()) + .collect::>() + .join("\n") ); } } From e9898dc722630779cc3826c39c1a10bf94e67ac7 Mon Sep 17 00:00:00 2001 From: Peter Travers Date: Mon, 24 Nov 2025 14:41:43 -0500 Subject: [PATCH 3/5] fix bad refactor --- src/parse/tree_sitter_parser.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index 1d7f3f11d1..080f9be131 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -1894,8 +1894,6 @@ mod tests { use strum::IntoEnumIterator as _; - use crate::parse::syntax::SyntaxTreeDisplay; - use super::*; /// Simple smoke test for tree-sitter parsing. Having a test also From d0620a5321b43ce74205ff5e184064f326922351 Mon Sep 17 00:00:00 2001 From: Peter Travers Date: Mon, 24 Nov 2025 14:43:28 -0500 Subject: [PATCH 4/5] refactor TS -> TEMPLATE_STRING --- src/parse/tree_sitter_parser.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index 080f9be131..7528c30c28 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -94,7 +94,7 @@ const OCAML_ATOM_NODES: [&str; 6] = [ "attribute_id", ]; -const TS_CSS_INJECTION_QUERY: &str = r#" +const TEMPLATE_STRING_CSS_INJECTION_QUERY: &str = r#" (_ ; Capture the 'callee' or 'left' side. ; We accept any named node here to act as the anchor. @@ -608,7 +608,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_javascript::HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![TreeSitterSubLanguage { - query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), parse_as: Css, }], } @@ -1074,7 +1074,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), sub_languages: vec![TreeSitterSubLanguage { - query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), parse_as: Css, }], } @@ -1094,7 +1094,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), sub_languages: vec![TreeSitterSubLanguage { - query: ts::Query::new(&language, TS_CSS_INJECTION_QUERY).unwrap(), + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), parse_as: Css, }], } From 58ce7fbaa700390290ae1b05056b325741ce509f Mon Sep 17 00:00:00 2001 From: Peter Travers Date: Mon, 24 Nov 2025 15:14:03 -0500 Subject: [PATCH 5/5] refactor to less brittle tree sitter points --- src/parse/tree_sitter_parser.rs | 49 +++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index 7528c30c28..e80ccf724d 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -1216,6 +1216,7 @@ pub(crate) fn parse_subtrees( src: &str, config: &TreeSitterConfig, tree: &tree_sitter::Tree, + nl_pos: &LinePositions, ) -> DftHashMap { let mut subtrees = DftHashMap::default(); @@ -1249,9 +1250,20 @@ pub(crate) fn parse_subtrees( range.end_byte -= 1; // We also need to update start_point and end_point for Tree-sitter to be happy. - // Since we know a backtick is 1 column wide and doesn't span lines: - range.start_point.column += 1; - range.end_point.column -= 1; + // Use line positions to calculate the exact row/col for the new byte offsets. + // this handles cases where the backtick is followed by a newline or + // other complex formatting. + let start_span = nl_pos.from_region(range.start_byte, range.start_byte)[0]; + let end_span = nl_pos.from_region(range.end_byte, range.end_byte)[0]; + + range.start_point = ts::Point { + row: start_span.line.0 as usize, + column: start_span.start_col as usize, + }; + range.end_point = ts::Point { + row: end_span.line.0 as usize, + column: end_span.start_col as usize, + }; } } _ => {} @@ -1482,11 +1494,13 @@ pub(crate) fn to_syntax<'a>( let highlights = tree_highlights(tree, src, config); + // Use line numbers to handle sub-language ranges correctly. + let nl_pos = LinePositions::from(src); + // Parse sub-languages, if any, which will be used both for // highlighting and for more precise Syntax nodes where applicable. - let subtrees = parse_subtrees(src, config, tree); + let subtrees = parse_subtrees(src, config, tree, &nl_pos); - let nl_pos = LinePositions::from(src); let mut cursor = tree.walk(); let mut error_count: usize = 0; @@ -1941,11 +1955,10 @@ mod tests { fn assert_contains_atoms<'a>(nodes: &[&'a Syntax<'a>], expected_sequence: Vec>) { let mut to_search = VecDeque::from(nodes.to_vec()); - let mut expected_iter = expected_sequence.into_iter(); - let mut current_expected = expected_iter.next(); + let mut to_check = VecDeque::from(expected_sequence); while let Some(node) = to_search.pop_front() { - if let Some(expected) = ¤t_expected { + if let Some(expected) = to_check.front() { match node { Syntax::List { children, .. } => { // Extract just the normal atoms from this list to see if they match the line @@ -1959,9 +1972,10 @@ mod tests { // If this list matches the current expected line, advance expectation if !atom_texts.is_empty() && atom_texts == *expected { - current_expected = expected_iter.next(); + to_check.pop_front(); } + // Continue searching children in order (DFS) for child in children.iter().rev() { to_search.push_front(child); } @@ -1974,14 +1988,15 @@ mod tests { } } - if let Some(remaining) = current_expected { + if !to_check.is_empty() { panic!( - "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{:?}", - remaining, + "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{}", + to_check, nodes + .to_vec() .iter() .map(|node| node.dbg_content()) - .collect::>() + .collect::>() .join("\n") ); } @@ -2052,8 +2067,12 @@ mod tests { ), // Case 6: Multiline Edge Case ( - "\nconst X = styled.div`\ncolor: white;\n`", - vec![vec!["color", ":", "white", ";"]], + r#" + const Button = styled.button` + color: green; + `; + "#, + vec![vec!["color", ":", "green", ";"]], ), ];