diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index ffb2046c05..e80ccf724d 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -94,6 +94,23 @@ const OCAML_ATOM_NODES: [&str; 6] = [ "attribute_id", ]; +const TEMPLATE_STRING_CSS_INJECTION_QUERY: &str = r#" +(_ + ; Capture the 'callee' or 'left' side. + ; We accept any named node here to act as the anchor. + (_) @callee + + ; Capture the template string contents. + (template_string) @contents + + ; Predicate: The 'callee' text must start with 'styled' or 'css'. + ; This matches "styled.div", "styled(C)", and even "styled.div<{}>" + ; (which might be parsed as a binary expression 'styled.div < {}' or + ; a call expression depending on the grammar version). + (#match? @callee "^(styled|css)") +) +"#; + pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { use guess::Language::*; match language { @@ -590,7 +607,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new(&language, tree_sitter_javascript::HIGHLIGHT_QUERY) .unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } Json => { @@ -1053,7 +1073,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { atom_nodes: ["string", "template_string"].into_iter().collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } TypeScript => { @@ -1070,7 +1093,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), - sub_languages: vec![], + sub_languages: vec![TreeSitterSubLanguage { + query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(), + parse_as: Css, + }], } } Xml => { @@ -1190,6 +1216,7 @@ pub(crate) fn parse_subtrees( src: &str, config: &TreeSitterConfig, tree: &tree_sitter::Tree, + nl_pos: &LinePositions, ) -> DftHashMap { let mut subtrees = DftHashMap::default(); @@ -1198,8 +1225,50 @@ pub(crate) fn parse_subtrees( let mut query_matches = query_cursor.matches(&language.query, tree.root_node(), src.as_bytes()); + let content_idx = language + .query + .capture_index_for_name("contents") + .unwrap_or(0); + while let Some(m) = query_matches.next() { - let node = m.nodes_for_capture_index(0).next().unwrap(); + let node = match m.nodes_for_capture_index(content_idx).next() { + None => continue, + Some(node) => node, + }; + + let mut range = node.range(); + match (language.parse_as, node.grammar_name()) { + (guess::Language::Css, "template_string") => { + // If this is a template string (starts/ends with backtick), shrink the range. + // We check the text to be safe, or just assume based on the node kind. + let node_text = &src[node.start_byte()..node.end_byte()]; + if node_text.starts_with('`') + && node_text.ends_with('`') + && node_text.len() >= 2 + { + range.start_byte += 1; + range.end_byte -= 1; + + // We also need to update start_point and end_point for Tree-sitter to be happy. + // Use line positions to calculate the exact row/col for the new byte offsets. + // this handles cases where the backtick is followed by a newline or + // other complex formatting. + let start_span = nl_pos.from_region(range.start_byte, range.start_byte)[0]; + let end_span = nl_pos.from_region(range.end_byte, range.end_byte)[0]; + + range.start_point = ts::Point { + row: start_span.line.0 as usize, + column: start_span.start_col as usize, + }; + range.end_point = ts::Point { + row: end_span.line.0 as usize, + column: end_span.start_col as usize, + }; + } + } + _ => {} + }; + if node.byte_range().is_empty() { continue; } @@ -1210,7 +1279,7 @@ pub(crate) fn parse_subtrees( .set_language(&subconfig.language) .expect("Incompatible tree-sitter version"); parser - .set_included_ranges(&[node.range()]) + .set_included_ranges(&[range]) .expect("Incompatible tree-sitter version"); let tree = parser.parse(src, None).unwrap(); @@ -1425,11 +1494,13 @@ pub(crate) fn to_syntax<'a>( let highlights = tree_highlights(tree, src, config); + // Use line numbers to handle sub-language ranges correctly. + let nl_pos = LinePositions::from(src); + // Parse sub-languages, if any, which will be used both for // highlighting and for more precise Syntax nodes where applicable. - let subtrees = parse_subtrees(src, config, tree); + let subtrees = parse_subtrees(src, config, tree, &nl_pos); - let nl_pos = LinePositions::from(src); let mut cursor = tree.walk(); let mut error_count: usize = 0; @@ -1833,6 +1904,8 @@ fn atom_from_cursor<'a>( #[cfg(test)] mod tests { + use std::collections::VecDeque; + use strum::IntoEnumIterator as _; use super::*; @@ -1880,6 +1953,137 @@ mod tests { }; } + fn assert_contains_atoms<'a>(nodes: &[&'a Syntax<'a>], expected_sequence: Vec>) { + let mut to_search = VecDeque::from(nodes.to_vec()); + let mut to_check = VecDeque::from(expected_sequence); + + while let Some(node) = to_search.pop_front() { + if let Some(expected) = to_check.front() { + match node { + Syntax::List { children, .. } => { + // Extract just the normal atoms from this list to see if they match the line + let atom_texts: Vec<&str> = children + .iter() + .filter_map(|child| match child { + Syntax::Atom { content, .. } => Some(content.as_str()), + _ => None, + }) + .collect(); + + // If this list matches the current expected line, advance expectation + if !atom_texts.is_empty() && atom_texts == *expected { + to_check.pop_front(); + } + + // Continue searching children in order (DFS) + for child in children.iter().rev() { + to_search.push_front(child); + } + } + _ => {} + } + } else { + // All expectations met + return; + } + } + + if !to_check.is_empty() { + panic!( + "Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{}", + to_check, + nodes + .to_vec() + .iter() + .map(|node| node.dbg_content()) + .collect::>() + .join("\n") + ); + } + } + + #[test] + fn test_typescript_css_injection_table() { + let arena = Arena::new(); + let configs = vec![ + from_language(guess::Language::TypeScript), + from_language(guess::Language::TypeScriptTsx), + from_language(guess::Language::JavaScript), + from_language(guess::Language::JavascriptJsx), + ]; + + let cases = vec![ + // Case 1: Standard styled.button + ( + r#" + const Button = styled.button` + background: #BF4F74; + border-radius: 3px; + border: none; + color: white; + ` + "#, + vec![ + vec!["background", ":", "#BF4F74", ";"], + vec!["border-radius", ":", "3px", ";"], + vec!["border", ":", "none", ";"], + vec!["color", ":", "white", ";"], + ], + ), + // Case 2: Component wrapping styled(C) + ( + r#" + const Button = styled(OtherButton)` + color: white; + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 3: Helper css`...` + ( + r#" + const Button = css` + color: white; + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 4: Nested Interpolation + ( + r#" + const Button = styled.button` + color: white; + ${props => props.$withSeparator && css`padding-top: 22px;`} + ` + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 5: Generics + ( + r#" + export const Button = styled.button<{}>` color: white; `; + "#, + vec![vec!["color", ":", "white", ";"]], + ), + // Case 6: Multiline Edge Case + ( + r#" + const Button = styled.button` + color: green; + `; + "#, + vec![vec!["color", ":", "green", ";"]], + ), + ]; + + for config in configs { + for (src, expected_atoms) in cases.iter() { + let nodes = parse(&arena, src, &config, false); + assert_contains_atoms(&nodes, expected_atoms.clone()); + } + } + } + /// Ensure that we don't crash when loading any of the /// configs. This can happen on bad highlighting/foo.scm files. #[test]