Skip to content

Commit 2759457

Browse files
committed
refactor(services): optimize merge_by_symbol_deps with indexed lookups
Pre-index added symbols by name and extract potential function calls once per file instead of per-symbol, reducing redundant iterations and improving performance for large symbol sets.
1 parent 79d37ed commit 2759457

1 file changed

Lines changed: 53 additions & 34 deletions

File tree

src/services/splitter.rs

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -395,48 +395,67 @@ impl CommitSplitter {
395395
}
396396
}
397397

398-
// Find symbol pairs that indicate dependency between groups:
399-
// A removed symbol in group X and an added symbol with same name in group Y
400-
let mut merge_pairs: Vec<(usize, usize)> = Vec::new();
401-
402-
let removed: Vec<_> = symbols.iter().filter(|s| !s.is_added).collect();
398+
// Pre-index added symbols by name for fast lookup
403399
let added: Vec<_> = symbols.iter().filter(|s| s.is_added).collect();
400+
let mut added_by_name: HashMap<&str, Vec<&CodeSymbol>> = HashMap::new();
401+
for s in &added {
402+
added_by_name.entry(&s.name).or_default().push(s);
403+
}
404404

405-
for rem in &removed {
406-
for add in &added {
407-
if rem.name == add.name
408-
&& rem.kind == add.kind
409-
&& rem.file != add.file
410-
&& let (Some(&g1), Some(&g2)) = (
411-
file_to_group.get(rem.file.as_path()),
412-
file_to_group.get(add.file.as_path()),
413-
)
414-
&& g1 != g2
415-
{
416-
merge_pairs.push((g1.min(g2), g1.max(g2)));
405+
let mut merge_pairs: Vec<(usize, usize)> = Vec::new();
406+
407+
// 1. Merge when a symbol is moved: removed in one group, added in another.
408+
for rem in symbols.iter().filter(|s| !s.is_added) {
409+
if let Some(matches) = added_by_name.get(rem.name.as_str()) {
410+
for add in matches {
411+
if rem.kind == add.kind
412+
&& rem.file != add.file
413+
&& let (Some(&g1), Some(&g2)) = (
414+
file_to_group.get(rem.file.as_path()),
415+
file_to_group.get(add.file.as_path()),
416+
)
417+
&& g1 != g2
418+
{
419+
merge_pairs.push((g1.min(g2), g1.max(g2)));
420+
}
417421
}
418422
}
419423
}
420424

421-
// Also merge when a file's diff adds a line that directly CALLS a new function
422-
// from another group. Only matches `+` lines containing `sym_name(` — much more
423-
// precise than the previous `diff.contains(sym_name)` which caused cascading merges
424-
// from import statements and type references.
425+
// 2. Merge when a file calls a NEW function from another group.
426+
// Optimization: Scan each file's diff ONCE for call-like patterns instead of per-symbol.
425427
for (idx, group) in groups.iter().enumerate() {
426428
for file in group {
427-
for sym in &added {
428-
if let Some(&sym_group) = file_to_group.get(sym.file.as_path())
429-
&& sym_group != idx
430-
{
431-
// Only match added lines (`+`) that contain a function call pattern
432-
let call_pattern = format!("{}(", sym.name);
433-
let has_call = file.diff.lines().any(|line| {
434-
line.starts_with('+')
435-
&& !line.starts_with("+++")
436-
&& line.contains(&call_pattern)
437-
});
438-
if has_call {
439-
merge_pairs.push((idx.min(sym_group), idx.max(sym_group)));
429+
let mut potential_calls = HashSet::new();
430+
431+
for line in file.diff.lines() {
432+
if line.starts_with('+') && !line.starts_with("+++") {
433+
// Extract words followed by '(' as potential function calls
434+
let mut current_word = String::new();
435+
for c in line[1..].chars() {
436+
if c.is_alphanumeric() || c == '_' {
437+
current_word.push(c);
438+
} else if c == '(' {
439+
if !current_word.is_empty() {
440+
potential_calls.insert(current_word.clone());
441+
}
442+
current_word.clear();
443+
} else {
444+
current_word.clear();
445+
}
446+
}
447+
}
448+
}
449+
450+
// Check if any potential calls match an added symbol from another group
451+
for call in &potential_calls {
452+
if let Some(matches) = added_by_name.get(call.as_str()) {
453+
for sym in matches {
454+
if let Some(&sym_group) = file_to_group.get(sym.file.as_path())
455+
&& sym_group != idx
456+
{
457+
merge_pairs.push((idx.min(sym_group), idx.max(sym_group)));
458+
}
440459
}
441460
}
442461
}

0 commit comments

Comments
 (0)