Skip to content

Commit 79d37ed

Browse files
committed
refactor(safety): track accurate line numbers in full diff scanning
Replace simple line counter with hunk header parsing to accurately calculate source line numbers from unified diff format. This ensures secrets detected in full diffs report the correct file line numbers even when processing truncated diffs.
1 parent 332edaa commit 79d37ed

2 files changed

Lines changed: 65 additions & 15 deletions

File tree

src/services/safety.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -252,23 +252,28 @@ pub fn scan_full_diff_for_secrets(full_diff: &str) -> Vec<SecretMatch> {
252252
scan_full_diff_with_patterns(full_diff, &DEFAULT_PATTERNS)
253253
}
254254

255+
static HUNK_HEADER: LazyLock<Regex> =
256+
LazyLock::new(|| Regex::new(r"^@@ -\d+,?\d* \+(\d+),?\d* @@").unwrap());
257+
255258
/// Scan the full unified diff for secrets using the given pattern set.
256259
///
257260
/// This catches secrets that would be missed by `scan_for_secrets` when
258261
/// file diffs are truncated to `max_file_lines`. Parses the raw `git diff`
259-
/// output directly, tracking file paths from diff headers.
262+
/// output directly, tracking file paths from diff headers and calculating
263+
/// accurate source line numbers from hunk headers (`@@ -L,l +R,r @@`).
260264
pub fn scan_full_diff_with_patterns(
261265
full_diff: &str,
262266
patterns: &[SecretPattern],
263267
) -> Vec<SecretMatch> {
264268
let mut found = Vec::new();
265269
let mut current_file = String::new();
266-
let mut line_num: usize = 0;
270+
let mut current_line: Option<usize> = None;
267271

268272
for line in full_diff.lines() {
269273
// Track current file from diff headers
270274
if line.starts_with("diff --git ") {
271-
line_num = 0;
275+
current_file.clear();
276+
current_line = None;
272277
continue;
273278
}
274279

@@ -278,7 +283,7 @@ pub fn scan_full_diff_with_patterns(
278283
}
279284

280285
if line == "+++ /dev/null" {
281-
// Deleted file — keep current_file from --- header
286+
// Deleted file — keep current_file from --- header if we have one
282287
continue;
283288
}
284289

@@ -290,22 +295,41 @@ pub fn scan_full_diff_with_patterns(
290295
continue;
291296
}
292297

293-
line_num += 1;
298+
// Parse hunk header: @@ -1,5 +1,6 @@
299+
if let Some(caps) = HUNK_HEADER.captures(line) {
300+
if let Ok(start) = caps[1].parse::<usize>() {
301+
current_line = Some(start);
302+
continue;
303+
}
304+
}
305+
306+
let Some(ref mut line_num) = current_line else {
307+
continue; // Not inside a hunk
308+
};
294309

295-
// Only check added lines
296-
if !line.starts_with('+') || line.starts_with("+++") {
310+
// Skip diff headers like "index ...", "old mode ...", "--- ...", etc.
311+
// Also skip "No newline at end of file"
312+
if line.starts_with('\\') || line.starts_with("index") || line.starts_with("old mode") {
297313
continue;
298314
}
299315

300-
for pat in patterns {
301-
if pat.regex.is_match(line) {
302-
found.push(SecretMatch {
303-
pattern_name: pat.name.to_string(),
304-
file: current_file.clone(),
305-
line: Some(line_num),
306-
});
307-
break;
316+
// Only check added lines
317+
if line.starts_with('+') && !line.starts_with("+++") {
318+
let content = &line[1..];
319+
for pat in patterns {
320+
if pat.regex.is_match(content) {
321+
found.push(SecretMatch {
322+
pattern_name: pat.name.to_string(),
323+
file: current_file.clone(),
324+
line: Some(*line_num),
325+
});
326+
break;
327+
}
308328
}
329+
*line_num += 1;
330+
} else if line.starts_with(' ') {
331+
// Context line
332+
*line_num += 1;
309333
}
310334
}
311335

tests/safety.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,32 @@ diff --git a/src/b.rs b/src/b.rs
424424
assert_eq!(matches[0].pattern_name, "OpenAI Key");
425425
}
426426

427+
#[test]
428+
fn full_diff_accurate_line_numbers() {
429+
let full_diff = "\
430+
diff --git a/src/main.rs b/src/main.rs
431+
--- a/src/main.rs
432+
+++ b/src/main.rs
433+
@@ -10,5 +10,6 @@
434+
fn old() {}
435+
context line
436+
-removed line
437+
+API_KEY=abcdefghijklmnopqrstuvwxyz1234567890abcdef
438+
+another line
439+
fn new() {}
440+
";
441+
let matches = scan_full_diff_for_secrets(full_diff);
442+
assert_eq!(matches.len(), 1);
443+
// Hunk starts at +10
444+
// Line 10: fn old() {} (this should be the 10th line of the file, starting the hunk)
445+
// Actually, in a diff, context lines also count towards the line number in the new file.
446+
// Line 10: fn old() {}
447+
// Line 11: context line
448+
// (removed line is skipped)
449+
// Line 12: +API_KEY=...
450+
assert_eq!(matches[0].line, Some(12));
451+
}
452+
427453
// ─── New pattern detection tests ──────────────────────────────────────────────
428454

429455
#[test]

0 commit comments

Comments
 (0)