nubster-opensources · pierrick-fonquerne · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/tools/ai-review/src/github.rs b/tools/ai-review/src/github.rs
@@ -20,10 +20,14 @@ pub struct InlineComment {
 /// finding has no per-file patch (file-level or cross-file findings).
 const PATCH_FALLBACK_CHARS: usize = 8_000;
 
+/// Number of head-file lines shown on each side of a finding's line to a lens.
+const LENS_WINDOW_LINES: u32 = 40;
+
 /// The PR diff in two shapes: a concatenated view and a per-file patch map.
 pub struct DiffContext {
     pub full: String,
     pub by_file: HashMap<String, String>,
+    pub head_files: HashMap<String, String>,
     pub file_count: usize,
 }
 
@@ -38,6 +42,26 @@ impl DiffContext {
         }
     }
 
+    /// Returns the context handed to a lens for `finding`: the file patch plus,
+    /// when the head file is available and the finding is line-located, a
+    /// numbered window of the real file around that line. Falls back to the
+    /// patch alone for file-level findings or files without head content.
+    #[must_use]
+    pub fn lens_context(&self, finding: &SynthFinding) -> String {
+        let patch = self.patch_for(finding).to_string();
+        if finding.line == 0 {
+            return patch;
+        }
+        let Some(head) = self.head_files.get(&finding.file) else {
+            return patch;
+        };
+        let window = head_window(head, finding.line, LENS_WINDOW_LINES);
+        if window.is_empty() {
+            return patch;
+        }
+        format!("{patch}\n\nFull-file context around the finding (head):\n{window}")
+    }
+
     /// Reports whether the finding's line falls inside a hunk of its file
     /// patch: `Some(true)` when it does, `Some(false)` when the patch is
     /// known and the line belongs to no hunk, and `None` when the question
@@ -96,6 +120,24 @@ fn parse_hunk_header(line: &str) -> Option<(u32, u32)> {
     }
 }
 
+/// Returns the lines of `content` within `radius` of `line` (1-based), each
+/// prefixed by its line number, joined by newlines. Empty when out of range.
+fn head_window(content: &str, line: u32, radius: u32) -> String {
+    let line = usize::try_from(line).unwrap_or(usize::MAX);
+    let radius = usize::try_from(radius).unwrap_or(usize::MAX);
+    let start = line.saturating_sub(radius).max(1);
+    let end = line.saturating_add(radius);
+    content
+        .lines()
+        .enumerate()
+        .filter_map(|(idx, text)| {
+            let number = idx + 1;
+            (number >= start && number <= end).then(|| format!("{number:>5} {text}"))
+        })
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
 /// Truncates `s` to at most `max` bytes without splitting a UTF-8 sequence.
 fn safe_truncate(s: &str, max: usize) -> &str {
     if s.len() <= max {
@@ -137,6 +179,7 @@ pub async fn fetch_diff_context(
     Ok(DiffContext {
         full,
         by_file,
+        head_files: HashMap::new(),
         file_count,
     })
 }
@@ -184,6 +227,61 @@ pub async fn fetch_head_sha(
     Ok(pr.head.sha)
 }
 
+/// Fetches the decoded text content of `path` at commit `sha`. Returns
+/// `Ok(None)` when the content cannot be decoded as UTF-8 text (binary file or
+/// submodule). A missing path surfaces as an `Err`; callers that want a missing
+/// file to be a no-op should treat the error as a skip.
+pub async fn fetch_head_file(
+    octo: &Octocrab,
+    owner: &str,
+    repo: &str,
+    path: &str,
+    sha: &str,
+) -> anyhow::Result<Option<String>> {
+    let contents = octo
+        .repos(owner, repo)
+        .get_content()
+        .path(path)
+        .r#ref(sha)
+        .send()
+        .await
+        .context("failed to fetch head file content")?;
+    Ok(contents
+        .items
+        .into_iter()
+        .next()
+        .and_then(|item| item.decoded_content()))
+}
+
+/// Fills `ctx.head_files` with the head content of every distinct file named by
+/// a line-located finding that also has a per-file patch. Failures degrade
+/// gracefully: a missing or unreadable file is simply skipped.
+pub async fn populate_head_files(
+    octo: &Octocrab,
+    owner: &str,
+    repo: &str,
+    sha: &str,
+    findings: &[SynthFinding],
+    ctx: &mut DiffContext,
+) {
+    let mut wanted: Vec<String> = findings
+        .iter()
+        .filter(|f| f.line > 0 && ctx.by_file.contains_key(&f.file))
+        .map(|f| f.file.clone())
+        .collect();
+    wanted.sort();
+    wanted.dedup();
+    for path in wanted {
+        match fetch_head_file(octo, owner, repo, &path, sha).await {
+            Ok(Some(content)) => {
+                ctx.head_files.insert(path, content);
+            }
+            Ok(None) => {}
+            Err(error) => eprintln!("warning: could not fetch head file {path}: {error}"),
+        }
+    }
+}
+
 /// Upsert the global bot comment (edit if marker found, create otherwise).
 pub async fn upsert_global_comment(
     octo: &Octocrab,
@@ -336,6 +434,7 @@ mod tests {
         DiffContext {
             full: format!("--- {file}\n{patch}\n"),
             by_file,
+            head_files: HashMap::new(),
             file_count: 1,
         }
     }
@@ -368,6 +467,52 @@ mod tests {
         assert_eq!((hunks[1].new_start, hunks[1].new_count), (11, 4));
     }
 
+    const HEAD_FILE: &str = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8";
+
+    #[test]
+    fn head_window_returns_numbered_lines_around_target() {
+        let window = head_window(HEAD_FILE, 4, 2);
+        assert!(window.contains("    2 line2"));
+        assert!(window.contains("    6 line6"));
+        assert!(!window.contains("line1"));
+        assert!(!window.contains("line7"));
+    }
+
+    #[test]
+    fn head_window_clamps_at_file_bounds() {
+        let window = head_window(HEAD_FILE, 1, 2);
+        assert!(window.contains("    1 line1"));
+        assert!(window.contains("    3 line3"));
+        assert!(!window.contains("line4"));
+    }
+
+    #[test]
+    fn lens_context_appends_head_window_when_available() {
+        let mut ctx = ctx_with("a.rs", "@@ -1,2 +1,3 @@\n+changed");
+        ctx.head_files
+            .insert("a.rs".to_string(), HEAD_FILE.to_string());
+        let out = ctx.lens_context(&finding_at("a.rs", 4));
+        assert!(out.contains("+changed"));
+        assert!(out.contains("Full-file context"));
+        assert!(out.contains("line4"));
+    }
+
+    #[test]
+    fn lens_context_falls_back_to_patch_without_head_or_line() {
+        let ctx = ctx_with("a.rs", "@@ -1,2 +1,3 @@\n+changed");
+        assert_eq!(
+            ctx.lens_context(&finding_at("a.rs", 4)),
+            ctx.patch_for(&finding_at("a.rs", 4))
+        );
+        let mut ctx2 = ctx_with("a.rs", "@@ -1,2 +1,3 @@\n+changed");
+        ctx2.head_files
+            .insert("a.rs".to_string(), HEAD_FILE.to_string());
+        assert_eq!(
+            ctx2.lens_context(&finding_at("a.rs", 0)),
+            ctx2.patch_for(&finding_at("a.rs", 0))
+        );
+    }
+
     #[test]
     fn line_in_patch_reports_hunk_membership() {
         let ctx = ctx_with("src/lib.rs", TWO_HUNK_PATCH);
@@ -381,4 +526,14 @@ mod tests {
         assert_eq!(ctx.line_in_patch(&finding_at("src/lib.rs", 0)), None);
         assert_eq!(ctx.line_in_patch(&finding_at("unknown.rs", 3)), None);
     }
+
+    #[test]
+    fn lens_context_falls_back_when_line_beyond_eof() {
+        assert_eq!(head_window(HEAD_FILE, 1000, 40), "");
+        let mut ctx = ctx_with("a.rs", "@@ -1,2 +1,3 @@\n+changed");
+        ctx.head_files
+            .insert("a.rs".to_string(), HEAD_FILE.to_string());
+        let finding = finding_at("a.rs", 1000);
+        assert_eq!(ctx.lens_context(&finding), ctx.patch_for(&finding));
+    }
 }
diff --git a/tools/ai-review/src/mistral.rs b/tools/ai-review/src/mistral.rs
@@ -111,43 +111,46 @@ Return valid JSON only, no markdown fences."#
 }
 
 fn security_system_prompt() -> String {
-    r#"You are a security engineer auditing a pull request for Nubster, a sovereign hybrid DevOps platform. Focus exclusively on security vulnerabilities.
-
-## Nubster security context
-- Identity provider handling OIDC/OAuth2/SAML/SCIM — auth/authz bugs are critical
-- Multi-tenant: data isolation between organizations is mandatory (tenant data leakage = critical)
-- Sovereign platform: EU data residency enforced, no data exfiltration
-- Secrets: GITHUB_TOKEN, MISTRAL_API_KEY, DB credentials — must never appear in logs or error messages
-
-## Security checklist
-1. Injection: SQL injection, command injection (std::process::Command with user input), path traversal, SSRF
-2. Auth/Authz: missing permission checks, privilege escalation, insecure token handling, JWT alg confusion
-3. Secrets exposure: hardcoded credentials, secrets in logs, secrets in error messages or debug output
-4. Cryptography: weak algorithms (MD5/SHA1 for integrity), IV reuse, predictable PRNG for security-sensitive values
-5. Race conditions: TOCTOU vulnerabilities, concurrent state mutation without synchronization
-6. Integer handling: overflow in security-sensitive arithmetic (use checked_add, saturating_add)
-7. Rust-specific: unsafe blocks with justification missing, transmute misuse, raw pointer lifetime issues
-8. Input validation: missing bounds checks on external inputs, deserializing untrusted data without limits
-9. Multi-tenancy: missing tenant scoping in queries, cross-tenant data access
-10. Data leakage: PII in logs, internal paths/structure in error responses returned to clients
+    r#"You are a security engineer auditing a pull request diff. Focus exclusively on security vulnerabilities introduced by THIS diff.
+
+## Scope discipline (read first)
+- Judge ONLY lines this diff adds or changes (added lines start with `+`). Unchanged context lines are background, never a finding on their own.
+- Every finding MUST quote the exact added line it is about. If you cannot point to an added line, do not report it.
+- A secret injected through CI configuration (for example a `${{ secrets.NAME }}` reference passed to a step) is the EXPECTED, correct way to use a secret. It is only a finding if that value is echoed, logged, written to output, or persisted in cleartext on an added line.
+- Permissions, scopes, or configuration on lines this diff does not modify are out of scope.
+
+## Common false positives to NOT report
+- A secret reference used as an action input or environment variable (expected usage).
+- Broad permissions or settings that already existed and are untouched by this diff.
+- Theoretical issues with no exploit path visible in the added lines.
+- Anything the compiler or Clippy (pedantic, deny warnings) already enforces.
+
+## What IS a finding (only on added/changed lines)
+1. Injection: SQL, command (process spawn with attacker-controlled input), path traversal, SSRF.
+2. AuthN/AuthZ: missing permission check, privilege escalation, broken token validation, JWT alg confusion.
+3. Secret leakage: a secret value newly written to a log, error message, output, or response.
+4. Cryptography: weak primitive for integrity, IV/nonce reuse, predictable randomness for security values.
+5. Concurrency: TOCTOU, unsynchronised shared-state mutation with a security impact.
+6. Untrusted input: missing bounds or size limits when deserialising external data.
+7. Multi-tenancy: a new query or path missing tenant/scope isolation.
 
 Respond ONLY with a valid JSON object:
 {
-  "summary": "Overall security posture of this PR in 2-4 sentences.",
+  "summary": "Overall security posture of this diff in 2-4 sentences.",
   "strengths": ["security practice done well"],
   "findings": [
     {
       "file": "exact/path/to/file.rs",
       "line": 42,
       "severity": "critical",
-      "message": "Vulnerability class + how it could be exploited + recommended fix, in one sentence."
+      "message": "Quote the added line, name the vulnerability class, and the fix, in one sentence."
     }
   ],
-  "security": "SECURE | CONCERNS | CRITICAL_ISSUES — with one sentence justification."
+  "security": "SECURE | CONCERNS | CRITICAL_ISSUES - with one sentence justification."
 }
 
-severity: "critical" = exploitable vulnerability; "minor" = hardening suggestion.
-line: exact line, 0 for file-level. Return valid JSON only, no markdown fences."#
+severity: "critical" = exploitable vulnerability on an added line; "minor" = hardening suggestion on an added line.
+line: exact line number of the added line, 0 only for a genuinely file-level concern. Return valid JSON only, no markdown fences."#
         .to_string()
 }