diff --git a/README.md b/README.md index 4d27e28..4972078 100644 --- a/README.md +++ b/README.md @@ -6,3 +6,52 @@ Organization-level tooling for nubster-opensources. Rust binary used in reusable GitHub Actions workflows to review pull requests and generate PR descriptions using the Mistral API. + +### Modes + +The mode is selected with the `AI_MODE` environment variable, which the +reusable workflow forwards from its `mode` input. + +| Mode | What it does | Model | +| --- | --- | --- | +| `review` | General code review (bugs, logic, security summary) | codestral | +| `security` | Security-focused audit | codestral | +| `architecture` | Architecture and design review | codestral | +| `performance` | Performance review | codestral | +| `product` | Product, compliance and developer-experience review | mistral-small | +| `describe` | Fills an empty PR description from the diff | mistral-small | +| `team` | Multi-agent review: four specialist agents run in parallel, a synthesis step merges and deduplicates their findings, then every finding is checked by a three-lens adversarial vote before a deterministic verdict | codestral + mistral-large | + +### Calling the reusable workflow + +Default per-PR review: + +```yaml +jobs: + review: + uses: nubster-opensources/.github/.github/workflows/ai-review.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + mode: review + secrets: + mistral-api-key: ${{ secrets.MISTRAL_API_KEY }} +``` + +Team mode is heavier, so trigger it on demand by adding the `ai:team` label to a +pull request: + +```yaml +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + +jobs: + team: + if: contains(github.event.pull_request.labels.*.name, 'ai:team') + uses: nubster-opensources/.github/.github/workflows/ai-review.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + mode: team + secrets: + mistral-api-key: ${{ secrets.MISTRAL_API_KEY }} +``` diff --git a/tools/ai-review/src/github.rs b/tools/ai-review/src/github.rs index 16ed15c..43a41e8 100644 --- a/tools/ai-review/src/github.rs +++ b/tools/ai-review/src/github.rs @@ -1,11 +1,13 @@ #![allow(clippy::missing_errors_doc)] +use std::collections::HashMap; use std::fmt::Write as _; use anyhow::Context; use octocrab::Octocrab; use crate::review::has_bot_marker; +use crate::types::SynthFinding; /// An inline comment to post on a specific file line. pub struct InlineComment { @@ -14,31 +16,83 @@ pub struct InlineComment { pub body: String, } -/// Returns (concatenated diff as string, number of changed files). -pub async fn fetch_diff( +/// Maximum bytes of the full diff handed to a lens as fallback context when a +/// finding has no per-file patch (file-level or cross-file findings). +const PATCH_FALLBACK_CHARS: usize = 8_000; + +/// The PR diff in two shapes: a concatenated view and a per-file patch map. +pub struct DiffContext { + pub full: String, + pub by_file: HashMap, + pub file_count: usize, +} + +impl DiffContext { + /// Returns the patch to hand to a lens for `finding`, falling back to a + /// truncated view of the full diff for file-level or cross-file findings. + #[must_use] + pub fn patch_for(&self, finding: &SynthFinding) -> &str { + match self.by_file.get(&finding.file) { + Some(patch) if !patch.is_empty() => patch.as_str(), + _ => safe_truncate(&self.full, PATCH_FALLBACK_CHARS), + } + } +} + +/// Truncates `s` to at most `max` bytes without splitting a UTF-8 sequence. +fn safe_truncate(s: &str, max: usize) -> &str { + if s.len() <= max { + return s; + } + let mut end = max; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} + +/// Returns the PR diff as a [`DiffContext`] (concatenated view plus per-file map). +pub async fn fetch_diff_context( octo: &Octocrab, owner: &str, repo: &str, pr_number: u64, -) -> anyhow::Result<(String, usize)> { +) -> anyhow::Result { let files = octo .pulls(owner, repo) .list_files(pr_number) .await .context("failed to list PR files")?; - let count = files.items.len(); - let mut diff = String::new(); + let file_count = files.items.len(); + let mut full = String::new(); + let mut by_file = HashMap::with_capacity(file_count); for file in &files.items { - writeln!(diff, "--- {}", file.filename).unwrap(); + writeln!(full, "--- {}", file.filename).unwrap(); if let Some(patch) = &file.patch { - diff.push_str(patch); + full.push_str(patch); + by_file.insert(file.filename.clone(), patch.clone()); } - diff.push('\n'); + full.push('\n'); } - Ok((diff, count)) + Ok(DiffContext { + full, + by_file, + file_count, + }) +} + +/// Returns (concatenated diff as string, number of changed files). +pub async fn fetch_diff( + octo: &Octocrab, + owner: &str, + repo: &str, + pr_number: u64, +) -> anyhow::Result<(String, usize)> { + let ctx = fetch_diff_context(octo, owner, repo, pr_number).await?; + Ok((ctx.full, ctx.file_count)) } /// Returns the current PR body (empty string if None). diff --git a/tools/ai-review/src/main.rs b/tools/ai-review/src/main.rs index 3219008..f97ef98 100644 --- a/tools/ai-review/src/main.rs +++ b/tools/ai-review/src/main.rs @@ -1,6 +1,7 @@ mod github; mod mistral; mod review; +mod team; mod types; use anyhow::Context; @@ -54,6 +55,9 @@ async fn main() -> anyhow::Result<()> { Mode::Describe => { run_describe(&clients, owner, repo, pr_number).await?; } + Mode::Team => { + team::run_team(&clients, owner, repo, pr_number).await?; + } } Ok(()) diff --git a/tools/ai-review/src/mistral.rs b/tools/ai-review/src/mistral.rs index 2eb6890..3e861b4 100644 --- a/tools/ai-review/src/mistral.rs +++ b/tools/ai-review/src/mistral.rs @@ -1,13 +1,22 @@ #![allow(clippy::missing_errors_doc)] +use std::fmt::Write as _; + use anyhow::Context; use serde::{Deserialize, Serialize}; -use crate::types::ReviewResponse; +use crate::types::{Agent, Lens, LensVerdict, ReviewResponse, Severity, SynthReport}; const API_URL: &str = "https://api.mistral.ai/v1/chat/completions"; const MAX_DIFF_CHARS: usize = 20_000; +/// Model used by the four specialist agents in team mode. +pub(crate) const TEAM_AGENT_MODEL: &str = "codestral-latest"; +/// Model used by the synthesis step in team mode (stronger cross-report reasoning). +pub(crate) const TEAM_SYNTH_MODEL: &str = "mistral-large-latest"; +/// Model used by the adversarial lenses in team mode. +pub(crate) const TEAM_LENS_MODEL: &str = "codestral-latest"; + #[derive(Serialize)] struct ChatRequest { model: String, @@ -426,6 +435,172 @@ pub async fn call_describe( Ok(parsed.body) } +fn synthesis_system_prompt() -> String { + r#"You are the lead reviewer for Nubster, a sovereign hybrid DevOps platform (Rust/.NET/TypeScript). Four specialist agents (correctness, security, architecture, performance) have each independently reviewed the SAME pull request. Your job is to MERGE their reports into one deduplicated review, not to add new findings. + +## Your tasks +1. Deduplicate: when several agents report the same underlying issue (even if worded differently or on nearby lines), merge them into ONE finding. +2. Attribute: record every contributing agent in "sources", e.g. ["security","correctness"]. +3. Preserve signal: when merging duplicates, keep the HIGHEST severity. Never drop a critical. +4. Stay grounded: do NOT invent findings that no agent raised. You may only merge and rewrite for clarity. +5. Cut the noise: drop findings that are purely stylistic, cosmetic, or already enforced by Clippy or the compiler (formatting, naming, import ordering, unused warnings). Nubster runs Clippy pedantic with -D warnings in CI, so these add no value. +6. Anchor to code: every "message" MUST name the concrete code element involved (a function, type, variable, or call) so it can be verified against the diff. +7. Categorise: tag each finding with a "category" from ["bug","security","design","performance","test-gap"]. +8. Surface disagreement: if two agents contradict each other on the same point, state that explicitly in the message instead of silently choosing a side. +9. Summarise: write a 3-5 sentence executive summary of what the PR does and its overall quality, and list concrete strengths. +10. Do NOT emit a verdict or recommendation — that is computed deterministically downstream. + +Respond ONLY with a valid JSON object: +{ + "executive_summary": "3-5 sentences on what the PR does and its overall quality.", + "strengths": ["specific strength 1", "specific strength 2"], + "findings": [ + { + "file": "exact/path/from/the/reports.rs", + "line": 42, + "severity": "critical", + "category": "security", + "message": "Names the concrete code element and the issue in one sentence.", + "sources": ["security", "correctness"] + } + ] +} + +Rules: severity MUST be "critical" or "minor". category MUST be one of ["bug","security","design","performance","test-gap"]. line MUST be the line from the reports, or 0 for a file-level concern. sources MUST be a non-empty subset of ["correctness","security","architecture","performance"]. Return valid JSON only, no markdown fences."# + .to_string() +} + +const LENS_OUTPUT_SPEC: &str = r#"Respond ONLY with a valid JSON object: +{ "contested": true, "reason": "one-sentence justification grounded in the shown code" } + +"contested": true means the finding should NOT be trusted and acted on as-is. +"contested": false means the shown code confirms a genuine, in-scope issue. +When you are uncertain, set "contested": true. Return valid JSON only, no markdown fences."#; + +fn lens_system_prompt(lens: Lens) -> String { + let intro = match lens { + Lens::CodeConfirms => "You are a skeptical code verifier reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether the shown code UNAMBIGUOUSLY confirms the problem: you must be able to point to the exact changed lines that exhibit it. If the patch does not clearly prove the claim, set contested = true.", + Lens::RealImpact => "You are a skeptical impact assessor reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether the finding has REAL, observable impact: a reproducible bug, an exploitable vulnerability, or a measurable regression. If the concern is purely theoretical, stylistic, cosmetic, or already caught by the compiler or Clippy, set contested = true.", + Lens::FalsePositive => "You are a skeptical false-positive hunter reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether this is a classic false positive: already handled elsewhere, out of scope of this diff, intentional by design, or something the compiler or Clippy already enforces. If it looks like a false positive, set contested = true.", + }; + format!("{intro}\n\n{LENS_OUTPUT_SPEC}") +} + +fn agent_system_prompt(agent: Agent) -> String { + match agent { + Agent::Correctness => review_system_prompt(), + Agent::Security => security_system_prompt(), + Agent::Architecture => architecture_system_prompt(), + Agent::Performance => performance_system_prompt(), + } +} + +fn render_reports_for_synthesis(reports: &[(Agent, ReviewResponse)]) -> String { + let mut out = String::new(); + for (agent, report) in reports { + let _ = writeln!(out, "## {} agent", agent.label()); + let _ = writeln!(out, "Summary: {}", report.summary); + if !report.findings.is_empty() { + out.push_str("Findings:\n"); + for f in &report.findings { + let sev = match f.severity { + Severity::Critical => "critical", + Severity::Minor => "minor", + }; + let _ = writeln!(out, "- [{sev}] {}:{} {}", f.file, f.line, f.message); + } + } + out.push('\n'); + } + out +} + +/// Runs one specialist agent over the diff. Returns `(ReviewResponse, truncated)`. +pub async fn call_agent( + client: &reqwest::Client, + api_key: &str, + agent: Agent, + diff: &str, +) -> anyhow::Result<(ReviewResponse, bool)> { + call_analysis_mode( + client, + api_key, + TEAM_AGENT_MODEL, + agent_system_prompt(agent), + diff, + ) + .await +} + +/// Merges the specialist agent reports into a single deduplicated [`SynthReport`]. +pub async fn call_synthesis( + client: &reqwest::Client, + api_key: &str, + diff: &str, + reports: &[(Agent, ReviewResponse)], +) -> anyhow::Result { + let (content, _) = truncate_diff(diff); + let agents_block = render_reports_for_synthesis(reports); + + let request = ChatRequest { + model: TEAM_SYNTH_MODEL.to_string(), + messages: vec![ + Message { + role: "system".to_string(), + content: synthesis_system_prompt(), + }, + Message { + role: "user".to_string(), + content: format!( + "Specialist agent reports for a pull request:\n\n{agents_block}\nThe PR diff under review:\n\n{content}" + ), + }, + ], + response_format: ResponseFormat { + kind: "json_object", + }, + temperature: 0.2, + }; + + let raw = send_request(client, api_key, &request).await?; + serde_json::from_str(&raw).context("failed to parse Mistral synthesis response") +} + +/// Runs one adversarial lens over a single finding, given the file's patch. +pub async fn call_lens( + client: &reqwest::Client, + api_key: &str, + lens: Lens, + file: &str, + message: &str, + patch: &str, +) -> anyhow::Result { + let (patch_content, _) = truncate_diff(patch); + + let request = ChatRequest { + model: TEAM_LENS_MODEL.to_string(), + messages: vec![ + Message { + role: "system".to_string(), + content: lens_system_prompt(lens), + }, + Message { + role: "user".to_string(), + content: format!( + "Finding to scrutinise:\nFile: {file}\nClaim: {message}\n\nCode under review (patch of {file}):\n\n{patch_content}" + ), + }, + ], + response_format: ResponseFormat { + kind: "json_object", + }, + temperature: 0.0, + }; + + let raw = send_request(client, api_key, &request).await?; + serde_json::from_str(&raw).context("failed to parse Mistral lens response") +} + fn truncate_diff(diff: &str) -> (&str, bool) { if diff.len() <= MAX_DIFF_CHARS { (diff, false) diff --git a/tools/ai-review/src/review.rs b/tools/ai-review/src/review.rs index 990c376..001a4b2 100644 --- a/tools/ai-review/src/review.rs +++ b/tools/ai-review/src/review.rs @@ -1,6 +1,8 @@ use std::fmt::Write as _; -use crate::types::{Finding, ReviewResponse, Severity}; +use crate::types::{ + Agent, Finding, FindingVerdict, ReviewResponse, Severity, SynthFinding, Verdict, +}; /// Findings that become inline PR comments: critical severity AND specific line. pub fn inline_findings(response: &ReviewResponse) -> Vec<&Finding> { @@ -67,6 +69,122 @@ pub fn has_bot_marker(body: &str, marker: &str) -> bool { body.contains(marker) } +/// Input bundle for [`render_team_comment`], grouped to keep the argument count low. +pub struct TeamCommentView<'a> { + pub executive_summary: &'a str, + pub strengths: &'a [String], + pub scored: &'a [(SynthFinding, FindingVerdict)], + pub verdict: Verdict, + pub file_count: usize, + pub agents_ok: &'a [Agent], + pub agents_failed: &'a [Agent], + pub raw_count: usize, + pub dedup_count: usize, + pub capped: usize, + pub model: &'a str, +} + +/// Renders the team-mode global comment (with hidden upsert marker). +#[must_use] +pub fn render_team_comment(view: &TeamCommentView) -> String { + let mut md = "\n## Team Review IA\n\n".to_string(); + + let verdict_line = match view.verdict { + Verdict::Ship => "**Verdict : SHIP ✅**", + Verdict::NeedsWork => "**Verdict : NEEDS_WORK ⚠️**", + Verdict::Discuss => "**Verdict : DISCUSS 💬**", + }; + writeln!(md, "{verdict_line}\n").unwrap(); + writeln!(md, "### Vue d'ensemble\n{}\n", view.executive_summary).unwrap(); + + if !view.strengths.is_empty() { + md.push_str("### Points forts\n"); + for s in view.strengths { + writeln!(md, "- {s}").unwrap(); + } + md.push('\n'); + } + + let confirmed: Vec<&(SynthFinding, FindingVerdict)> = + view.scored.iter().filter(|(_, v)| !v.contested).collect(); + let contested: Vec<&(SynthFinding, FindingVerdict)> = + view.scored.iter().filter(|(_, v)| v.contested).collect(); + + if !confirmed.is_empty() { + md.push_str("### Findings confirmés\n"); + for (f, _) in &confirmed { + writeln!(md, "{}", render_finding_line(f)).unwrap(); + } + md.push('\n'); + } + + if !contested.is_empty() { + md.push_str("### ⚠️ Contestés (vérification adversariale)\n"); + for (f, v) in &contested { + writeln!(md, "{}", render_finding_line(f)).unwrap(); + if let Some(reason) = v.reasons.first() { + writeln!(md, " - _{reason}_").unwrap(); + } + } + md.push('\n'); + } + + if confirmed.is_empty() && contested.is_empty() { + md.push_str("Aucun problème remonté par les agents.\n\n"); + } + + md.push_str("---\n"); + let ok_labels: Vec<&str> = view.agents_ok.iter().map(|a| a.label()).collect(); + write!(md, "*Agents : {}", ok_labels.join(", ")).unwrap(); + if !view.agents_failed.is_empty() { + let failed_labels: Vec<&str> = view.agents_failed.iter().map(|a| a.label()).collect(); + write!(md, " · échoués : {}", failed_labels.join(", ")).unwrap(); + } + writeln!(md).unwrap(); + write!( + md, + "Findings : {} bruts → {} après fusion", + view.raw_count, view.dedup_count + ) + .unwrap(); + if view.capped > 0 { + write!(md, " → {} non vérifiés (plafond)", view.capped).unwrap(); + } + writeln!(md).unwrap(); + write!( + md, + "Modèle : {} · Diff : {} fichier(s)*", + view.model, view.file_count + ) + .unwrap(); + + md +} + +fn render_finding_line(f: &SynthFinding) -> String { + let sev = match f.severity { + Severity::Critical => "🔴", + Severity::Minor => "🟡", + }; + let location = if f.line > 0 { + format!("{}:{}", f.file, f.line) + } else { + f.file.clone() + }; + let sources = if f.sources.is_empty() { + String::new() + } else { + format!(" _(via {})_", f.sources.join(", ")) + }; + format!( + "- {sev} `{}` **{}** : {}{}", + f.category.label(), + location, + f.message, + sources + ) +} + #[cfg(test)] mod tests { use super::*; @@ -178,4 +296,66 @@ mod tests { "" )); } + + #[test] + fn renders_team_comment_with_verdict_and_sections() { + use crate::types::Category; + + let scored = vec![ + ( + SynthFinding { + file: "a.rs".to_string(), + line: 10, + severity: Severity::Critical, + category: Category::Bug, + message: "panic in handler".to_string(), + sources: vec!["correctness".to_string()], + }, + FindingVerdict { + contested: false, + reasons: vec![], + }, + ), + ( + SynthFinding { + file: "b.rs".to_string(), + line: 0, + severity: Severity::Minor, + category: Category::Design, + message: "tight coupling".to_string(), + sources: vec![], + }, + FindingVerdict { + contested: true, + reasons: vec!["already handled elsewhere".to_string()], + }, + ), + ]; + let ok = [Agent::Correctness, Agent::Security]; + let failed = [Agent::Performance]; + let strengths = ["clear separation".to_string()]; + let view = TeamCommentView { + executive_summary: "Adds the handler.", + strengths: &strengths, + scored: &scored, + verdict: Verdict::NeedsWork, + file_count: 3, + agents_ok: &ok, + agents_failed: &failed, + raw_count: 5, + dedup_count: 2, + capped: 0, + model: "codestral-latest + mistral-large-latest", + }; + let md = render_team_comment(&view); + + assert!(md.starts_with("")); + assert!(md.contains("NEEDS_WORK")); + assert!(md.contains("Findings confirmés")); + assert!(md.contains("panic in handler")); + assert!(md.contains("Contestés")); + assert!(md.contains("already handled elsewhere")); + assert!(md.contains("échoués : Performance")); + assert!(md.contains("5 bruts → 2 après fusion")); + } } diff --git a/tools/ai-review/src/team.rs b/tools/ai-review/src/team.rs new file mode 100644 index 0000000..7f9d81e --- /dev/null +++ b/tools/ai-review/src/team.rs @@ -0,0 +1,357 @@ +#![allow(clippy::missing_errors_doc)] + +use std::sync::Arc; + +use tokio::sync::Semaphore; +use tokio::task::JoinSet; + +use crate::types::{Agent, FindingVerdict, Lens, LensVerdict, Severity, SynthFinding, Verdict}; +use crate::{github, mistral, review, Clients}; + +/// Maximum number of synthesised findings put through adversarial verification. +const MAX_VERIFIED_FINDINGS: usize = 15; +/// Maximum number of concurrent Mistral calls during the verification phase. +const MAX_CONCURRENT_CALLS: usize = 6; +/// The three adversarial lenses applied to every verified finding. +const LENSES: [Lens; 3] = [Lens::CodeConfirms, Lens::RealImpact, Lens::FalsePositive]; + +/// Marker used to upsert the single team-review comment on a pull request. +const MARKER: &str = ""; + +/// Runs the full multi-agent team review pipeline for a pull request. +pub async fn run_team( + clients: &Clients, + owner: &str, + repo: &str, + pr_number: u64, +) -> anyhow::Result<()> { + println!("Fetching PR #{pr_number} diff for team review…"); + let ctx = github::fetch_diff_context(&clients.octo, owner, repo, pr_number).await?; + if ctx.full.trim().is_empty() { + println!("Empty diff — nothing to review."); + return Ok(()); + } + + println!("Running specialist agents…"); + let (report_ok, agents_ok, agents_failed) = run_agents(clients, &ctx.full).await; + if report_ok.is_empty() { + let body = format!( + "{MARKER}\n## Team Review IA\n\n⚠️ Team review unavailable: all specialist agents failed." + ); + github::upsert_global_comment(&clients.octo, owner, repo, pr_number, &body, MARKER).await?; + return Ok(()); + } + + let raw_count: usize = report_ok.iter().map(|(_, r)| r.findings.len()).sum(); + println!("Synthesising {} agent report(s)…", report_ok.len()); + let mut synth = + mistral::call_synthesis(&clients.http, &clients.mistral_key, &ctx.full, &report_ok).await?; + + let mut findings = std::mem::take(&mut synth.findings); + findings.sort_by_key(|f| severity_rank(&f.severity)); + let dedup_count = findings.len(); + let capped = dedup_count.saturating_sub(MAX_VERIFIED_FINDINGS); + if capped > 0 { + println!( + "Verifying top {MAX_VERIFIED_FINDINGS} of {dedup_count} findings ({capped} skipped)." + ); + findings.truncate(MAX_VERIFIED_FINDINGS); + } + + println!( + "Verifying {} finding(s) with the 3-lens vote…", + findings.len() + ); + let verdicts = verify_findings(clients, &ctx, &findings).await; + let scored: Vec<(SynthFinding, FindingVerdict)> = findings.into_iter().zip(verdicts).collect(); + let verdict = compute_verdict(&scored); + + let model = format!( + "{} + {}", + mistral::TEAM_AGENT_MODEL, + mistral::TEAM_SYNTH_MODEL + ); + let view = review::TeamCommentView { + executive_summary: &synth.executive_summary, + strengths: &synth.strengths, + scored: &scored, + verdict, + file_count: ctx.file_count, + agents_ok: &agents_ok, + agents_failed: &agents_failed, + raw_count, + dedup_count, + capped, + model: &model, + }; + let body = review::render_team_comment(&view); + + println!("Upserting team comment…"); + github::upsert_global_comment(&clients.octo, owner, repo, pr_number, &body, MARKER).await?; + + post_confirmed_inline(clients, owner, repo, pr_number, &scored).await?; + + println!("Team review complete."); + Ok(()) +} + +/// Runs the four specialist agents concurrently, returning the successful +/// reports plus the lists of agents that succeeded and failed. +async fn run_agents( + clients: &Clients, + diff: &str, +) -> ( + Vec<(Agent, crate::types::ReviewResponse)>, + Vec, + Vec, +) { + let agents = [ + Agent::Correctness, + Agent::Security, + Agent::Architecture, + Agent::Performance, + ]; + let (corr, sec, arch, perf) = tokio::join!( + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Correctness, + diff + ), + mistral::call_agent(&clients.http, &clients.mistral_key, Agent::Security, diff), + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Architecture, + diff + ), + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Performance, + diff + ), + ); + + let mut reports = Vec::new(); + let mut ok = Vec::new(); + let mut failed = Vec::new(); + for (agent, result) in agents.into_iter().zip([corr, sec, arch, perf]) { + match result { + Ok((response, _)) => { + ok.push(agent); + reports.push((agent, response)); + } + Err(error) => { + eprintln!("warning: {} agent failed: {error}", agent.label()); + failed.push(agent); + } + } + } + (reports, ok, failed) +} + +/// Verifies each finding with the 3-lens adversarial vote under a concurrency +/// bound, returning one [`FindingVerdict`] per finding (index-aligned). +async fn verify_findings( + clients: &Clients, + ctx: &github::DiffContext, + findings: &[SynthFinding], +) -> Vec { + let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CALLS)); + let mut set: JoinSet<(usize, Lens, anyhow::Result)> = JoinSet::new(); + + for (idx, finding) in findings.iter().enumerate() { + let patch = ctx.patch_for(finding).to_owned(); + for lens in LENSES { + let permit_source = Arc::clone(&semaphore); + let client = clients.http.clone(); + let key = clients.mistral_key.clone(); + let file = finding.file.clone(); + let message = finding.message.clone(); + let patch = patch.clone(); + set.spawn(async move { + let _permit = permit_source.acquire_owned().await.ok(); + let result = mistral::call_lens(&client, &key, lens, &file, &message, &patch).await; + (idx, lens, result) + }); + } + } + + let mut votes: Vec> = (0..findings.len()).map(|_| Vec::new()).collect(); + while let Some(joined) = set.join_next().await { + match joined { + Ok((idx, _, Ok(verdict))) => votes[idx].push(verdict), + Ok((idx, lens, Err(error))) => { + eprintln!( + "warning: {} lens failed for finding {idx}: {error}", + lens.label() + ); + } + Err(error) => eprintln!("warning: a lens task did not complete: {error}"), + } + } + + votes.iter().map(|v| aggregate_lens_votes(v)).collect() +} + +/// Posts inline comments for confirmed, critical, line-located findings. +async fn post_confirmed_inline( + clients: &Clients, + owner: &str, + repo: &str, + pr_number: u64, + scored: &[(SynthFinding, FindingVerdict)], +) -> anyhow::Result<()> { + let inline: Vec = scored + .iter() + .filter(|(f, v)| f.severity == Severity::Critical && !v.contested && f.line > 0) + .map(|(f, _)| github::InlineComment { + path: f.file.clone(), + line: f.line, + body: f.message.clone(), + }) + .collect(); + + if inline.is_empty() { + return Ok(()); + } + + let head_sha = github::fetch_head_sha(&clients.octo, owner, repo, pr_number).await?; + println!("Posting {} confirmed inline comment(s)…", inline.len()); + github::post_inline_comments( + &clients.github_token, + owner, + repo, + pr_number, + &head_sha, + &inline, + ) + .await +} + +/// Aggregates the lens votes for one finding using a sceptical majority rule: +/// contested when at least half of the received votes are contested, and when +/// no lens could be reached at all (an unverified finding is not confirmed). +#[must_use] +pub fn aggregate_lens_votes(votes: &[LensVerdict]) -> FindingVerdict { + let total = votes.len(); + let contested_count = votes.iter().filter(|v| v.contested).count(); + let contested = total == 0 || contested_count * 2 >= total; + let reasons = if total == 0 { + vec!["verification unavailable (all lenses failed)".to_string()] + } else { + votes + .iter() + .filter(|v| v.contested) + .map(|v| v.reason.clone()) + .collect() + }; + FindingVerdict { contested, reasons } +} + +/// Computes the overall verdict deterministically: a confirmed critical blocks, +/// an only-contested critical invites discussion, otherwise the PR may ship. +#[must_use] +pub fn compute_verdict(scored: &[(SynthFinding, FindingVerdict)]) -> Verdict { + let confirmed_critical = scored + .iter() + .any(|(f, v)| f.severity == Severity::Critical && !v.contested); + if confirmed_critical { + return Verdict::NeedsWork; + } + let contested_critical = scored + .iter() + .any(|(f, v)| f.severity == Severity::Critical && v.contested); + if contested_critical { + return Verdict::Discuss; + } + Verdict::Ship +} + +/// Sort key placing critical findings before minor ones. +fn severity_rank(severity: &Severity) -> u8 { + match severity { + Severity::Critical => 0, + Severity::Minor => 1, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::Category; + + fn lens_vote(contested: bool) -> LensVerdict { + LensVerdict { + contested, + reason: "because".to_string(), + } + } + + #[test] + fn aggregate_three_lens_majority() { + assert!( + aggregate_lens_votes(&[lens_vote(true), lens_vote(true), lens_vote(false)]).contested + ); + assert!( + aggregate_lens_votes(&[lens_vote(true), lens_vote(true), lens_vote(true)]).contested + ); + assert!( + !aggregate_lens_votes(&[lens_vote(true), lens_vote(false), lens_vote(false)]).contested + ); + assert!( + !aggregate_lens_votes(&[lens_vote(false), lens_vote(false), lens_vote(false)]) + .contested + ); + } + + #[test] + fn aggregate_handles_abstentions_and_empty() { + assert!(aggregate_lens_votes(&[]).contested); + assert!(aggregate_lens_votes(&[lens_vote(true), lens_vote(false)]).contested); + assert!(!aggregate_lens_votes(&[lens_vote(false), lens_vote(false)]).contested); + assert!(aggregate_lens_votes(&[lens_vote(true)]).contested); + assert!(!aggregate_lens_votes(&[lens_vote(false)]).contested); + } + + fn finding(severity: Severity) -> SynthFinding { + SynthFinding { + file: "a.rs".to_string(), + line: 1, + severity, + category: Category::Bug, + message: "m".to_string(), + sources: vec![], + } + } + + fn verdict(contested: bool) -> FindingVerdict { + FindingVerdict { + contested, + reasons: vec![], + } + } + + #[test] + fn verdict_needs_work_on_confirmed_critical() { + let scored = vec![(finding(Severity::Critical), verdict(false))]; + assert_eq!(compute_verdict(&scored), Verdict::NeedsWork); + } + + #[test] + fn verdict_discuss_when_all_criticals_contested() { + let scored = vec![ + (finding(Severity::Critical), verdict(true)), + (finding(Severity::Minor), verdict(false)), + ]; + assert_eq!(compute_verdict(&scored), Verdict::Discuss); + } + + #[test] + fn verdict_ship_without_criticals() { + let scored = vec![(finding(Severity::Minor), verdict(false))]; + assert_eq!(compute_verdict(&scored), Verdict::Ship); + assert_eq!(compute_verdict(&[]), Verdict::Ship); + } +} diff --git a/tools/ai-review/src/types.rs b/tools/ai-review/src/types.rs index 74f4603..345968e 100644 --- a/tools/ai-review/src/types.rs +++ b/tools/ai-review/src/types.rs @@ -23,7 +23,6 @@ pub struct ReviewResponse { pub security: String, } -#[allow(dead_code)] pub enum Mode { Review, Describe, @@ -31,9 +30,9 @@ pub enum Mode { Architecture, Performance, Product, + Team, } -#[allow(dead_code)] impl Mode { pub fn from_env() -> anyhow::Result { match std::env::var("AI_MODE").as_deref() { @@ -43,13 +42,14 @@ impl Mode { Ok("architecture") => Ok(Self::Architecture), Ok("performance") => Ok(Self::Performance), Ok("product") => Ok(Self::Product), + Ok("team") => Ok(Self::Team), Ok(other) => anyhow::bail!("unknown AI_MODE: {other}"), } } pub fn mistral_model(&self) -> &'static str { match self { - Self::Review | Self::Security | Self::Architecture | Self::Performance => { + Self::Review | Self::Security | Self::Architecture | Self::Performance | Self::Team => { "codestral-latest" } Self::Describe | Self::Product => "mistral-small-latest", @@ -63,6 +63,7 @@ impl Mode { Self::Architecture => "", Self::Performance => "", Self::Product => "", + Self::Team => "", Self::Describe => "", } } @@ -74,11 +75,160 @@ impl Mode { Self::Architecture => "Architecture Review", Self::Performance => "Performance Review", Self::Product => "Product Review", + Self::Team => "Team Review", Self::Describe => "Description", } } } +/// One of the four specialised review angles run in parallel in team mode. +#[derive(Debug, Clone, Copy)] +pub enum Agent { + Correctness, + Security, + Architecture, + Performance, +} + +impl Agent { + /// Returns the human-readable label for this agent. + pub fn label(self) -> &'static str { + match self { + Self::Correctness => "Correctness", + Self::Security => "Security", + Self::Architecture => "Architecture", + Self::Performance => "Performance", + } + } +} + +/// One adversarial lens applied to a synthesised finding during verification. +#[derive(Debug, Clone, Copy)] +pub enum Lens { + CodeConfirms, + RealImpact, + FalsePositive, +} + +impl Lens { + /// Returns the human-readable label for this lens. + pub fn label(self) -> &'static str { + match self { + Self::CodeConfirms => "code-confirms", + Self::RealImpact => "real-impact", + Self::FalsePositive => "false-positive", + } + } +} + +/// The kind of issue a synthesised finding describes. +#[derive(Debug, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "kebab-case")] +pub enum Category { + Bug, + Security, + Design, + Performance, + TestGap, + #[serde(other)] + Other, +} + +impl Category { + /// Returns the human-readable label for this category. + pub fn label(self) -> &'static str { + match self { + Self::Bug => "bug", + Self::Security => "security", + Self::Design => "design", + Self::Performance => "performance", + Self::TestGap => "test-gap", + Self::Other => "other", + } + } +} + +/// A finding merged by the synthesis agent, tracking which agents raised it. +#[derive(Debug, Deserialize, Clone)] +pub struct SynthFinding { + pub file: String, + #[serde(default)] + pub line: u32, + #[serde(default = "default_severity")] + pub severity: Severity, + #[serde(default = "default_category")] + pub category: Category, + pub message: String, + #[serde(default)] + pub sources: Vec, +} + +/// Output of the synthesis agent: a merged, deduplicated cross-agent review. +#[derive(Debug, Deserialize)] +pub struct SynthReport { + pub executive_summary: String, + #[serde(default)] + pub strengths: Vec, + #[serde(default)] + pub findings: Vec, +} + +/// Verdict returned by one adversarial lens for a single finding. +#[derive(Debug, Deserialize, Clone)] +pub struct LensVerdict { + #[serde(deserialize_with = "de_bool_loose")] + pub contested: bool, + pub reason: String, +} + +/// Aggregated verdict for one finding after the multi-lens vote. +#[derive(Debug, Clone)] +pub struct FindingVerdict { + pub contested: bool, + pub reasons: Vec, +} + +/// Final overall verdict computed deterministically after verification. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Verdict { + Ship, + NeedsWork, + Discuss, +} + +/// Default severity for a finding whose severity field is missing or unparseable. +fn default_severity() -> Severity { + Severity::Minor +} + +/// Default category for a finding whose category field is missing. +fn default_category() -> Category { + Category::Other +} + +/// Deserializes a boolean from `true`/`false`, `"true"`/`"false"`, `"yes"`/`"no"`, +/// or `0`/`1`, defaulting to `true` (the sceptical stance) when the value cannot +/// be interpreted. Language models are inconsistent about boolean encoding. +fn de_bool_loose<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum Loose { + Bool(bool), + Int(i64), + Str(String), + } + + let parsed = match Loose::deserialize(deserializer)? { + Loose::Bool(b) => b, + Loose::Int(n) => n != 0, + Loose::Str(s) => !matches!(s.trim().to_ascii_lowercase().as_str(), "false" | "0" | "no"), + }; + Ok(parsed) +} + #[cfg(test)] mod tests { use super::*; @@ -113,4 +263,78 @@ mod tests { let r: ReviewResponse = serde_json::from_str(json).unwrap(); assert!(r.findings.is_empty()); } + + #[test] + fn synth_report_defaults_missing_collections() { + let json = r#"{ "executive_summary": "Solid PR." }"#; + let r: SynthReport = serde_json::from_str(json).unwrap(); + assert_eq!(r.executive_summary, "Solid PR."); + assert!(r.strengths.is_empty()); + assert!(r.findings.is_empty()); + } + + #[test] + fn synth_finding_defaults_line_severity_and_sources() { + let json = r#"{ "file": "src/a.rs", "message": "Possible panic." }"#; + let f: SynthFinding = serde_json::from_str(json).unwrap(); + assert_eq!(f.line, 0); + assert_eq!(f.severity, Severity::Minor); + assert!(f.sources.is_empty()); + } + + #[test] + fn synth_finding_keeps_sources_and_severity() { + let json = r#"{ + "file": "src/a.rs", "line": 12, "severity": "critical", "category": "security", + "message": "Tenant scoping missing.", "sources": ["security", "correctness"] + }"#; + let f: SynthFinding = serde_json::from_str(json).unwrap(); + assert_eq!(f.severity, Severity::Critical); + assert_eq!(f.category, Category::Security); + assert_eq!(f.sources, vec!["security", "correctness"]); + } + + #[test] + fn synth_finding_category_parses_defaults_and_falls_back() { + let known = r#"{"file": "a.rs", "message": "x", "category": "test-gap"}"#; + assert_eq!( + serde_json::from_str::(known) + .unwrap() + .category, + Category::TestGap + ); + + let unknown = r#"{"file": "a.rs", "message": "x", "category": "whatever"}"#; + assert_eq!( + serde_json::from_str::(unknown) + .unwrap() + .category, + Category::Other + ); + + let missing = r#"{"file": "a.rs", "message": "x"}"#; + assert_eq!( + serde_json::from_str::(missing) + .unwrap() + .category, + Category::Other + ); + } + + #[test] + fn lens_verdict_parses_loose_booleans() { + let cases = [ + (r#"{"contested": true, "reason": "x"}"#, true), + (r#"{"contested": false, "reason": "x"}"#, false), + (r#"{"contested": "true", "reason": "x"}"#, true), + (r#"{"contested": "false", "reason": "x"}"#, false), + (r#"{"contested": 1, "reason": "x"}"#, true), + (r#"{"contested": 0, "reason": "x"}"#, false), + (r#"{"contested": "maybe", "reason": "x"}"#, true), + ]; + for (json, expected) in cases { + let v: LensVerdict = serde_json::from_str(json).unwrap(); + assert_eq!(v.contested, expected, "input was {json}"); + } + } }