From 9ec0754077f5e011be4e5b97d68a3f74a21c0cf3 Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:04:54 +0200 Subject: [PATCH 1/8] types: add team-mode data model Add Agent, Lens, SynthFinding, SynthReport, LensVerdict, FindingVerdict and Verdict types for the multi-agent team review mode. Responses from language models are parsed with tolerant serde (default collections, loose boolean coercion) to survive minor formatting drift. --- tools/ai-review/src/types.rs | 169 +++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/tools/ai-review/src/types.rs b/tools/ai-review/src/types.rs index 74f4603..9b7f26e 100644 --- a/tools/ai-review/src/types.rs +++ b/tools/ai-review/src/types.rs @@ -79,6 +79,129 @@ impl Mode { } } +/// One of the four specialised review angles run in parallel in team mode. +#[derive(Debug, Clone, Copy)] +#[allow(dead_code)] +pub enum Agent { + Correctness, + Security, + Architecture, + Performance, +} + +#[allow(dead_code)] +impl Agent { + /// Returns the human-readable label for this agent. + pub fn label(self) -> &'static str { + match self { + Self::Correctness => "Correctness", + Self::Security => "Security", + Self::Architecture => "Architecture", + Self::Performance => "Performance", + } + } +} + +/// One adversarial lens applied to a synthesised finding during verification. +#[derive(Debug, Clone, Copy)] +#[allow(dead_code)] +pub enum Lens { + CodeConfirms, + RealImpact, + FalsePositive, +} + +#[allow(dead_code)] +impl Lens { + /// Returns the human-readable label for this lens. + pub fn label(self) -> &'static str { + match self { + Self::CodeConfirms => "code-confirms", + Self::RealImpact => "real-impact", + Self::FalsePositive => "false-positive", + } + } +} + +/// A finding merged by the synthesis agent, tracking which agents raised it. +#[derive(Debug, Deserialize, Clone)] +#[allow(dead_code)] +pub struct SynthFinding { + pub file: String, + #[serde(default)] + pub line: u32, + #[serde(default = "default_severity")] + pub severity: Severity, + pub message: String, + #[serde(default)] + pub sources: Vec, +} + +/// Output of the synthesis agent: a merged, deduplicated cross-agent review. +#[derive(Debug, Deserialize)] +#[allow(dead_code)] +pub struct SynthReport { + pub executive_summary: String, + #[serde(default)] + pub strengths: Vec, + #[serde(default)] + pub findings: Vec, +} + +/// Verdict returned by one adversarial lens for a single finding. +#[derive(Debug, Deserialize, Clone)] +#[allow(dead_code)] +pub struct LensVerdict { + #[serde(deserialize_with = "de_bool_loose")] + pub contested: bool, + pub reason: String, +} + +/// Aggregated verdict for one finding after the multi-lens vote. +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct FindingVerdict { + pub contested: bool, + pub reasons: Vec, +} + +/// Final overall verdict computed deterministically after verification. +#[derive(Debug, Clone, Copy, PartialEq)] +#[allow(dead_code)] +pub enum Verdict { + Ship, + NeedsWork, + Discuss, +} + +/// Default severity for a finding whose severity field is missing or unparseable. +fn default_severity() -> Severity { + Severity::Minor +} + +/// Deserializes a boolean from `true`/`false`, `"true"`/`"false"`, `"yes"`/`"no"`, +/// or `0`/`1`, defaulting to `true` (the sceptical stance) when the value cannot +/// be interpreted. Language models are inconsistent about boolean encoding. +fn de_bool_loose<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum Loose { + Bool(bool), + Int(i64), + Str(String), + } + + let parsed = match Loose::deserialize(deserializer)? { + Loose::Bool(b) => b, + Loose::Int(n) => n != 0, + Loose::Str(s) => !matches!(s.trim().to_ascii_lowercase().as_str(), "false" | "0" | "no"), + }; + Ok(parsed) +} + #[cfg(test)] mod tests { use super::*; @@ -113,4 +236,50 @@ mod tests { let r: ReviewResponse = serde_json::from_str(json).unwrap(); assert!(r.findings.is_empty()); } + + #[test] + fn synth_report_defaults_missing_collections() { + let json = r#"{ "executive_summary": "Solid PR." }"#; + let r: SynthReport = serde_json::from_str(json).unwrap(); + assert_eq!(r.executive_summary, "Solid PR."); + assert!(r.strengths.is_empty()); + assert!(r.findings.is_empty()); + } + + #[test] + fn synth_finding_defaults_line_severity_and_sources() { + let json = r#"{ "file": "src/a.rs", "message": "Possible panic." }"#; + let f: SynthFinding = serde_json::from_str(json).unwrap(); + assert_eq!(f.line, 0); + assert_eq!(f.severity, Severity::Minor); + assert!(f.sources.is_empty()); + } + + #[test] + fn synth_finding_keeps_sources_and_severity() { + let json = r#"{ + "file": "src/a.rs", "line": 12, "severity": "critical", + "message": "Tenant scoping missing.", "sources": ["security", "correctness"] + }"#; + let f: SynthFinding = serde_json::from_str(json).unwrap(); + assert_eq!(f.severity, Severity::Critical); + assert_eq!(f.sources, vec!["security", "correctness"]); + } + + #[test] + fn lens_verdict_parses_loose_booleans() { + let cases = [ + (r#"{"contested": true, "reason": "x"}"#, true), + (r#"{"contested": false, "reason": "x"}"#, false), + (r#"{"contested": "true", "reason": "x"}"#, true), + (r#"{"contested": "false", "reason": "x"}"#, false), + (r#"{"contested": 1, "reason": "x"}"#, true), + (r#"{"contested": 0, "reason": "x"}"#, false), + (r#"{"contested": "maybe", "reason": "x"}"#, true), + ]; + for (json, expected) in cases { + let v: LensVerdict = serde_json::from_str(json).unwrap(); + assert_eq!(v.contested, expected, "input was {json}"); + } + } } From 7c3aeaded6b570482572e98b5a2d7b8bfd99dc6a Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:27:00 +0200 Subject: [PATCH 2/8] github: add per-file diff context for lens verification Add DiffContext (full diff plus per-file patch map) and fetch_diff_context; fetch_diff now delegates to it for zero behavioural change. patch_for selects the per-file patch for a finding, falling back to a UTF-8-safe truncation of the full diff for file-level or cross-file findings. --- tools/ai-review/src/github.rs | 74 ++++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/tools/ai-review/src/github.rs b/tools/ai-review/src/github.rs index 16ed15c..2fd2004 100644 --- a/tools/ai-review/src/github.rs +++ b/tools/ai-review/src/github.rs @@ -1,11 +1,13 @@ #![allow(clippy::missing_errors_doc)] +use std::collections::HashMap; use std::fmt::Write as _; use anyhow::Context; use octocrab::Octocrab; use crate::review::has_bot_marker; +use crate::types::SynthFinding; /// An inline comment to post on a specific file line. pub struct InlineComment { @@ -14,31 +16,85 @@ pub struct InlineComment { pub body: String, } -/// Returns (concatenated diff as string, number of changed files). -pub async fn fetch_diff( +/// Maximum bytes of the full diff handed to a lens as fallback context when a +/// finding has no per-file patch (file-level or cross-file findings). +const PATCH_FALLBACK_CHARS: usize = 8_000; + +/// The PR diff in two shapes: a concatenated view and a per-file patch map. +#[allow(dead_code)] +pub struct DiffContext { + pub full: String, + pub by_file: HashMap, + pub file_count: usize, +} + +#[allow(dead_code)] +impl DiffContext { + /// Returns the patch to hand to a lens for `finding`, falling back to a + /// truncated view of the full diff for file-level or cross-file findings. + #[must_use] + pub fn patch_for(&self, finding: &SynthFinding) -> &str { + match self.by_file.get(&finding.file) { + Some(patch) if !patch.is_empty() => patch.as_str(), + _ => safe_truncate(&self.full, PATCH_FALLBACK_CHARS), + } + } +} + +/// Truncates `s` to at most `max` bytes without splitting a UTF-8 sequence. +fn safe_truncate(s: &str, max: usize) -> &str { + if s.len() <= max { + return s; + } + let mut end = max; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} + +/// Returns the PR diff as a [`DiffContext`] (concatenated view plus per-file map). +pub async fn fetch_diff_context( octo: &Octocrab, owner: &str, repo: &str, pr_number: u64, -) -> anyhow::Result<(String, usize)> { +) -> anyhow::Result { let files = octo .pulls(owner, repo) .list_files(pr_number) .await .context("failed to list PR files")?; - let count = files.items.len(); - let mut diff = String::new(); + let file_count = files.items.len(); + let mut full = String::new(); + let mut by_file = HashMap::with_capacity(file_count); for file in &files.items { - writeln!(diff, "--- {}", file.filename).unwrap(); + writeln!(full, "--- {}", file.filename).unwrap(); if let Some(patch) = &file.patch { - diff.push_str(patch); + full.push_str(patch); + by_file.insert(file.filename.clone(), patch.clone()); } - diff.push('\n'); + full.push('\n'); } - Ok((diff, count)) + Ok(DiffContext { + full, + by_file, + file_count, + }) +} + +/// Returns (concatenated diff as string, number of changed files). +pub async fn fetch_diff( + octo: &Octocrab, + owner: &str, + repo: &str, + pr_number: u64, +) -> anyhow::Result<(String, usize)> { + let ctx = fetch_diff_context(octo, owner, repo, pr_number).await?; + Ok((ctx.full, ctx.file_count)) } /// Returns the current PR body (empty string if None). From f4c1611fa4e9166112a2466f28cf489d65014ba5 Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:29:09 +0200 Subject: [PATCH 3/8] mistral: add team-mode agent, synthesis and lens calls Add call_agent (specialist fan-out), call_synthesis (deduplicating merge of the agent reports into a SynthReport) and call_lens (adversarial per-finding verification). Introduce the synthesis and three lens system prompts plus the team model constants. --- tools/ai-review/src/mistral.rs | 175 ++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 1 deletion(-) diff --git a/tools/ai-review/src/mistral.rs b/tools/ai-review/src/mistral.rs index 2eb6890..94578bf 100644 --- a/tools/ai-review/src/mistral.rs +++ b/tools/ai-review/src/mistral.rs @@ -1,13 +1,22 @@ #![allow(clippy::missing_errors_doc)] +use std::fmt::Write as _; + use anyhow::Context; use serde::{Deserialize, Serialize}; -use crate::types::ReviewResponse; +use crate::types::{Agent, Lens, LensVerdict, ReviewResponse, Severity, SynthReport}; const API_URL: &str = "https://api.mistral.ai/v1/chat/completions"; const MAX_DIFF_CHARS: usize = 20_000; +/// Model used by the four specialist agents in team mode. +pub(crate) const TEAM_AGENT_MODEL: &str = "codestral-latest"; +/// Model used by the synthesis step in team mode (stronger cross-report reasoning). +pub(crate) const TEAM_SYNTH_MODEL: &str = "mistral-large-latest"; +/// Model used by the adversarial lenses in team mode. +pub(crate) const TEAM_LENS_MODEL: &str = "codestral-latest"; + #[derive(Serialize)] struct ChatRequest { model: String, @@ -426,6 +435,170 @@ pub async fn call_describe( Ok(parsed.body) } +fn synthesis_system_prompt() -> String { + r#"You are the lead reviewer for Nubster, a sovereign hybrid DevOps platform (Rust/.NET/TypeScript). Four specialist agents (correctness, security, architecture, performance) have each independently reviewed the SAME pull request. Your job is to MERGE their reports into one deduplicated review, not to add new findings. + +## Your tasks +1. Deduplicate: when several agents report the same underlying issue (even if worded differently or on nearby lines), merge them into ONE finding. +2. Attribute: record every contributing agent in "sources", e.g. ["security","correctness"]. +3. Preserve signal: when merging duplicates, keep the HIGHEST severity. Never drop a critical. +4. Stay grounded: do NOT invent findings that no agent raised. You may only merge and rewrite for clarity. +5. Summarise: write a 3-5 sentence executive summary of what the PR does and its overall quality, and list concrete strengths. +6. Do NOT emit a verdict or recommendation — that is computed deterministically downstream. + +Respond ONLY with a valid JSON object: +{ + "executive_summary": "3-5 sentences on what the PR does and its overall quality.", + "strengths": ["specific strength 1", "specific strength 2"], + "findings": [ + { + "file": "exact/path/from/the/reports.rs", + "line": 42, + "severity": "critical", + "message": "Merged, clear, actionable description in one sentence.", + "sources": ["security", "correctness"] + } + ] +} + +Rules: severity MUST be "critical" or "minor". line MUST be the line from the reports, or 0 for a file-level concern. sources MUST be a non-empty subset of ["correctness","security","architecture","performance"]. Return valid JSON only, no markdown fences."# + .to_string() +} + +const LENS_OUTPUT_SPEC: &str = r#"Respond ONLY with a valid JSON object: +{ "contested": true, "reason": "one-sentence justification grounded in the shown code" } + +"contested": true means the finding should NOT be trusted and acted on as-is. +"contested": false means the shown code confirms a genuine, in-scope issue. +When you are uncertain, set "contested": true. Return valid JSON only, no markdown fences."#; + +fn lens_system_prompt(lens: Lens) -> String { + let intro = match lens { + Lens::CodeConfirms => "You are a skeptical code verifier reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether the shown code UNAMBIGUOUSLY confirms the problem: you must be able to point to the exact changed lines that exhibit it. If the patch does not clearly prove the claim, set contested = true.", + Lens::RealImpact => "You are a skeptical impact assessor reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether the finding has REAL, observable impact: a reproducible bug, an exploitable vulnerability, or a measurable regression. If the concern is purely theoretical, stylistic, cosmetic, or already caught by the compiler or Clippy, set contested = true.", + Lens::FalsePositive => "You are a skeptical false-positive hunter reviewing a pull request for Nubster. You are given a single review finding and the patch of the file it refers to. Decide whether this is a classic false positive: already handled elsewhere, out of scope of this diff, intentional by design, or something the compiler or Clippy already enforces. If it looks like a false positive, set contested = true.", + }; + format!("{intro}\n\n{LENS_OUTPUT_SPEC}") +} + +fn agent_system_prompt(agent: Agent) -> String { + match agent { + Agent::Correctness => review_system_prompt(), + Agent::Security => security_system_prompt(), + Agent::Architecture => architecture_system_prompt(), + Agent::Performance => performance_system_prompt(), + } +} + +fn render_reports_for_synthesis(reports: &[(Agent, ReviewResponse)]) -> String { + let mut out = String::new(); + for (agent, report) in reports { + let _ = writeln!(out, "## {} agent", agent.label()); + let _ = writeln!(out, "Summary: {}", report.summary); + if !report.findings.is_empty() { + out.push_str("Findings:\n"); + for f in &report.findings { + let sev = match f.severity { + Severity::Critical => "critical", + Severity::Minor => "minor", + }; + let _ = writeln!(out, "- [{sev}] {}:{} {}", f.file, f.line, f.message); + } + } + out.push('\n'); + } + out +} + +/// Runs one specialist agent over the diff. Returns `(ReviewResponse, truncated)`. +#[allow(dead_code)] +pub async fn call_agent( + client: &reqwest::Client, + api_key: &str, + agent: Agent, + diff: &str, +) -> anyhow::Result<(ReviewResponse, bool)> { + call_analysis_mode( + client, + api_key, + TEAM_AGENT_MODEL, + agent_system_prompt(agent), + diff, + ) + .await +} + +/// Merges the specialist agent reports into a single deduplicated [`SynthReport`]. +#[allow(dead_code)] +pub async fn call_synthesis( + client: &reqwest::Client, + api_key: &str, + diff: &str, + reports: &[(Agent, ReviewResponse)], +) -> anyhow::Result { + let (content, _) = truncate_diff(diff); + let agents_block = render_reports_for_synthesis(reports); + + let request = ChatRequest { + model: TEAM_SYNTH_MODEL.to_string(), + messages: vec![ + Message { + role: "system".to_string(), + content: synthesis_system_prompt(), + }, + Message { + role: "user".to_string(), + content: format!( + "Specialist agent reports for a pull request:\n\n{agents_block}\nThe PR diff under review:\n\n{content}" + ), + }, + ], + response_format: ResponseFormat { + kind: "json_object", + }, + temperature: 0.2, + }; + + let raw = send_request(client, api_key, &request).await?; + serde_json::from_str(&raw).context("failed to parse Mistral synthesis response") +} + +/// Runs one adversarial lens over a single finding, given the file's patch. +#[allow(dead_code)] +pub async fn call_lens( + client: &reqwest::Client, + api_key: &str, + lens: Lens, + file: &str, + message: &str, + patch: &str, +) -> anyhow::Result { + let (patch_content, _) = truncate_diff(patch); + + let request = ChatRequest { + model: TEAM_LENS_MODEL.to_string(), + messages: vec![ + Message { + role: "system".to_string(), + content: lens_system_prompt(lens), + }, + Message { + role: "user".to_string(), + content: format!( + "Finding to scrutinise:\nFile: {file}\nClaim: {message}\n\nCode under review (patch of {file}):\n\n{patch_content}" + ), + }, + ], + response_format: ResponseFormat { + kind: "json_object", + }, + temperature: 0.0, + }; + + let raw = send_request(client, api_key, &request).await?; + serde_json::from_str(&raw).context("failed to parse Mistral lens response") +} + fn truncate_diff(diff: &str) -> (&str, bool) { if diff.len() <= MAX_DIFF_CHARS { (diff, false) From 0f37120506d5d4c981739e6470ba89816ff50e58 Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:41:02 +0200 Subject: [PATCH 4/8] synthesis: reinforce dedup prompt and add finding category Strengthen the synthesis prompt: drop noise already enforced by Clippy or the compiler, require each message to name the concrete code element, and surface agent disagreement instead of silently resolving it. Add a Category enum (bug/security/design/performance/test-gap, with a tolerant Other fallback) carried on every SynthFinding. --- tools/ai-review/src/mistral.rs | 13 ++++--- tools/ai-review/src/types.rs | 66 +++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/tools/ai-review/src/mistral.rs b/tools/ai-review/src/mistral.rs index 94578bf..e5e4ddc 100644 --- a/tools/ai-review/src/mistral.rs +++ b/tools/ai-review/src/mistral.rs @@ -443,8 +443,12 @@ fn synthesis_system_prompt() -> String { 2. Attribute: record every contributing agent in "sources", e.g. ["security","correctness"]. 3. Preserve signal: when merging duplicates, keep the HIGHEST severity. Never drop a critical. 4. Stay grounded: do NOT invent findings that no agent raised. You may only merge and rewrite for clarity. -5. Summarise: write a 3-5 sentence executive summary of what the PR does and its overall quality, and list concrete strengths. -6. Do NOT emit a verdict or recommendation — that is computed deterministically downstream. +5. Cut the noise: drop findings that are purely stylistic, cosmetic, or already enforced by Clippy or the compiler (formatting, naming, import ordering, unused warnings). Nubster runs Clippy pedantic with -D warnings in CI, so these add no value. +6. Anchor to code: every "message" MUST name the concrete code element involved (a function, type, variable, or call) so it can be verified against the diff. +7. Categorise: tag each finding with a "category" from ["bug","security","design","performance","test-gap"]. +8. Surface disagreement: if two agents contradict each other on the same point, state that explicitly in the message instead of silently choosing a side. +9. Summarise: write a 3-5 sentence executive summary of what the PR does and its overall quality, and list concrete strengths. +10. Do NOT emit a verdict or recommendation — that is computed deterministically downstream. Respond ONLY with a valid JSON object: { @@ -455,13 +459,14 @@ Respond ONLY with a valid JSON object: "file": "exact/path/from/the/reports.rs", "line": 42, "severity": "critical", - "message": "Merged, clear, actionable description in one sentence.", + "category": "security", + "message": "Names the concrete code element and the issue in one sentence.", "sources": ["security", "correctness"] } ] } -Rules: severity MUST be "critical" or "minor". line MUST be the line from the reports, or 0 for a file-level concern. sources MUST be a non-empty subset of ["correctness","security","architecture","performance"]. Return valid JSON only, no markdown fences."# +Rules: severity MUST be "critical" or "minor". category MUST be one of ["bug","security","design","performance","test-gap"]. line MUST be the line from the reports, or 0 for a file-level concern. sources MUST be a non-empty subset of ["correctness","security","architecture","performance"]. Return valid JSON only, no markdown fences."# .to_string() } diff --git a/tools/ai-review/src/types.rs b/tools/ai-review/src/types.rs index 9b7f26e..0bef408 100644 --- a/tools/ai-review/src/types.rs +++ b/tools/ai-review/src/types.rs @@ -123,6 +123,35 @@ impl Lens { } } +/// The kind of issue a synthesised finding describes. +#[derive(Debug, Deserialize, Clone, Copy, PartialEq)] +#[serde(rename_all = "kebab-case")] +#[allow(dead_code)] +pub enum Category { + Bug, + Security, + Design, + Performance, + TestGap, + #[serde(other)] + Other, +} + +#[allow(dead_code)] +impl Category { + /// Returns the human-readable label for this category. + pub fn label(self) -> &'static str { + match self { + Self::Bug => "bug", + Self::Security => "security", + Self::Design => "design", + Self::Performance => "performance", + Self::TestGap => "test-gap", + Self::Other => "other", + } + } +} + /// A finding merged by the synthesis agent, tracking which agents raised it. #[derive(Debug, Deserialize, Clone)] #[allow(dead_code)] @@ -132,6 +161,8 @@ pub struct SynthFinding { pub line: u32, #[serde(default = "default_severity")] pub severity: Severity, + #[serde(default = "default_category")] + pub category: Category, pub message: String, #[serde(default)] pub sources: Vec, @@ -179,6 +210,11 @@ fn default_severity() -> Severity { Severity::Minor } +/// Default category for a finding whose category field is missing. +fn default_category() -> Category { + Category::Other +} + /// Deserializes a boolean from `true`/`false`, `"true"`/`"false"`, `"yes"`/`"no"`, /// or `0`/`1`, defaulting to `true` (the sceptical stance) when the value cannot /// be interpreted. Language models are inconsistent about boolean encoding. @@ -258,14 +294,42 @@ mod tests { #[test] fn synth_finding_keeps_sources_and_severity() { let json = r#"{ - "file": "src/a.rs", "line": 12, "severity": "critical", + "file": "src/a.rs", "line": 12, "severity": "critical", "category": "security", "message": "Tenant scoping missing.", "sources": ["security", "correctness"] }"#; let f: SynthFinding = serde_json::from_str(json).unwrap(); assert_eq!(f.severity, Severity::Critical); + assert_eq!(f.category, Category::Security); assert_eq!(f.sources, vec!["security", "correctness"]); } + #[test] + fn synth_finding_category_parses_defaults_and_falls_back() { + let known = r#"{"file": "a.rs", "message": "x", "category": "test-gap"}"#; + assert_eq!( + serde_json::from_str::(known) + .unwrap() + .category, + Category::TestGap + ); + + let unknown = r#"{"file": "a.rs", "message": "x", "category": "whatever"}"#; + assert_eq!( + serde_json::from_str::(unknown) + .unwrap() + .category, + Category::Other + ); + + let missing = r#"{"file": "a.rs", "message": "x"}"#; + assert_eq!( + serde_json::from_str::(missing) + .unwrap() + .category, + Category::Other + ); + } + #[test] fn lens_verdict_parses_loose_booleans() { let cases = [ From 860ea1da1f62e7720cb5ef5c82ed0f2620e7118c Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:46:54 +0200 Subject: [PATCH 5/8] team: orchestrate fan-out, lens vote and deterministic verdict Add the team module: run_team runs the four specialist agents concurrently (graceful degradation), synthesises and caps findings, then verifies each with a concurrency-bounded 3-lens adversarial vote, computes the verdict deterministically, renders the comment and posts confirmed criticals inline. Add render_team_comment and TeamCommentView. The pure aggregate_lens_votes and compute_verdict helpers are unit-tested. --- tools/ai-review/src/main.rs | 1 + tools/ai-review/src/review.rs | 184 +++++++++++++++++- tools/ai-review/src/team.rs | 355 ++++++++++++++++++++++++++++++++++ 3 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 tools/ai-review/src/team.rs diff --git a/tools/ai-review/src/main.rs b/tools/ai-review/src/main.rs index 3219008..f95009a 100644 --- a/tools/ai-review/src/main.rs +++ b/tools/ai-review/src/main.rs @@ -1,6 +1,7 @@ mod github; mod mistral; mod review; +mod team; mod types; use anyhow::Context; diff --git a/tools/ai-review/src/review.rs b/tools/ai-review/src/review.rs index 990c376..bf18075 100644 --- a/tools/ai-review/src/review.rs +++ b/tools/ai-review/src/review.rs @@ -1,6 +1,8 @@ use std::fmt::Write as _; -use crate::types::{Finding, ReviewResponse, Severity}; +use crate::types::{ + Agent, Finding, FindingVerdict, ReviewResponse, Severity, SynthFinding, Verdict, +}; /// Findings that become inline PR comments: critical severity AND specific line. pub fn inline_findings(response: &ReviewResponse) -> Vec<&Finding> { @@ -67,6 +69,124 @@ pub fn has_bot_marker(body: &str, marker: &str) -> bool { body.contains(marker) } +/// Input bundle for [`render_team_comment`], grouped to keep the argument count low. +#[allow(dead_code)] +pub struct TeamCommentView<'a> { + pub executive_summary: &'a str, + pub strengths: &'a [String], + pub scored: &'a [(SynthFinding, FindingVerdict)], + pub verdict: Verdict, + pub file_count: usize, + pub agents_ok: &'a [Agent], + pub agents_failed: &'a [Agent], + pub raw_count: usize, + pub dedup_count: usize, + pub capped: usize, + pub model: &'a str, +} + +/// Renders the team-mode global comment (with hidden upsert marker). +#[allow(dead_code)] +#[must_use] +pub fn render_team_comment(view: &TeamCommentView) -> String { + let mut md = "\n## Team Review IA\n\n".to_string(); + + let verdict_line = match view.verdict { + Verdict::Ship => "**Verdict : SHIP ✅**", + Verdict::NeedsWork => "**Verdict : NEEDS_WORK ⚠️**", + Verdict::Discuss => "**Verdict : DISCUSS 💬**", + }; + writeln!(md, "{verdict_line}\n").unwrap(); + writeln!(md, "### Vue d'ensemble\n{}\n", view.executive_summary).unwrap(); + + if !view.strengths.is_empty() { + md.push_str("### Points forts\n"); + for s in view.strengths { + writeln!(md, "- {s}").unwrap(); + } + md.push('\n'); + } + + let confirmed: Vec<&(SynthFinding, FindingVerdict)> = + view.scored.iter().filter(|(_, v)| !v.contested).collect(); + let contested: Vec<&(SynthFinding, FindingVerdict)> = + view.scored.iter().filter(|(_, v)| v.contested).collect(); + + if !confirmed.is_empty() { + md.push_str("### Findings confirmés\n"); + for (f, _) in &confirmed { + writeln!(md, "{}", render_finding_line(f)).unwrap(); + } + md.push('\n'); + } + + if !contested.is_empty() { + md.push_str("### ⚠️ Contestés (vérification adversariale)\n"); + for (f, v) in &contested { + writeln!(md, "{}", render_finding_line(f)).unwrap(); + if let Some(reason) = v.reasons.first() { + writeln!(md, " - _{reason}_").unwrap(); + } + } + md.push('\n'); + } + + if confirmed.is_empty() && contested.is_empty() { + md.push_str("Aucun problème remonté par les agents.\n\n"); + } + + md.push_str("---\n"); + let ok_labels: Vec<&str> = view.agents_ok.iter().map(|a| a.label()).collect(); + write!(md, "*Agents : {}", ok_labels.join(", ")).unwrap(); + if !view.agents_failed.is_empty() { + let failed_labels: Vec<&str> = view.agents_failed.iter().map(|a| a.label()).collect(); + write!(md, " · échoués : {}", failed_labels.join(", ")).unwrap(); + } + writeln!(md).unwrap(); + write!( + md, + "Findings : {} bruts → {} après fusion", + view.raw_count, view.dedup_count + ) + .unwrap(); + if view.capped > 0 { + write!(md, " → {} non vérifiés (plafond)", view.capped).unwrap(); + } + writeln!(md).unwrap(); + write!( + md, + "Modèle : {} · Diff : {} fichier(s)*", + view.model, view.file_count + ) + .unwrap(); + + md +} + +fn render_finding_line(f: &SynthFinding) -> String { + let sev = match f.severity { + Severity::Critical => "🔴", + Severity::Minor => "🟡", + }; + let location = if f.line > 0 { + format!("{}:{}", f.file, f.line) + } else { + f.file.clone() + }; + let sources = if f.sources.is_empty() { + String::new() + } else { + format!(" _(via {})_", f.sources.join(", ")) + }; + format!( + "- {sev} `{}` **{}** — {}{}", + f.category.label(), + location, + f.message, + sources + ) +} + #[cfg(test)] mod tests { use super::*; @@ -178,4 +298,66 @@ mod tests { "" )); } + + #[test] + fn renders_team_comment_with_verdict_and_sections() { + use crate::types::Category; + + let scored = vec![ + ( + SynthFinding { + file: "a.rs".to_string(), + line: 10, + severity: Severity::Critical, + category: Category::Bug, + message: "panic in handler".to_string(), + sources: vec!["correctness".to_string()], + }, + FindingVerdict { + contested: false, + reasons: vec![], + }, + ), + ( + SynthFinding { + file: "b.rs".to_string(), + line: 0, + severity: Severity::Minor, + category: Category::Design, + message: "tight coupling".to_string(), + sources: vec![], + }, + FindingVerdict { + contested: true, + reasons: vec!["already handled elsewhere".to_string()], + }, + ), + ]; + let ok = [Agent::Correctness, Agent::Security]; + let failed = [Agent::Performance]; + let strengths = ["clear separation".to_string()]; + let view = TeamCommentView { + executive_summary: "Adds the handler.", + strengths: &strengths, + scored: &scored, + verdict: Verdict::NeedsWork, + file_count: 3, + agents_ok: &ok, + agents_failed: &failed, + raw_count: 5, + dedup_count: 2, + capped: 0, + model: "codestral-latest + mistral-large-latest", + }; + let md = render_team_comment(&view); + + assert!(md.starts_with("")); + assert!(md.contains("NEEDS_WORK")); + assert!(md.contains("Findings confirmés")); + assert!(md.contains("panic in handler")); + assert!(md.contains("Contestés")); + assert!(md.contains("already handled elsewhere")); + assert!(md.contains("échoués : Performance")); + assert!(md.contains("5 bruts → 2 après fusion")); + } } diff --git a/tools/ai-review/src/team.rs b/tools/ai-review/src/team.rs new file mode 100644 index 0000000..b30ea85 --- /dev/null +++ b/tools/ai-review/src/team.rs @@ -0,0 +1,355 @@ +#![allow(clippy::missing_errors_doc)] +#![allow(dead_code)] + +use std::sync::Arc; + +use tokio::sync::Semaphore; +use tokio::task::JoinSet; + +use crate::types::{Agent, FindingVerdict, Lens, LensVerdict, Severity, SynthFinding, Verdict}; +use crate::{github, mistral, review, Clients}; + +/// Maximum number of synthesised findings put through adversarial verification. +const MAX_VERIFIED_FINDINGS: usize = 15; +/// Maximum number of concurrent Mistral calls during the verification phase. +const MAX_CONCURRENT_CALLS: usize = 6; +/// The three adversarial lenses applied to every verified finding. +const LENSES: [Lens; 3] = [Lens::CodeConfirms, Lens::RealImpact, Lens::FalsePositive]; + +/// Marker used to upsert the single team-review comment on a pull request. +const MARKER: &str = ""; + +/// Runs the full multi-agent team review pipeline for a pull request. +pub async fn run_team( + clients: &Clients, + owner: &str, + repo: &str, + pr_number: u64, +) -> anyhow::Result<()> { + println!("Fetching PR #{pr_number} diff for team review…"); + let ctx = github::fetch_diff_context(&clients.octo, owner, repo, pr_number).await?; + if ctx.full.trim().is_empty() { + println!("Empty diff — nothing to review."); + return Ok(()); + } + + println!("Running specialist agents…"); + let (report_ok, agents_ok, agents_failed) = run_agents(clients, &ctx.full).await; + if report_ok.is_empty() { + let body = format!( + "{MARKER}\n## Team Review IA\n\n⚠️ Team review unavailable: all specialist agents failed." + ); + github::upsert_global_comment(&clients.octo, owner, repo, pr_number, &body, MARKER).await?; + return Ok(()); + } + + let raw_count: usize = report_ok.iter().map(|(_, r)| r.findings.len()).sum(); + println!("Synthesising {} agent report(s)…", report_ok.len()); + let mut synth = + mistral::call_synthesis(&clients.http, &clients.mistral_key, &ctx.full, &report_ok).await?; + + let mut findings = std::mem::take(&mut synth.findings); + findings.sort_by_key(|f| severity_rank(&f.severity)); + let dedup_count = findings.len(); + let capped = dedup_count.saturating_sub(MAX_VERIFIED_FINDINGS); + if capped > 0 { + println!( + "Verifying top {MAX_VERIFIED_FINDINGS} of {dedup_count} findings ({capped} skipped)." + ); + findings.truncate(MAX_VERIFIED_FINDINGS); + } + + println!( + "Verifying {} finding(s) with the 3-lens vote…", + findings.len() + ); + let verdicts = verify_findings(clients, &ctx, &findings).await; + let scored: Vec<(SynthFinding, FindingVerdict)> = findings.into_iter().zip(verdicts).collect(); + let verdict = compute_verdict(&scored); + + let model = format!( + "{} + {}", + mistral::TEAM_AGENT_MODEL, + mistral::TEAM_SYNTH_MODEL + ); + let view = review::TeamCommentView { + executive_summary: &synth.executive_summary, + strengths: &synth.strengths, + scored: &scored, + verdict, + file_count: ctx.file_count, + agents_ok: &agents_ok, + agents_failed: &agents_failed, + raw_count, + dedup_count, + capped, + model: &model, + }; + let body = review::render_team_comment(&view); + + println!("Upserting team comment…"); + github::upsert_global_comment(&clients.octo, owner, repo, pr_number, &body, MARKER).await?; + + post_confirmed_inline(clients, owner, repo, pr_number, &scored).await?; + + println!("Team review complete."); + Ok(()) +} + +/// Runs the four specialist agents concurrently, returning the successful +/// reports plus the lists of agents that succeeded and failed. +async fn run_agents( + clients: &Clients, + diff: &str, +) -> ( + Vec<(Agent, crate::types::ReviewResponse)>, + Vec, + Vec, +) { + let agents = [ + Agent::Correctness, + Agent::Security, + Agent::Architecture, + Agent::Performance, + ]; + let (corr, sec, arch, perf) = tokio::join!( + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Correctness, + diff + ), + mistral::call_agent(&clients.http, &clients.mistral_key, Agent::Security, diff), + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Architecture, + diff + ), + mistral::call_agent( + &clients.http, + &clients.mistral_key, + Agent::Performance, + diff + ), + ); + + let mut reports = Vec::new(); + let mut ok = Vec::new(); + let mut failed = Vec::new(); + for (agent, result) in agents.into_iter().zip([corr, sec, arch, perf]) { + match result { + Ok((response, _)) => { + ok.push(agent); + reports.push((agent, response)); + } + Err(error) => { + eprintln!("warning: {} agent failed: {error}", agent.label()); + failed.push(agent); + } + } + } + (reports, ok, failed) +} + +/// Verifies each finding with the 3-lens adversarial vote under a concurrency +/// bound, returning one [`FindingVerdict`] per finding (index-aligned). +async fn verify_findings( + clients: &Clients, + ctx: &github::DiffContext, + findings: &[SynthFinding], +) -> Vec { + let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CALLS)); + let mut set: JoinSet<(usize, anyhow::Result)> = JoinSet::new(); + + for (idx, finding) in findings.iter().enumerate() { + let patch = ctx.patch_for(finding).to_owned(); + for lens in LENSES { + let permit_source = Arc::clone(&semaphore); + let client = clients.http.clone(); + let key = clients.mistral_key.clone(); + let file = finding.file.clone(); + let message = finding.message.clone(); + let patch = patch.clone(); + set.spawn(async move { + let _permit = permit_source.acquire_owned().await.ok(); + let result = mistral::call_lens(&client, &key, lens, &file, &message, &patch).await; + (idx, result) + }); + } + } + + let mut votes: Vec> = (0..findings.len()).map(|_| Vec::new()).collect(); + while let Some(joined) = set.join_next().await { + match joined { + Ok((idx, Ok(verdict))) => votes[idx].push(verdict), + Ok((idx, Err(error))) => { + eprintln!("warning: a lens failed for finding {idx}: {error}"); + } + Err(error) => eprintln!("warning: a lens task did not complete: {error}"), + } + } + + votes.iter().map(|v| aggregate_lens_votes(v)).collect() +} + +/// Posts inline comments for confirmed, critical, line-located findings. +async fn post_confirmed_inline( + clients: &Clients, + owner: &str, + repo: &str, + pr_number: u64, + scored: &[(SynthFinding, FindingVerdict)], +) -> anyhow::Result<()> { + let inline: Vec = scored + .iter() + .filter(|(f, v)| f.severity == Severity::Critical && !v.contested && f.line > 0) + .map(|(f, _)| github::InlineComment { + path: f.file.clone(), + line: f.line, + body: f.message.clone(), + }) + .collect(); + + if inline.is_empty() { + return Ok(()); + } + + let head_sha = github::fetch_head_sha(&clients.octo, owner, repo, pr_number).await?; + println!("Posting {} confirmed inline comment(s)…", inline.len()); + github::post_inline_comments( + &clients.github_token, + owner, + repo, + pr_number, + &head_sha, + &inline, + ) + .await +} + +/// Aggregates the lens votes for one finding using a sceptical majority rule: +/// contested when at least half of the received votes are contested, and when +/// no lens could be reached at all (an unverified finding is not confirmed). +#[must_use] +pub fn aggregate_lens_votes(votes: &[LensVerdict]) -> FindingVerdict { + let total = votes.len(); + let contested_count = votes.iter().filter(|v| v.contested).count(); + let contested = total == 0 || contested_count * 2 >= total; + let reasons = if total == 0 { + vec!["verification unavailable (all lenses failed)".to_string()] + } else { + votes + .iter() + .filter(|v| v.contested) + .map(|v| v.reason.clone()) + .collect() + }; + FindingVerdict { contested, reasons } +} + +/// Computes the overall verdict deterministically: a confirmed critical blocks, +/// an only-contested critical invites discussion, otherwise the PR may ship. +#[must_use] +pub fn compute_verdict(scored: &[(SynthFinding, FindingVerdict)]) -> Verdict { + let confirmed_critical = scored + .iter() + .any(|(f, v)| f.severity == Severity::Critical && !v.contested); + if confirmed_critical { + return Verdict::NeedsWork; + } + let contested_critical = scored + .iter() + .any(|(f, v)| f.severity == Severity::Critical && v.contested); + if contested_critical { + return Verdict::Discuss; + } + Verdict::Ship +} + +/// Sort key placing critical findings before minor ones. +fn severity_rank(severity: &Severity) -> u8 { + match severity { + Severity::Critical => 0, + Severity::Minor => 1, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::Category; + + fn lens_vote(contested: bool) -> LensVerdict { + LensVerdict { + contested, + reason: "because".to_string(), + } + } + + #[test] + fn aggregate_three_lens_majority() { + assert!( + aggregate_lens_votes(&[lens_vote(true), lens_vote(true), lens_vote(false)]).contested + ); + assert!( + aggregate_lens_votes(&[lens_vote(true), lens_vote(true), lens_vote(true)]).contested + ); + assert!( + !aggregate_lens_votes(&[lens_vote(true), lens_vote(false), lens_vote(false)]).contested + ); + assert!( + !aggregate_lens_votes(&[lens_vote(false), lens_vote(false), lens_vote(false)]) + .contested + ); + } + + #[test] + fn aggregate_handles_abstentions_and_empty() { + assert!(aggregate_lens_votes(&[]).contested); + assert!(aggregate_lens_votes(&[lens_vote(true), lens_vote(false)]).contested); + assert!(!aggregate_lens_votes(&[lens_vote(false), lens_vote(false)]).contested); + assert!(aggregate_lens_votes(&[lens_vote(true)]).contested); + assert!(!aggregate_lens_votes(&[lens_vote(false)]).contested); + } + + fn finding(severity: Severity) -> SynthFinding { + SynthFinding { + file: "a.rs".to_string(), + line: 1, + severity, + category: Category::Bug, + message: "m".to_string(), + sources: vec![], + } + } + + fn verdict(contested: bool) -> FindingVerdict { + FindingVerdict { + contested, + reasons: vec![], + } + } + + #[test] + fn verdict_needs_work_on_confirmed_critical() { + let scored = vec![(finding(Severity::Critical), verdict(false))]; + assert_eq!(compute_verdict(&scored), Verdict::NeedsWork); + } + + #[test] + fn verdict_discuss_when_all_criticals_contested() { + let scored = vec![ + (finding(Severity::Critical), verdict(true)), + (finding(Severity::Minor), verdict(false)), + ]; + assert_eq!(compute_verdict(&scored), Verdict::Discuss); + } + + #[test] + fn verdict_ship_without_criticals() { + let scored = vec![(finding(Severity::Minor), verdict(false))]; + assert_eq!(compute_verdict(&scored), Verdict::Ship); + assert_eq!(compute_verdict(&[]), Verdict::Ship); + } +} From 7887fdfb797bedf8f25eb6678c04cb54a931e987 Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 16:50:10 +0200 Subject: [PATCH 6/8] main: dispatch team mode end to end Wire Mode::Team (env "team", comment marker, label) and dispatch run_team from main. Surface the lens name in verification warnings, and drop the temporary dead_code allowances now that every team-mode item is reachable. --- tools/ai-review/src/github.rs | 2 -- tools/ai-review/src/main.rs | 3 +++ tools/ai-review/src/mistral.rs | 3 --- tools/ai-review/src/review.rs | 2 -- tools/ai-review/src/team.rs | 14 ++++++++------ tools/ai-review/src/types.rs | 19 +++++-------------- 6 files changed, 16 insertions(+), 27 deletions(-) diff --git a/tools/ai-review/src/github.rs b/tools/ai-review/src/github.rs index 2fd2004..43a41e8 100644 --- a/tools/ai-review/src/github.rs +++ b/tools/ai-review/src/github.rs @@ -21,14 +21,12 @@ pub struct InlineComment { const PATCH_FALLBACK_CHARS: usize = 8_000; /// The PR diff in two shapes: a concatenated view and a per-file patch map. -#[allow(dead_code)] pub struct DiffContext { pub full: String, pub by_file: HashMap, pub file_count: usize, } -#[allow(dead_code)] impl DiffContext { /// Returns the patch to hand to a lens for `finding`, falling back to a /// truncated view of the full diff for file-level or cross-file findings. diff --git a/tools/ai-review/src/main.rs b/tools/ai-review/src/main.rs index f95009a..f97ef98 100644 --- a/tools/ai-review/src/main.rs +++ b/tools/ai-review/src/main.rs @@ -55,6 +55,9 @@ async fn main() -> anyhow::Result<()> { Mode::Describe => { run_describe(&clients, owner, repo, pr_number).await?; } + Mode::Team => { + team::run_team(&clients, owner, repo, pr_number).await?; + } } Ok(()) diff --git a/tools/ai-review/src/mistral.rs b/tools/ai-review/src/mistral.rs index e5e4ddc..3e861b4 100644 --- a/tools/ai-review/src/mistral.rs +++ b/tools/ai-review/src/mistral.rs @@ -516,7 +516,6 @@ fn render_reports_for_synthesis(reports: &[(Agent, ReviewResponse)]) -> String { } /// Runs one specialist agent over the diff. Returns `(ReviewResponse, truncated)`. -#[allow(dead_code)] pub async fn call_agent( client: &reqwest::Client, api_key: &str, @@ -534,7 +533,6 @@ pub async fn call_agent( } /// Merges the specialist agent reports into a single deduplicated [`SynthReport`]. -#[allow(dead_code)] pub async fn call_synthesis( client: &reqwest::Client, api_key: &str, @@ -569,7 +567,6 @@ pub async fn call_synthesis( } /// Runs one adversarial lens over a single finding, given the file's patch. -#[allow(dead_code)] pub async fn call_lens( client: &reqwest::Client, api_key: &str, diff --git a/tools/ai-review/src/review.rs b/tools/ai-review/src/review.rs index bf18075..c6b5e49 100644 --- a/tools/ai-review/src/review.rs +++ b/tools/ai-review/src/review.rs @@ -70,7 +70,6 @@ pub fn has_bot_marker(body: &str, marker: &str) -> bool { } /// Input bundle for [`render_team_comment`], grouped to keep the argument count low. -#[allow(dead_code)] pub struct TeamCommentView<'a> { pub executive_summary: &'a str, pub strengths: &'a [String], @@ -86,7 +85,6 @@ pub struct TeamCommentView<'a> { } /// Renders the team-mode global comment (with hidden upsert marker). -#[allow(dead_code)] #[must_use] pub fn render_team_comment(view: &TeamCommentView) -> String { let mut md = "\n## Team Review IA\n\n".to_string(); diff --git a/tools/ai-review/src/team.rs b/tools/ai-review/src/team.rs index b30ea85..7f9d81e 100644 --- a/tools/ai-review/src/team.rs +++ b/tools/ai-review/src/team.rs @@ -1,5 +1,4 @@ #![allow(clippy::missing_errors_doc)] -#![allow(dead_code)] use std::sync::Arc; @@ -160,7 +159,7 @@ async fn verify_findings( findings: &[SynthFinding], ) -> Vec { let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CALLS)); - let mut set: JoinSet<(usize, anyhow::Result)> = JoinSet::new(); + let mut set: JoinSet<(usize, Lens, anyhow::Result)> = JoinSet::new(); for (idx, finding) in findings.iter().enumerate() { let patch = ctx.patch_for(finding).to_owned(); @@ -174,7 +173,7 @@ async fn verify_findings( set.spawn(async move { let _permit = permit_source.acquire_owned().await.ok(); let result = mistral::call_lens(&client, &key, lens, &file, &message, &patch).await; - (idx, result) + (idx, lens, result) }); } } @@ -182,9 +181,12 @@ async fn verify_findings( let mut votes: Vec> = (0..findings.len()).map(|_| Vec::new()).collect(); while let Some(joined) = set.join_next().await { match joined { - Ok((idx, Ok(verdict))) => votes[idx].push(verdict), - Ok((idx, Err(error))) => { - eprintln!("warning: a lens failed for finding {idx}: {error}"); + Ok((idx, _, Ok(verdict))) => votes[idx].push(verdict), + Ok((idx, lens, Err(error))) => { + eprintln!( + "warning: {} lens failed for finding {idx}: {error}", + lens.label() + ); } Err(error) => eprintln!("warning: a lens task did not complete: {error}"), } diff --git a/tools/ai-review/src/types.rs b/tools/ai-review/src/types.rs index 0bef408..345968e 100644 --- a/tools/ai-review/src/types.rs +++ b/tools/ai-review/src/types.rs @@ -23,7 +23,6 @@ pub struct ReviewResponse { pub security: String, } -#[allow(dead_code)] pub enum Mode { Review, Describe, @@ -31,9 +30,9 @@ pub enum Mode { Architecture, Performance, Product, + Team, } -#[allow(dead_code)] impl Mode { pub fn from_env() -> anyhow::Result { match std::env::var("AI_MODE").as_deref() { @@ -43,13 +42,14 @@ impl Mode { Ok("architecture") => Ok(Self::Architecture), Ok("performance") => Ok(Self::Performance), Ok("product") => Ok(Self::Product), + Ok("team") => Ok(Self::Team), Ok(other) => anyhow::bail!("unknown AI_MODE: {other}"), } } pub fn mistral_model(&self) -> &'static str { match self { - Self::Review | Self::Security | Self::Architecture | Self::Performance => { + Self::Review | Self::Security | Self::Architecture | Self::Performance | Self::Team => { "codestral-latest" } Self::Describe | Self::Product => "mistral-small-latest", @@ -63,6 +63,7 @@ impl Mode { Self::Architecture => "", Self::Performance => "", Self::Product => "", + Self::Team => "", Self::Describe => "", } } @@ -74,6 +75,7 @@ impl Mode { Self::Architecture => "Architecture Review", Self::Performance => "Performance Review", Self::Product => "Product Review", + Self::Team => "Team Review", Self::Describe => "Description", } } @@ -81,7 +83,6 @@ impl Mode { /// One of the four specialised review angles run in parallel in team mode. #[derive(Debug, Clone, Copy)] -#[allow(dead_code)] pub enum Agent { Correctness, Security, @@ -89,7 +90,6 @@ pub enum Agent { Performance, } -#[allow(dead_code)] impl Agent { /// Returns the human-readable label for this agent. pub fn label(self) -> &'static str { @@ -104,14 +104,12 @@ impl Agent { /// One adversarial lens applied to a synthesised finding during verification. #[derive(Debug, Clone, Copy)] -#[allow(dead_code)] pub enum Lens { CodeConfirms, RealImpact, FalsePositive, } -#[allow(dead_code)] impl Lens { /// Returns the human-readable label for this lens. pub fn label(self) -> &'static str { @@ -126,7 +124,6 @@ impl Lens { /// The kind of issue a synthesised finding describes. #[derive(Debug, Deserialize, Clone, Copy, PartialEq)] #[serde(rename_all = "kebab-case")] -#[allow(dead_code)] pub enum Category { Bug, Security, @@ -137,7 +134,6 @@ pub enum Category { Other, } -#[allow(dead_code)] impl Category { /// Returns the human-readable label for this category. pub fn label(self) -> &'static str { @@ -154,7 +150,6 @@ impl Category { /// A finding merged by the synthesis agent, tracking which agents raised it. #[derive(Debug, Deserialize, Clone)] -#[allow(dead_code)] pub struct SynthFinding { pub file: String, #[serde(default)] @@ -170,7 +165,6 @@ pub struct SynthFinding { /// Output of the synthesis agent: a merged, deduplicated cross-agent review. #[derive(Debug, Deserialize)] -#[allow(dead_code)] pub struct SynthReport { pub executive_summary: String, #[serde(default)] @@ -181,7 +175,6 @@ pub struct SynthReport { /// Verdict returned by one adversarial lens for a single finding. #[derive(Debug, Deserialize, Clone)] -#[allow(dead_code)] pub struct LensVerdict { #[serde(deserialize_with = "de_bool_loose")] pub contested: bool, @@ -190,7 +183,6 @@ pub struct LensVerdict { /// Aggregated verdict for one finding after the multi-lens vote. #[derive(Debug, Clone)] -#[allow(dead_code)] pub struct FindingVerdict { pub contested: bool, pub reasons: Vec, @@ -198,7 +190,6 @@ pub struct FindingVerdict { /// Final overall verdict computed deterministically after verification. #[derive(Debug, Clone, Copy, PartialEq)] -#[allow(dead_code)] pub enum Verdict { Ship, NeedsWork, From 30eb9f89037274a72333e2b0da817e4ace9db57f Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 17:13:40 +0200 Subject: [PATCH 7/8] docs: document ai-review modes and the ai:team trigger List the seven review modes and their models, and show how a caller workflow invokes the reusable workflow, including the on-demand team mode gated by the ai:team label. --- README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/README.md b/README.md index 4d27e28..4972078 100644 --- a/README.md +++ b/README.md @@ -6,3 +6,52 @@ Organization-level tooling for nubster-opensources. Rust binary used in reusable GitHub Actions workflows to review pull requests and generate PR descriptions using the Mistral API. + +### Modes + +The mode is selected with the `AI_MODE` environment variable, which the +reusable workflow forwards from its `mode` input. + +| Mode | What it does | Model | +| --- | --- | --- | +| `review` | General code review (bugs, logic, security summary) | codestral | +| `security` | Security-focused audit | codestral | +| `architecture` | Architecture and design review | codestral | +| `performance` | Performance review | codestral | +| `product` | Product, compliance and developer-experience review | mistral-small | +| `describe` | Fills an empty PR description from the diff | mistral-small | +| `team` | Multi-agent review: four specialist agents run in parallel, a synthesis step merges and deduplicates their findings, then every finding is checked by a three-lens adversarial vote before a deterministic verdict | codestral + mistral-large | + +### Calling the reusable workflow + +Default per-PR review: + +```yaml +jobs: + review: + uses: nubster-opensources/.github/.github/workflows/ai-review.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + mode: review + secrets: + mistral-api-key: ${{ secrets.MISTRAL_API_KEY }} +``` + +Team mode is heavier, so trigger it on demand by adding the `ai:team` label to a +pull request: + +```yaml +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + +jobs: + team: + if: contains(github.event.pull_request.labels.*.name, 'ai:team') + uses: nubster-opensources/.github/.github/workflows/ai-review.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + mode: team + secrets: + mistral-api-key: ${{ secrets.MISTRAL_API_KEY }} +``` From ca8460f34b26f6be8cf452f4ea5be7229b0de166 Mon Sep 17 00:00:00 2001 From: Pierrick Fonquerne Date: Wed, 3 Jun 2026 17:14:59 +0200 Subject: [PATCH 8/8] review: align team finding separator with the existing convention --- tools/ai-review/src/review.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ai-review/src/review.rs b/tools/ai-review/src/review.rs index c6b5e49..001a4b2 100644 --- a/tools/ai-review/src/review.rs +++ b/tools/ai-review/src/review.rs @@ -177,7 +177,7 @@ fn render_finding_line(f: &SynthFinding) -> String { format!(" _(via {})_", f.sources.join(", ")) }; format!( - "- {sev} `{}` **{}** — {}{}", + "- {sev} `{}` **{}** : {}{}", f.category.label(), location, f.message,