diff --git a/Cargo.lock b/Cargo.lock index 9f80a035..397c80bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,7 @@ dependencies = [ "cfg-if", "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -559,7 +560,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", - "regex-automata 0.4.14", + "regex-automata", "serde", ] @@ -1011,9 +1012,9 @@ dependencies = [ ] [[package]] -name = "dd-sds" -version = "0.1.2" -source = "git+https://github.com/DataDog/dd-sensitive-data-scanner.git?rev=51fc3d11b6b24c9b1b1d74f8285b47c3d7faa253#51fc3d11b6b24c9b1b1d74f8285b47c3d7faa253" +name = "dd-sensitive-data-scanner" +version = "0.0.0" +source = "git+https://github.com/DataDog/dd-sensitive-data-scanner.git?rev=b2dca51b27a87ecb2d847ad4167a0537afca1972#b2dca51b27a87ecb2d847ad4167a0537afca1972" dependencies = [ "ahash", "aws-sign-v4", @@ -1023,7 +1024,7 @@ dependencies = [ "chrono", "crc32fast", "ethaddr", - "farmhash", + "farmhash2", "futures", "iban_validate", "iso_iec_7064", @@ -1037,8 +1038,8 @@ dependencies = [ "once_cell", "rayon", "regex", - "regex-automata 0.4.14", - "regex-automata 0.4.9", + "regex-automata", + "regex-pool", "regex-syntax 0.7.5", "reqwest", "serde", @@ -1410,9 +1411,10 @@ dependencies = [ ] [[package]] -name = "farmhash" +name = "farmhash2" version = "1.1.5" -source = "git+https://github.com/fuchsnj/rust-farmhash?rev=82d80b689d65fbd378b13deff10cdd07794df64e#82d80b689d65fbd378b13deff10cdd07794df64e" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c4e203d1ec89aa6b1c81b7c14ef14f1a985e2eee5e452c5c1abf66ccee9d2" [[package]] name = "fastrand" @@ -1719,7 +1721,7 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.14", + "regex-automata", "regex-syntax 0.8.10", ] @@ -2455,7 +2457,7 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" dependencies = [ - "regex-automata 0.4.14", + "regex-automata", ] [[package]] @@ -2614,7 +2616,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.4.14", + "regex-automata", ] [[package]] @@ -3531,20 +3533,10 @@ checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.14", + "regex-automata", "regex-syntax 0.8.10", ] -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "git+https://github.com/fbryden/regex?rev=6952250af962ca3e364da47382b16dba9c703431#6952250af962ca3e364da47382b16dba9c703431" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.8.5", -] - [[package]] name = "regex-automata" version = "0.4.14" @@ -3557,15 +3549,16 @@ dependencies = [ ] [[package]] -name = "regex-syntax" -version = "0.7.5" +name = "regex-pool" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "e9d1c4e093793ea93da49df43979b94c6b73d94c00baba346eeda9f58e53e8e5" [[package]] name = "regex-syntax" -version = "0.8.5" -source = "git+https://github.com/fbryden/regex?rev=6952250af962ca3e364da47382b16dba9c703431#6952250af962ca3e364da47382b16dba9c703431" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" @@ -3966,7 +3959,7 @@ version = "0.8.7" dependencies = [ "anyhow", "common", - "dd-sds", + "dd-sensitive-data-scanner", "futures", "httpmock", "itertools 0.14.0", @@ -4954,7 +4947,7 @@ dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex-automata 0.4.14", + "regex-automata", "serde", "serde_json", "sharded-slab", diff --git a/crates/secrets/Cargo.toml b/crates/secrets/Cargo.toml index d3ad109d..145a0a22 100644 --- a/crates/secrets/Cargo.toml +++ b/crates/secrets/Cargo.toml @@ -15,7 +15,7 @@ futures = "0.3.31" lazy_static = "1.5.0" # remote -dd-sds = { git = "https://github.com/DataDog/dd-sensitive-data-scanner.git", rev = "51fc3d11b6b24c9b1b1d74f8285b47c3d7faa253" } +dd-sds = { package = "dd-sensitive-data-scanner", git = "https://github.com/DataDog/dd-sensitive-data-scanner.git", rev = "b2dca51b27a87ecb2d847ad4167a0537afca1972" } strum = "0.25.0" [dev-dependencies] diff --git a/crates/secrets/src/scanner.rs b/crates/secrets/src/scanner.rs index c13962c8..3990c704 100644 --- a/crates/secrets/src/scanner.rs +++ b/crates/secrets/src/scanner.rs @@ -9,7 +9,7 @@ use anyhow::Error; use common::analysis_options::AnalysisOptions; use common::model::position::Position; use common::utils::position_utils::get_position_in_string; -use dd_sds::{RootRuleConfig, RuleConfig, Scanner}; +use dd_sds::{RootRuleConfig, RuleConfig, ScanOptionBuilder, Scanner}; use itertools::Itertools; use std::sync::Arc; @@ -48,19 +48,11 @@ pub fn find_secrets( let mut codemut = code.to_owned(); - // NOTE(incident-56036): use the non-validating `scan()` entrypoint and call - // `validate_matches()` as a separate step, rather than `scan_with_options()` with - // `with_validate_matching`. The latter triggered a stack overflow inside dd-sds when - // scanning very large monorepos (see prod incident 56036). - // - // The two paths are equivalent for plain validators. They differ for the new - // `is_supporting_rule` HTTP match-pairing feature: cross-rule template-variable - // resolution (a supporting rule's match feeding `$PARAM` placeholders in a primary - // rule's validator URL) only fires inside `scan_with_options`. With this workaround - // those primary-rule matches surface as `NotValidated` instead of `Valid` — see the - // ignored `test_supporting_rule_excluded_from_output_but_used_for_match_pairing` test. - // Revert once dd-sds is patched. - let mut matches = match scanner.scan(&mut codemut) { + let scan_options = ScanOptionBuilder::new() + .with_validate_matching(!options.disable_validation) + .build(); + + let matches = match scanner.scan_with_options(&mut codemut, scan_options) { Ok(m) => m, Err(e) => { if options.use_debug { @@ -77,10 +69,6 @@ pub fn find_secrets( return vec![]; } - if !options.disable_validation { - scanner.validate_matches(&mut matches); - } - matches .iter() .flat_map(|sds_match| { @@ -370,14 +358,7 @@ mod tests { /// A supporting rule's match must be excluded from `find_secrets` output, but its value /// must still be used to populate template variables for the main rule's HTTP validation call. - /// - /// IGNORED (incident-56036): cross-rule template-variable resolution only happens inside - /// `Scanner::scan_with_options(validate_matches=true)`. That call triggers a stack overflow - /// in dd-sds on large repos in prod, so `find_secrets` now uses `scan()` + `validate_matches()` - /// instead; the latter does not propagate `provides` parameters between rules, so this - /// scenario validates as `NotValidated` rather than `Valid`. Re-enable once dd-sds is patched. #[test] - #[ignore = "regressed by incident-56036 stack-overflow workaround; re-enable after dd-sds fix"] fn test_supporting_rule_excluded_from_output_but_used_for_match_pairing() { use httpmock::Method::GET; use httpmock::MockServer;