Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- stop flagging a false-positive ONNX Python operator when tensor weight bytes coincidentally spell `PyOp`
- detect Python operators declared in nested ONNX graphs and functions
- distinguish ASCII-serialized Torch7 artifacts from plain PyTorch source text
- detect and scan signature-valid CNTK and LightGBM payloads even when renamed with misleading suffixes
- detect and scan signature-valid RKNN, TFLite, and ExecuTorch payloads when renamed with misleading suffixes, and classify ExecuTorch files that cannot be read as inconclusive

## [0.2.45](https://github.com/promptfoo/modelaudit/compare/v0.2.44...v0.2.45) (2026-05-03)

Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ ModelAudit includes 44 registered scanners covering model, archive, and configur
| **CoreML** | `.mlmodel` | LOW |
| **MXNet** | `*-symbol.json`, `*-NNNN.params` | LOW |
| **NeMo** | `.nemo` | MEDIUM |
| **CNTK** | `.dnn`, `.cmf` | MEDIUM |
| **RKNN** | `.rknn` | MEDIUM |
| **CNTK** | `.dnn`, `.cmf`, signature-valid renamed artifacts | MEDIUM |
| **RKNN** | `.rknn`, signature-valid renamed artifacts | MEDIUM |
| **Torch7** | `.t7`, `.th`, `.net` | HIGH |
| **CatBoost** | `.cbm` | MEDIUM |
| **XGBoost** | `.bst`, `.model`, `.json`, `.ubj` | MEDIUM |
| **LightGBM** | `.lgb`, `.lightgbm`, `.model` | MEDIUM |
| **LightGBM** | `.lgb`, `.lightgbm`, `.model`, signature-valid renamed artifacts | MEDIUM |
| **Llamafile** | `.llamafile`, extensionless, `.exe` | MEDIUM |
| **TorchServe** | `.mar` | HIGH |
| **SafeTensors** | `.safetensors` | LOW |
| **GGUF/GGML** | `.gguf`, `.ggml`, `.ggmf`, `.ggjt`, `.ggla`, `.ggsa` | LOW |
| **JAX/Flax** | `.msgpack`, `.flax`, `.orbax`, `.jax`, `.checkpoint`, `.orbax-checkpoint` | LOW |
| **TFLite** | `.tflite` | LOW |
| **ExecuTorch** | `.ptl`, `.pte` | LOW |
| **TFLite** | `.tflite`, signature-valid renamed artifacts | LOW |
| **ExecuTorch** | `.ptl`, `.pte`, signature-valid standalone renamed artifacts | LOW |
| **TensorRT** | `.engine`, `.plan`, `.trt` | LOW |
| **PaddlePaddle** | `.pdmodel`, `.pdiparams` | LOW |
| **OpenVINO** | `.xml` | LOW |
Expand Down
65 changes: 33 additions & 32 deletions docs/user/compatibility-matrix.md

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions modelaudit/scanners/cntk_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
]

_CNTK_SUPPORTED_EXTENSIONS = frozenset({".dnn", ".cmf"})
_CNTK_CANDIDATE_EXTENSIONS = frozenset({".dnn", ".cmf", ".model"})
_CNTK_EXCLUDED_EXTENSIONS = frozenset({".model"})

_MAX_SIGNATURE_BYTES = 4096
_MAX_SCAN_BYTES = 10 * 1024 * 1024 # 10MB parser budget per file
Expand Down Expand Up @@ -125,17 +125,15 @@ def _has_cntkv2_structure_markers(prefix: bytes) -> bool:


def _detect_cntk_variant(prefix: bytes, extension: str) -> tuple[str, str]:
if extension not in _CNTK_CANDIDATE_EXTENSIONS:
return "not_cntk", "extension_not_cntk_candidate"
if extension in _CNTK_EXCLUDED_EXTENSIONS:
return "unsupported_cntk_variant", "model_extension_excluded_for_xgboost_overlap"

if prefix.startswith(_CNTK_LEGACY_MAGIC):
if _CNTK_LEGACY_VERSION_MARKER in prefix:
return "legacy_v1", "legacy_bcn_and_bversion_markers"
return "unsupported_cntk_variant", "legacy_marker_without_bversion_marker"

if _has_cntkv2_core_markers(prefix):
if extension == ".model":
return "unsupported_cntk_variant", "cntkv2_model_extension_deferred_v1"
if _has_cntkv2_structure_markers(prefix):
return "cntk_v2", "protobuf_core_and_structure_markers"
return "unsupported_cntk_variant", "protobuf_core_markers_without_structure_markers"
Expand Down Expand Up @@ -267,7 +265,7 @@ class CntkScanner(BaseScanner):
"""Scanner for CNTK model files with strict format detection."""

name = "cntk"
description = "Scans CNTK .dnn/.cmf model artifacts for load-time execution indicators"
description = "Scans signature-validated CNTK model artifacts for load-time execution indicators"
supported_extensions: ClassVar[list[str]] = [".dnn", ".cmf"]

@classmethod
Expand All @@ -276,7 +274,7 @@ def can_handle(cls, path: str) -> bool:
return False

extension = os.path.splitext(path)[1].lower()
if extension not in _CNTK_SUPPORTED_EXTENSIONS:
if extension in _CNTK_EXCLUDED_EXTENSIONS:
return False

prefix = _read_prefix(path)
Expand Down Expand Up @@ -310,14 +308,16 @@ def scan(self, path: str) -> ScanResult:
passed=False,
message=(
"Unsupported or out-of-scope CNTK variant detected. "
"Current scanner supports only signature-backed .dnn/.cmf variants."
"The scanner supports signature-backed CNTK artifacts but excludes .model "
"because that extension overlaps with XGBoost."
),
severity=IssueSeverity.INFO,
location=path,
details={
"variant": variant,
"reason": variant_reason,
"supported_extensions": sorted(_CNTK_SUPPORTED_EXTENSIONS),
"excluded_extensions": sorted(_CNTK_EXCLUDED_EXTENSIONS),
},
)
result.finish(success=False)
Expand Down
52 changes: 42 additions & 10 deletions modelaudit/scanners/executorch_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import zipfile
from typing import Any, BinaryIO, ClassVar, cast

from ..scanner_results import mark_inconclusive_scan_result
from ..scanner_selection import add_scanner_selection_skip_check, embedded_pickle_scanner
from ..utils import sanitize_archive_path
from ..utils.file.detection import (
Expand Down Expand Up @@ -37,15 +38,38 @@ def can_handle(cls, path: str) -> bool:
ext = os.path.splitext(path)[1].lower()
if ext in cls.supported_extensions:
return True
return is_executorch_archive(path)
try:
header = cls._read_header(path, length=8)
except OSError:
return False
return (_is_executorch_binary_signature(header) and _is_valid_executorch_binary(path)) or is_executorch_archive(
path
)

@staticmethod
def _read_header(path: str, length: int = 4) -> bytes:
try:
with open(path, "rb") as f:
return f.read(length)
except Exception:
return b""
with open(path, "rb") as f:
return f.read(length)

@staticmethod
def _finish_read_failure(result: ScanResult, path: str, exc: OSError) -> ScanResult:
    """Close out *result* after an I/O error prevented reading the file.

    The result is passed to ``mark_inconclusive_scan_result`` with the
    ``executorch_read_failed`` reason, a failed INFO-severity
    "ExecuTorch File Read" check describing *exc* is appended, and the
    result is finished with ``success=False``.

    Args:
        result: The scan result being built for *path*.
        path: Location of the file that could not be read.
        exc: The ``OSError`` raised while reading.

    Returns:
        The same *result* object, finished as unsuccessful.
    """
    outcome_reason = "executorch_read_failed"
    mark_inconclusive_scan_result(result, outcome_reason)

    # The same reason string is repeated in the check details alongside
    # the exception text and type.
    failure_details = {
        "exception": str(exc),
        "exception_type": type(exc).__name__,
        "analysis_incomplete": True,
        "scan_outcome_reason": outcome_reason,
    }
    result.add_check(
        name="ExecuTorch File Read",
        passed=False,
        message=f"Unable to read ExecuTorch content: {exc!s}",
        severity=IssueSeverity.INFO,
        location=path,
        details=failure_details,
        rule_code="S902",
    )
    result.finish(success=False)
    return result

def scan(self, path: str) -> ScanResult:
path_check_result = self._check_path(path)
Expand All @@ -60,8 +84,14 @@ def scan(self, path: str) -> ScanResult:
file_size = self.get_file_size(path)
result.metadata["file_size"] = file_size

header = self._read_header(path, length=8)
valid_binary_program = _is_executorch_binary_signature(header) and _is_valid_executorch_binary(path)
try:
header = self._read_header(path, length=8)
valid_binary_program = _is_executorch_binary_signature(header) and _is_valid_executorch_binary(
path,
propagate_io_errors=True,
)
except OSError as exc:
return self._finish_read_failure(result, path, exc)
if valid_binary_program:
result.add_check(
name="ExecuTorch Binary Format Validation",
Expand All @@ -75,8 +105,8 @@ def scan(self, path: str) -> ScanResult:
if valid_binary_program and not should_scan_archive:
try:
should_scan_archive = zipfile.is_zipfile(path)
except OSError:
should_scan_archive = False
except OSError as exc:
return self._finish_read_failure(result, path, exc)

if valid_binary_program and not should_scan_archive:
result.bytes_scanned = file_size
Expand Down Expand Up @@ -176,6 +206,8 @@ def scan(self, path: str) -> ScanResult:
)
result.finish(success=False)
return result
except OSError as exc:
return self._finish_read_failure(result, path, exc)
except Exception as e: # pragma: no cover - unexpected errors
result.add_check(
name="ExecuTorch File Scan",
Expand Down
3 changes: 0 additions & 3 deletions modelaudit/scanners/lightgbm_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,6 @@ def can_handle(cls, path: str) -> bool:
if not os.path.isfile(path):
return False

if os.path.splitext(path)[1].lower() not in cls.supported_extensions:
return False

try:
with open(path, "rb") as file_obj:
preview = file_obj.read(cls._SIGNATURE_READ_BYTES)
Expand Down
7 changes: 2 additions & 5 deletions modelaudit/scanners/rknn_scanner.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Scanner for Rockchip RKNN model artifacts (.rknn)."""
"""Scanner for Rockchip RKNN model artifacts."""

from __future__ import annotations

import ipaddress
import os
import re
from pathlib import Path
from typing import Any, ClassVar

from ..scanner_results import INCONCLUSIVE_SCAN_OUTCOME, mark_inconclusive_scan_result
Expand Down Expand Up @@ -70,7 +69,7 @@ class RknnScanner(BaseScanner):
"""Static scanner for RKNN models."""

name = "rknn"
description = "Scans RKNN .rknn model files for suspicious metadata references and command/network indicators"
description = "Scans RKNN model files for suspicious metadata references and command/network indicators"
supported_extensions: ClassVar[list[str]] = [".rknn"]

def __init__(self, config: dict[str, Any] | None = None) -> None:
Expand All @@ -86,8 +85,6 @@ def _has_rknn_signature(prefix: bytes) -> bool:
def can_handle(cls, path: str) -> bool:
if not os.path.isfile(path):
return False
if Path(path).suffix.lower() not in cls.supported_extensions:
return False

try:
file_size = os.path.getsize(path)
Expand Down
32 changes: 26 additions & 6 deletions modelaudit/utils/file/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ def _is_executorch_binary_signature(prefix: bytes) -> bool:
return len(prefix) >= 8 and prefix[4:6] == b"ET" and prefix[6:8].isdigit()


def _is_valid_executorch_binary(path: str | Path) -> bool:
def _is_valid_executorch_binary(path: str | Path, *, propagate_io_errors: bool = False) -> bool:
"""Validate the minimal FlatBuffers structure for ExecuTorch binaries."""
file_path = Path(path)
if not file_path.is_file():
Expand Down Expand Up @@ -1195,7 +1195,11 @@ def _is_valid_executorch_binary(path: str | Path) -> bool:
return False
if root_table_offset + object_size > file_size:
return False
except (OSError, struct.error):
except OSError:
if propagate_io_errors:
raise
return False
except struct.error:
return False

return True
Expand Down Expand Up @@ -1320,7 +1324,7 @@ def detect_file_format_from_magic(path: str) -> str:
# Use bounded signature markers for deterministic identification.
f.seek(0)
cntk_prefix = f.read(_CNTK_SIGNATURE_READ_BYTES)
if _is_cntk_signature(cntk_prefix):
if file_path.suffix.lower() != ".model" and _is_cntk_signature(cntk_prefix):
return "cntk"

f.seek(0)
Expand Down Expand Up @@ -1426,10 +1430,16 @@ def detect_file_format_for_skip_filter(path: str) -> str:
if format_result != "unknown":
return format_result

cntk_probe_size = min(size, _CNTK_SIGNATURE_READ_BYTES)
if len(prefix) < cntk_probe_size:
prefix += f.read(cntk_probe_size - len(prefix))
if file_path.suffix.lower() != ".model" and _is_cntk_signature(prefix[:cntk_probe_size]):
return "cntk"

lightgbm_probe_size = min(size, _LIGHTGBM_SIGNATURE_READ_BYTES)
if len(prefix) < lightgbm_probe_size:
prefix += f.read(lightgbm_probe_size - len(prefix))
if _is_lightgbm_signature(prefix):
if _is_lightgbm_signature(prefix[:lightgbm_probe_size]):
return "lightgbm"

if _could_start_proto0_or_1_pickle(prefix):
Expand Down Expand Up @@ -1505,6 +1515,12 @@ def detect_file_format(path: str) -> str:
return "gguf"
if magic4 in GGML_MAGIC_VARIANTS:
return "ggml"
if _looks_like_tflite_header(magic8):
return "tflite"
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
if _is_executorch_binary_signature(magic8) and _is_valid_executorch_binary(file_path):
return "executorch"
if magic4 == b"RKNN":
return "rknn"
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated

ext = file_path.suffix.lower()
filename_lower = file_path.name.lower()
Expand Down Expand Up @@ -1554,11 +1570,15 @@ def detect_file_format(path: str) -> str:
if xml_format != "unknown":
return xml_format

signature_prefix = read_magic_bytes(path, max(_CNTK_SIGNATURE_READ_BYTES, _LIGHTGBM_SIGNATURE_READ_BYTES))
if ext != ".model" and _is_cntk_signature(signature_prefix[:_CNTK_SIGNATURE_READ_BYTES]):
return "cntk"
if _is_lightgbm_signature(signature_prefix[:_LIGHTGBM_SIGNATURE_READ_BYTES]):
return "lightgbm"

# For .bin files, do more sophisticated detection
if ext == ".bin":
magic64 = read_magic_bytes(path, 64)
if _looks_like_tflite_header(magic8):
return "tflite"
# IMPORTANT: Check ZIP format first (PyTorch models saved with torch.save())
if _has_zip_magic(magic4):
return "zip"
Expand Down
12 changes: 12 additions & 0 deletions tests/scanners/test_cntk_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ def test_cntk_scanner_can_handle_cntkv2_signature(tmp_path: Path) -> None:
assert CntkScanner.can_handle(str(path))


def test_cntk_scanner_can_handle_signature_with_misleading_suffix(tmp_path: Path) -> None:
    """A file produced by ``_write_cntkv2`` is accepted even with a ``.jpg`` suffix."""
    renamed_artifact = tmp_path / "renamed.jpg"
    _write_cntkv2(renamed_artifact, payload=b" inputs outputs ")

    can_scan = CntkScanner.can_handle(str(renamed_artifact))
    assert can_scan


def test_cntk_scanner_rejects_renamed_structure_near_match(tmp_path: Path) -> None:
    """A ``.jpg`` file written with ``include_structure=False`` is not claimed by the scanner."""
    near_match_artifact = tmp_path / "near_match.jpg"
    _write_cntkv2(
        near_match_artifact,
        payload=b" inputs outputs ",
        include_structure=False,
    )

    can_scan = CntkScanner.can_handle(str(near_match_artifact))
    assert not can_scan


def test_cntk_scanner_rejects_misnamed_non_cntk_file(tmp_path: Path) -> None:
path = tmp_path / "not_cntk.dnn"
path.write_text("plain text that should not match CNTK signatures")
Expand Down
Loading
Loading