From 30c56cf78e579892485accd093c9435ccf82b78b Mon Sep 17 00:00:00 2001 From: zhuque Date: Fri, 10 Apr 2026 17:25:19 +0800 Subject: [PATCH 1/2] feat(system): add data auto-sync API for data/ directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two new endpoints under /api/v1/system/: POST /api/v1/system/update-data Downloads the GitHub archive (branch or tag) and overwrites the configured data/ sub-directories (fingerprints, vuln, vuln_en, mcp, eval, agents). Runs asynchronously; supports optional github_token to avoid rate-limiting and per-call dir selection. GET /api/v1/system/update-status Returns current / last sync status: running flag, success bool, started_at / finished_at timestamps, file count, message. Implementation: - common/websocket/update_api.go — handler + sync logic - common/websocket/update_api_test.go — unit tests (zip extract, dir filter) - common/websocket/server.go — route registration under /system group - docs/api_data_update.md — full API documentation with examples --- common/websocket/server.go | 8 + common/websocket/update_api.go | 364 ++++++++++++++++++++++++++++ common/websocket/update_api_test.go | 185 ++++++++++++++ docs/api_data_update.md | 170 +++++++++++++ 4 files changed, 727 insertions(+) create mode 100644 common/websocket/update_api.go create mode 100644 common/websocket/update_api_test.go create mode 100644 docs/api_data_update.md diff --git a/common/websocket/server.go b/common/websocket/server.go index 9d973dbb..d16217fa 100644 --- a/common/websocket/server.go +++ b/common/websocket/server.go @@ -303,6 +303,14 @@ func RunWebServer(options *version.Options) { "changelog": string(data), }) }) + + // system — data directory auto-sync + system := v1.Group("/system") + system.Use(setupIdentityMiddleware()) + { + system.POST("/update-data", HandleTriggerDataUpdate) + system.GET("/update-status", HandleGetUpdateStatus) + } } // Swagger UI - 必须在 NoRoute 之前注册 diff --git 
a/common/websocket/update_api.go b/common/websocket/update_api.go new file mode 100644 index 00000000..40e598bd --- /dev/null +++ b/common/websocket/update_api.go @@ -0,0 +1,364 @@ +// Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Requirement: Any integration or derivative work must explicitly attribute +// Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its +// documentation or user interface, as detailed in the NOTICE file. + +// Package websocket provides the HTTP API handlers for the AIG web server. +package websocket + +import ( + "archive/zip" + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/gin-gonic/gin" +) + +// --------------------------------------------------------------------------- +// Constants & package-level state +// --------------------------------------------------------------------------- + +const ( + defaultGitHubRepo = "Tencent/AI-Infra-Guard" + defaultGitHubBranch = "main" + githubZipURLFmt = "https://codeload.github.com/%s/zip/refs/heads/%s" + githubTagZipURLFmt = "https://codeload.github.com/%s/zip/refs/tags/%s" + + // dataDirs lists the sub-directories inside data/ that are synced. + // Callers may override via UpdateDataRequest.Dirs. 
+ dataDirsDefault = "fingerprints,vuln,vuln_en,mcp,eval,agents" +) + +// UpdateStatus holds the current state of a data-sync operation. +type UpdateStatus struct { + Running bool `json:"running"` + Success *bool `json:"success,omitempty"` + StartedAt time.Time `json:"started_at,omitempty"` + FinishedAt *time.Time `json:"finished_at,omitempty"` + Message string `json:"message"` + // FilesUpdated is the number of files written to disk. + FilesUpdated int `json:"files_updated"` + // Ref is the branch or tag that was used. + Ref string `json:"ref,omitempty"` +} + +var ( + updateMu sync.Mutex + updateStatus = &UpdateStatus{Message: "idle"} +) + +// --------------------------------------------------------------------------- +// Request / Response types +// --------------------------------------------------------------------------- + +// UpdateDataRequest is the JSON body for POST /api/v1/system/update-data. +// +// { +// "ref": "main", // branch or tag, default: "main" +// "is_tag": false, // set true when ref is a tag +// "github_token": "", // optional, avoids GitHub rate-limit (60 req/h anon) +// "dirs": "fingerprints,vuln,vuln_en,mcp,eval,agents" // optional +// } +type UpdateDataRequest struct { + Ref string `json:"ref"` + IsTag bool `json:"is_tag"` + GithubToken string `json:"github_token"` + Dirs string `json:"dirs"` +} + +// --------------------------------------------------------------------------- +// Handlers +// --------------------------------------------------------------------------- + +// HandleGetUpdateStatus godoc +// +// @Summary Get data-sync status +// @Description Returns the current (or last) status of the automatic data directory sync. 
+// @Tags system +// @Produce json +// @Success 200 {object} UpdateStatus +// @Router /api/v1/system/update-status [get] +func HandleGetUpdateStatus(c *gin.Context) { + updateMu.Lock() + snap := *updateStatus + updateMu.Unlock() + c.JSON(http.StatusOK, snap) +} + +// HandleTriggerDataUpdate godoc +// +// @Summary Trigger data directory sync from GitHub +// @Description Downloads the repository archive from GitHub and overwrites the local +// @Description data/ sub-directories (fingerprints, vuln, vuln_en, mcp, eval, agents). +// @Description The operation runs asynchronously; poll GET /api/v1/system/update-status +// @Description for progress. Only one sync may run at a time. +// @Tags system +// @Accept json +// @Produce json +// @Param body body UpdateDataRequest false "Sync options" +// @Success 202 {object} UpdateStatus "Sync started" +// @Success 200 {object} UpdateStatus "Already running" +// @Failure 500 {object} map[string]string "Internal error" +// @Router /api/v1/system/update-data [post] +func HandleTriggerDataUpdate(c *gin.Context) { + var req UpdateDataRequest + // allow empty body + _ = c.ShouldBindJSON(&req) + + if req.Ref == "" { + req.Ref = defaultGitHubBranch + } + if req.Dirs == "" { + req.Dirs = dataDirsDefault + } + + updateMu.Lock() + if updateStatus.Running { + snap := *updateStatus + updateMu.Unlock() + c.JSON(http.StatusOK, snap) + return + } + updateStatus = &UpdateStatus{ + Running: true, + StartedAt: time.Now(), + Message: "downloading archive from GitHub…", + Ref: req.Ref, + } + updateMu.Unlock() + + go runDataUpdate(req) + + updateMu.Lock() + snap := *updateStatus + updateMu.Unlock() + c.JSON(http.StatusAccepted, snap) +} + +// --------------------------------------------------------------------------- +// Core sync logic +// --------------------------------------------------------------------------- + +func runDataUpdate(req UpdateDataRequest) { + setStatus := func(msg string, filesUpdated int) { + updateMu.Lock() + 
updateStatus.Message = msg + updateStatus.FilesUpdated = filesUpdated + updateMu.Unlock() + } + + finish := func(success bool, msg string, filesUpdated int) { + now := time.Now() + updateMu.Lock() + b := success + updateStatus.Running = false + updateStatus.Success = &b + updateStatus.FinishedAt = &now + updateStatus.Message = msg + updateStatus.FilesUpdated = filesUpdated + updateMu.Unlock() + } + + // 1. Build download URL + var downloadURL string + if req.IsTag { + downloadURL = fmt.Sprintf(githubTagZipURLFmt, defaultGitHubRepo, req.Ref) + } else { + downloadURL = fmt.Sprintf(githubZipURLFmt, defaultGitHubRepo, req.Ref) + } + + // 2. Download archive + setStatus(fmt.Sprintf("downloading %s …", downloadURL), 0) + body, err := downloadArchive(downloadURL, req.GithubToken) + if err != nil { + finish(false, fmt.Sprintf("download failed: %v", err), 0) + return + } + + // 3. Extract & overwrite + setStatus("extracting archive …", 0) + dirs := splitDirs(req.Dirs) + n, err := extractDataDirs(body, dirs) + if err != nil { + finish(false, fmt.Sprintf("extraction failed: %v", err), n) + return + } + + finish(true, fmt.Sprintf("sync complete — %d file(s) updated from ref %q", n, req.Ref), n) +} + +// downloadArchive fetches the zip archive and returns its bytes. +func downloadArchive(url, token string) ([]byte, error) { + client := &http.Client{Timeout: 5 * time.Minute} + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + if token != "" { + req.Header.Set("Authorization", "token "+token) + } + req.Header.Set("User-Agent", "AI-Infra-Guard/data-updater") + + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url) + } + + return io.ReadAll(resp.Body) +} + +// extractDataDirs extracts the requested data sub-directories from the zip +// archive and writes them to the local filesystem. 
+// +// GitHub's archive has a single top-level directory named +// "<repo>-<ref>/", e.g. "AI-Infra-Guard-main/". +// We strip that prefix and write only the files under data/<subdir>/. +func extractDataDirs(zipBytes []byte, dirs []string) (int, error) { + zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes))) + if err != nil { + return 0, fmt.Errorf("invalid zip: %w", err) + } + + // Find the top-level prefix (first directory entry). + prefix := "" + for _, f := range zr.File { + if f.FileInfo().IsDir() { + parts := strings.SplitN(f.Name, "/", 2) + prefix = parts[0] + "/" + break + } + } + + // Build a quick lookup set for the requested dirs. + wantDir := make(map[string]bool, len(dirs)) + for _, d := range dirs { + wantDir[strings.TrimSpace(d)] = true + } + + filesWritten := 0 + for _, f := range zr.File { + // Strip the top-level prefix. + rel := strings.TrimPrefix(f.Name, prefix) + // We only care about files under data/<subdir>/ + if !strings.HasPrefix(rel, "data/") { + continue + } + // rel is now like "data/fingerprints/foo.yaml" + parts := strings.SplitN(rel, "/", 3) // ["data", "subdir", "rest"] + if len(parts) < 3 { + continue // skip the "data/" entry itself; "data/subdir/" entries still have 3 parts + } + subDir := parts[1] + if !wantDir[subDir] { + continue + } + if f.FileInfo().IsDir() { + if err := os.MkdirAll(rel, 0o755); err != nil { + return filesWritten, fmt.Errorf("mkdir %s: %w", rel, err) + } + continue + } + + // Ensure parent directory exists. + if err := os.MkdirAll(filepath.Dir(rel), 0o755); err != nil { + return filesWritten, fmt.Errorf("mkdir %s: %w", filepath.Dir(rel), err) + } + + // Write file. 
+ rc, err := f.Open() + if err != nil { + return filesWritten, fmt.Errorf("open zip entry %s: %w", f.Name, err) + } + written, writeErr := writeFile(rel, rc) + rc.Close() + if writeErr != nil { + return filesWritten, fmt.Errorf("write %s: %w", rel, writeErr) + } + if written { + filesWritten++ + } + } + + return filesWritten, nil +} + +// writeFile reads rc fully into memory and writes it to path via os.WriteFile (not atomic). +// It reports whether the file was actually written (always true on success). +func writeFile(path string, rc io.Reader) (bool, error) { + data, err := io.ReadAll(rc) + if err != nil { + return false, err + } + if err := os.WriteFile(path, data, 0o644); err != nil { + return false, err + } + return true, nil +} + +// splitDirs splits a comma-separated list of directory names. +func splitDirs(s string) []string { + parts := strings.Split(s, ",") + out := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} + +// --------------------------------------------------------------------------- +// Swagger model helpers (needed by swaggo for the UpdateStatus pointer fields) +// --------------------------------------------------------------------------- + +// updateStatusJSON mirrors the fields of UpdateStatus; MarshalJSON encodes through it. +type updateStatusJSON struct { + Running bool `json:"running"` + Success *bool `json:"success,omitempty"` + StartedAt time.Time `json:"started_at,omitempty"` + FinishedAt *time.Time `json:"finished_at,omitempty"` + Message string `json:"message"` + FilesUpdated int `json:"files_updated"` + Ref string `json:"ref,omitempty"` +} + +// MarshalJSON implements json.Marshaler by copying the fields of u into +// updateStatusJSON and encoding that value. 
+func (u UpdateStatus) MarshalJSON() ([]byte, error) { + return json.Marshal(updateStatusJSON{ + Running: u.Running, + Success: u.Success, + StartedAt: u.StartedAt, + FinishedAt: u.FinishedAt, + Message: u.Message, + FilesUpdated: u.FilesUpdated, + Ref: u.Ref, + }) +} diff --git a/common/websocket/update_api_test.go b/common/websocket/update_api_test.go new file mode 100644 index 00000000..c6c34cd3 --- /dev/null +++ b/common/websocket/update_api_test.go @@ -0,0 +1,185 @@ +// Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Requirement: Any integration or derivative work must explicitly attribute +// Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its +// documentation or user interface, as detailed in the NOTICE file. 
+ +package websocket + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +// buildTestZip creates an in-memory zip that mimics the GitHub archive layout: +// +// AI-Infra-Guard-main/ +// AI-Infra-Guard-main/data/fingerprints/foo.yaml +// AI-Infra-Guard-main/data/vuln/bar/CVE-2024-0001.yaml +// AI-Infra-Guard-main/data/mcp/tool.yaml +// AI-Infra-Guard-main/README.md <- should NOT be extracted +func buildTestZip(t *testing.T) []byte { + t.Helper() + buf := new(bytes.Buffer) + w := zip.NewWriter(buf) + + entries := []struct { + name string + content string + isDir bool + }{ + {"AI-Infra-Guard-main/", "", true}, + {"AI-Infra-Guard-main/data/", "", true}, + {"AI-Infra-Guard-main/data/fingerprints/", "", true}, + {"AI-Infra-Guard-main/data/fingerprints/foo.yaml", "name: foo\n", false}, + {"AI-Infra-Guard-main/data/vuln/", "", true}, + {"AI-Infra-Guard-main/data/vuln/bar/", "", true}, + {"AI-Infra-Guard-main/data/vuln/bar/CVE-2024-0001.yaml", "cve: CVE-2024-0001\n", false}, + {"AI-Infra-Guard-main/data/mcp/", "", true}, + {"AI-Infra-Guard-main/data/mcp/tool.yaml", "rule: test\n", false}, + // files that should be ignored + {"AI-Infra-Guard-main/README.md", "# readme\n", false}, + {"AI-Infra-Guard-main/cmd/main.go", "package main\n", false}, + } + + for _, e := range entries { + if e.isDir { + fh := &zip.FileHeader{Name: e.name, Method: zip.Deflate} + fh.SetMode(0o755 | os.ModeDir) + _, err := w.CreateHeader(fh) + if err != nil { + t.Fatalf("zip CreateHeader dir %s: %v", e.name, err) + } + } else { + f, err := w.Create(e.name) + if err != nil { + t.Fatalf("zip Create %s: %v", e.name, err) + } + _, _ = f.Write([]byte(e.content)) + } + } + if err := w.Close(); err != nil { + t.Fatalf("zip Close: %v", err) + } + return buf.Bytes() +} + +func TestExtractDataDirs_selectiveDirs(t *testing.T) { + zipBytes := buildTestZip(t) + tmp := t.TempDir() + + // Change working directory to tmp so relative paths resolve correctly. 
+ orig, _ := os.Getwd() + if err := os.Chdir(tmp); err != nil { + t.Fatalf("Chdir: %v", err) + } + defer os.Chdir(orig) + + dirs := []string{"fingerprints", "vuln"} + n, err := extractDataDirs(zipBytes, dirs) + if err != nil { + t.Fatalf("extractDataDirs: %v", err) + } + + // Expect 2 files: foo.yaml and CVE-2024-0001.yaml + if n != 2 { + t.Errorf("expected 2 files written, got %d", n) + } + + // Verify fingerprints file exists and has correct content. + fpPath := filepath.Join("data", "fingerprints", "foo.yaml") + data, err := os.ReadFile(fpPath) + if err != nil { + t.Fatalf("ReadFile %s: %v", fpPath, err) + } + if strings.TrimSpace(string(data)) != "name: foo" { + t.Errorf("unexpected content in %s: %q", fpPath, string(data)) + } + + // Verify vuln sub-directory file exists. + vulnPath := filepath.Join("data", "vuln", "bar", "CVE-2024-0001.yaml") + if _, err := os.Stat(vulnPath); err != nil { + t.Errorf("expected %s to exist: %v", vulnPath, err) + } + + // Verify mcp was NOT extracted (not in dirs list). + mcpPath := filepath.Join("data", "mcp", "tool.yaml") + if _, err := os.Stat(mcpPath); !os.IsNotExist(err) { + t.Errorf("expected %s to NOT exist", mcpPath) + } + + // Verify README.md was NOT extracted. + readmePath := "README.md" + if _, err := os.Stat(readmePath); !os.IsNotExist(err) { + t.Errorf("expected %s to NOT exist", readmePath) + } +} + +func TestExtractDataDirs_allDirs(t *testing.T) { + zipBytes := buildTestZip(t) + tmp := t.TempDir() + + orig, _ := os.Getwd() + if err := os.Chdir(tmp); err != nil { + t.Fatalf("Chdir: %v", err) + } + defer os.Chdir(orig) + + dirs := splitDirs(dataDirsDefault) + n, err := extractDataDirs(zipBytes, dirs) + if err != nil { + t.Fatalf("extractDataDirs: %v", err) + } + + // Test zip has 3 data files (foo.yaml, CVE-2024-0001.yaml, tool.yaml). 
+ if n != 3 { + t.Errorf("expected 3 files written, got %d", n) + } +} + +func TestExtractDataDirs_invalidZip(t *testing.T) { + _, err := extractDataDirs([]byte("this is not a zip"), []string{"fingerprints"}) + if err == nil { + t.Error("expected error for invalid zip, got nil") + } +} + +func TestSplitDirs(t *testing.T) { + cases := []struct { + input string + want []string + }{ + {"fingerprints,vuln", []string{"fingerprints", "vuln"}}, + {" fingerprints , vuln_en ", []string{"fingerprints", "vuln_en"}}, + {"", []string{}}, + {"mcp", []string{"mcp"}}, + } + for _, tc := range cases { + got := splitDirs(tc.input) + if len(got) != len(tc.want) { + t.Errorf("splitDirs(%q): got %v, want %v", tc.input, got, tc.want) + continue + } + for i := range got { + if got[i] != tc.want[i] { + t.Errorf("splitDirs(%q)[%d]: got %q, want %q", tc.input, i, got[i], tc.want[i]) + } + } + } +} diff --git a/docs/api_data_update.md b/docs/api_data_update.md new file mode 100644 index 00000000..9cf6745e --- /dev/null +++ b/docs/api_data_update.md @@ -0,0 +1,170 @@ +# Data Auto-Sync API + +> **Base URL**: `http://<host>:8088/api/v1` +> +> All endpoints require the same authentication as the rest of the AIG API +> (session cookie / `X-Token` header set during login). + +--- + +## Overview + +AIG's detection rules live in the `data/` directory on disk: + +| Sub-directory | Contents | +|---|---| +| `data/fingerprints/` | YAML fingerprint rules for AI components | +| `data/vuln/` | Chinese CVE/GHSA vulnerability rules | +| `data/vuln_en/` | English CVE/GHSA vulnerability rules | +| `data/mcp/` | MCP security detection rules | +| `data/eval/` | Jailbreak / prompt-security evaluation datasets | +| `data/agents/` | Agent scan configuration | + +The **data auto-sync** feature lets you pull the latest rules from the +official GitHub repository (`Tencent/AI-Infra-Guard`) without restarting +the server or rebuilding the Docker image. 
+ +--- + +## Endpoints + +### POST `/api/v1/system/update-data` + +Trigger an asynchronous sync of the `data/` directory from GitHub. + +Only **one sync** can run at a time. If a sync is already in progress the +endpoint returns `200 OK` with the current status instead of starting a new +one. + +#### Request Body (JSON, optional) + +| Field | Type | Default | Description | +|---|---|---|---| +| `ref` | `string` | `"main"` | Branch name or tag to sync from | +| `is_tag` | `bool` | `false` | Set `true` when `ref` is a Git tag (e.g. `"v4.1.3"`) | +| `github_token` | `string` | `""` | Personal access token — avoids GitHub's anonymous rate limit (60 req/h) | +| `dirs` | `string` | `"fingerprints,vuln,vuln_en,mcp,eval,agents"` | Comma-separated list of `data/` sub-directories to sync | + +#### Response — `202 Accepted` (sync started) or `200 OK` (already running) + +```json +{ + "running": true, + "started_at": "2026-04-10T17:20:00Z", + "finished_at": null, + "message": "downloading archive from GitHub…", + "files_updated": 0, + "ref": "main" +} +``` + +#### Examples + +**Sync latest `main` (anonymous)** +```bash +curl -X POST http://localhost:8088/api/v1/system/update-data \ + -H "Content-Type: application/json" \ + -d '{}' +``` + +**Sync a specific release tag** +```bash +curl -X POST http://localhost:8088/api/v1/system/update-data \ + -H "Content-Type: application/json" \ + -d '{ + "ref": "v4.1.3", + "is_tag": true + }' +``` + +**Sync only vulnerability rules (authenticated)** +```bash +curl -X POST http://localhost:8088/api/v1/system/update-data \ + -H "Content-Type: application/json" \ + -d '{ + "ref": "main", + "github_token": "ghp_xxxxxxxxxxxx", + "dirs": "vuln,vuln_en" + }' +``` + +--- + +### GET `/api/v1/system/update-status` + +Return the status of the current (or most recent) sync operation. 
+ +#### Response — `200 OK` + +```json +{ + "running": false, + "success": true, + "started_at": "2026-04-10T17:20:00Z", + "finished_at": "2026-04-10T17:20:42Z", + "message": "sync complete — 312 file(s) updated from ref \"main\"", + "files_updated": 312, + "ref": "main" +} +``` + +#### Response Fields + +| Field | Type | Description | +|---|---|---| +| `running` | `bool` | `true` while a sync is in progress | +| `success` | `bool` (optional) | `true` = completed OK, `false` = error; omitted while a sync is running or if no sync has run yet | +| `started_at` | `string (RFC3339)` | When the current/last sync started | +| `finished_at` | `string (RFC3339)` (optional) | When it finished; omitted while a sync is running or if no sync has run yet | +| `message` | `string` | Human-readable status/error description | +| `files_updated` | `int` | Number of files written to disk | +| `ref` | `string` | Branch or tag used | + +#### Example — poll until done +```bash +while true; do + STATUS=$(curl -s http://localhost:8088/api/v1/system/update-status) + echo "$STATUS" + RUNNING=$(echo "$STATUS" | python3 -c "import sys,json; print(json.load(sys.stdin)['running'])") + [ "$RUNNING" = "False" ] && break + sleep 3 +done +``` + +--- + +## Workflow + +``` +Client AIG Server GitHub + | | | + |-- POST /system/update-data ---> | | + |<-- 202 Accepted (running=true) | | + | |-- GET codeload.github.com -->| + | |<-- zip archive --------------| + | | (unzip + overwrite data/) | + | | | + |-- GET /system/update-status --> | | + |<-- 200 OK (running=false, | | + | success=true) | | +``` + +--- + +## Error Cases + +| Scenario | `success` | `message` example | +|---|---|---| +| GitHub unreachable / timeout | `false` | `"download failed: Get … context deadline exceeded"` | +| Invalid ref / 404 | `false` | `"download failed: HTTP 404 from …"` | +| Disk write error | `false` | `"extraction failed: write data/vuln/…: permission denied"` | +| Rate limited (anonymous) | `false` | `"download failed: HTTP 429 from …"` — use `github_token` | + +--- + +## Notes + +- The sync 
**overwrites** matching files in `data/` but does **not delete** files that no longer exist in the upstream repo. To do a full clean sync, remove the `data/` sub-directories manually before triggering the update. +- The server does **not** need to restart after a sync — rule files are read from disk at scan time. +- In-progress scans are not interrupted; they will use the new rules on the next run. +- The `github_token` field value is **never logged or stored**. From 0a200100e277f5de3427be4f804067dff16d052d Mon Sep 17 00:00:00 2001 From: zhuque Date: Mon, 13 Apr 2026 17:20:16 +0800 Subject: [PATCH 2/2] docs: add SECURITY.md with trust model and vulnerability disclosure policy --- SECURITY.md | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..d907a1cc --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,160 @@ +# Security Policy + +If you believe you've found a security issue in AI-Infra-Guard, please report it responsibly. + +## Reporting + +Report vulnerabilities via GitHub Security Advisories: + +- **Core scanner, agent scan, MCP scan, WebUI** — [Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard/security/advisories/new) +- **Vulnerability rule database** — [Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard/security/advisories/new) (data/vuln, data/fingerprints) + +For issues that don't fit a specific category, open a GitHub Security Advisory or contact the maintainers at **[zhuquelab@tencent.com](mailto:zhuquelab@tencent.com)**. + +### Required in Reports + +1. **Title** +2. **Severity Assessment** (Critical / High / Medium / Low) +3. **Impact** — What can an attacker achieve? +4. **Affected Component** — Which module, file, and function? +5. **Technical Reproduction** — Step-by-step PoC +6. **Demonstrated Impact** — Evidence the impact is real +7. 
**Environment** — AIG version, OS, deployment method (binary/Docker) +8. **Remediation Advice** + +Reports without reproduction steps, demonstrated impact, and remediation advice will be deprioritized. + +### Report Acceptance Gate (Triage Fast Path) + +For fastest triage, include all of the following: + +- Exact vulnerable path (file, function, and line range) on a current revision. +- AIG version (`--version` output) and/or commit SHA. +- Reproducible PoC against the latest `main` or latest released tag. +- Demonstrated impact tied to AIG's documented trust boundaries. +- Explicit statement that the report is **not** covered by the Out of Scope section below. + +### Common False-Positive Patterns + +These are frequently reported but are typically closed with no code change: + +- Reports that assume multi-user isolation exists. **AIG has no multi-user system.** There are no login accounts, sessions, or per-user permission boundaries. The WebUI is a single-operator interface. Reports about "user A accessing user B's data" do not apply — there is only one operator. +- Prompt-injection-only chains (agent scan / MCP scan) without a boundary bypass. Prompt injection is expected behavior in an AI red-teaming tool; it is out of scope unless it crosses an OS/network/filesystem boundary. +- Missing authentication on the WebUI (`:8088`) when deployed per documentation. AIG defaults to `127.0.0.1:8088` (loopback). Exposing it to a non-loopback address is an operator misconfiguration, not an AIG vulnerability. +- Reports that only show AIG scanning itself or the host it runs on, without demonstrating an unauthorized path that triggers such a scan. +- Scanner-only claims against stale or non-existent paths, or claims without a working reproduction. +- Reports about TLS not being enforced on the default local loopback deployment. +- DoS claims that require trusted operator input (e.g., crafted scan targets or rule files already under operator control). 
+- Reports about the LLM model API key being stored in config when the operator deliberately configured it there. +- Reports that only show AIG executing commands/probes against scan targets that the operator explicitly provided. + +## Trust Model + +AI-Infra-Guard is a **single-operator security tool**, not a multi-tenant platform. + +### Single-Operator Model + +- **There is no multi-user system in AIG.** AIG has no user accounts, no login/authentication for the WebUI, no per-user sessions, and no role-based access control. +- Anyone with network access to the WebUI (`-ws-addr`) is treated as the operator. This is by design for a local security tool. +- Security reports that assume a multi-user authorization boundary (e.g., "user A can view user B's scan results") are **not applicable** — there is only one operator per instance. +- Recommended deployment: run AIG on your local machine or a dedicated scan host, accessible only to the operator. + +### Deployment Trust Boundaries + +- **CLI mode** (`aig -target ...`): No network exposure. Output goes to stdout/file. Full trust to the operator running the process. +- **WebUI mode** (`aig -ws`): Binds to `127.0.0.1:8088` by default. Only the local operator should access it. Do not expose to the network. +- **Docker mode**: The container exposes port `8088`. Use firewall rules, Docker network isolation, or a reverse proxy with authentication to restrict access to trusted operators only. + +### What AIG Trusts + +- The operator who launches AIG is fully trusted. +- Scan targets provided by the operator are treated as external untrusted input. +- LLM API responses (in agent scan / MCP scan) are treated as untrusted content — the scan engine processes them, not the host OS. +- Rule YAML files (`data/fingerprints/`, `data/vuln/`) loaded at startup are treated as trusted operator-supplied data. + +### What AIG Does Not Trust + +- HTTP responses from scan targets: parsed defensively, never executed. 
+- LLM-generated content during agent/MCP scan: treated as untrusted model output, not host commands. +- User-provided target URLs: validated and sanitized before use. + +## Out of Scope + +- **Multi-user authorization issues** — AIG has no multi-user system. Reports about user isolation, session hijacking between users, or privilege escalation between accounts do not apply. +- **Missing WebUI authentication** when deployed per documentation (loopback-only). If you expose `:8088` publicly and lack auth, that is an operator misconfiguration. +- Prompt-injection-only attacks in agent scan / MCP scan that do not cross a host/network/filesystem boundary. +- Scan results showing vulnerabilities in third-party software that AIG is scanning (those are findings, not AIG vulnerabilities). +- Reports about LLM API keys stored in config files when the operator intentionally placed them there. +- DoS via crafted scan targets that require the operator to deliberately target a malicious host. +- Reports that only demonstrate AIG behaving correctly as a security scanner (e.g., "AIG sends HTTP probes to targets" — that is the intended functionality). +- Reports that require physical or shell access to the machine running AIG (already within the trusted operator boundary). +- Missing HTTPS on the default local loopback deployment. +- Scanner-only claims without a working reproduction or against stale paths. +- Reports that restate an already-fixed issue against later released versions without showing the vulnerable path still exists. + +## Operational Guidance + +### Network Exposure + +AIG WebUI defaults to `127.0.0.1:8088` (loopback only). **Do not expose it to the public internet.** + +If remote access is needed: +- Use an SSH tunnel: `ssh -L 8088:127.0.0.1:8088 user@host` +- Or deploy behind a reverse proxy (nginx/caddy) with authentication, accessible only over a VPN or trusted network. + +Do **not** bind to `0.0.0.0` without additional access controls. 
+ +### Docker Deployment + +When running AIG via Docker: + +```bash +# Restrict to loopback only +docker run -p 127.0.0.1:8088:8088 zhuquelab/aig-server:latest + +# Further restrict with read-only filesystem where possible +docker run -p 127.0.0.1:8088:8088 --read-only \ + -v aig-data:/app/data \ + zhuquelab/aig-server:latest +``` + +Avoid publishing `8088` without `-p 127.0.0.1:8088:8088` binding in production environments. + +### API Key Protection + +AIG uses LLM API keys for agent scan and MCP scan. Protect them: + +- Store keys in environment variables or secure config files, not in version-controlled files. +- Restrict file permissions: `chmod 600 <config-file>`. +- Rotate keys immediately if accidentally committed to a repository. +- Never log API keys — AIG is designed to mask them in logs, but verify this in your deployment. + +### Rule File Integrity + +AIG loads vulnerability rules from `data/fingerprints/` and `data/vuln/` at startup. + +- Ensure these directories are writable only by the operator. +- When using the auto-update API (`POST /api/v1/system/update-data`), ensure the endpoint is accessible only to trusted operators. +- Validate rule files with `aig -check-vul` after manual changes. + +## Vulnerability Disclosure Process + +1. Reporter submits via GitHub Security Advisories (private). +2. Maintainers acknowledge within **5 business days**. +3. Maintainers assess severity and reproduce the issue. +4. Fix developed and tested, patch release prepared. +5. CVE assigned if applicable. +6. Public disclosure after patch is released (coordinated with reporter). + +We aim to resolve Critical/High severity issues within **14 days** of confirmed reproduction. + +## Bug Bounties + +AI-Infra-Guard is an open-source project. There is no formal bug bounty program. We deeply appreciate responsible disclosure — the best contribution is a clear report and, ideally, a pull request with a fix. 
+ +## Maintainers + +Security reports are handled by the **Tencent Zhuque Lab** team. + +- GitHub: [@Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard) +- Contact: [zhuquelab@tencent.com](mailto:zhuquelab@tencent.com)