From 30c56cf78e579892485accd093c9435ccf82b78b Mon Sep 17 00:00:00 2001
From: zhuque <zhuque@tencent.com>
Date: Fri, 10 Apr 2026 17:25:19 +0800
Subject: [PATCH 1/2] feat(system): add data auto-sync API for data/ directory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two new endpoints under /api/v1/system/:

  POST /api/v1/system/update-data
    Downloads the GitHub archive (branch or tag) and overwrites the
    configured data/ sub-directories (fingerprints, vuln, vuln_en,
    mcp, eval, agents). Runs asynchronously; supports optional
    github_token to avoid rate-limiting and per-call dir selection.

  GET /api/v1/system/update-status
    Returns current / last sync status: running flag, success bool,
    started_at / finished_at timestamps, file count, message.

Implementation:
- common/websocket/update_api.go  — handler + sync logic
- common/websocket/update_api_test.go — unit tests (zip extract, dir filter)
- common/websocket/server.go      — route registration under /system group
- docs/api_data_update.md         — full API documentation with examples
---
 common/websocket/server.go          |   8 +
 common/websocket/update_api.go      | 364 ++++++++++++++++++++++++++++
 common/websocket/update_api_test.go | 185 ++++++++++++++
 docs/api_data_update.md             | 170 +++++++++++++
 4 files changed, 727 insertions(+)
 create mode 100644 common/websocket/update_api.go
 create mode 100644 common/websocket/update_api_test.go
 create mode 100644 docs/api_data_update.md

diff --git a/common/websocket/server.go b/common/websocket/server.go
index 9d973dbb..d16217fa 100644
--- a/common/websocket/server.go
+++ b/common/websocket/server.go
@@ -303,6 +303,14 @@ func RunWebServer(options *version.Options) {
 				"changelog": string(data),
 			})
 		})
+
+		// system — data directory auto-sync
+		system := v1.Group("/system")
+		system.Use(setupIdentityMiddleware())
+		{
+			system.POST("/update-data", HandleTriggerDataUpdate)
+			system.GET("/update-status", HandleGetUpdateStatus)
+		}
 	}
 
 	// Swagger UI - 必须在 NoRoute 之前注册
diff --git a/common/websocket/update_api.go b/common/websocket/update_api.go
new file mode 100644
index 00000000..40e598bd
--- /dev/null
+++ b/common/websocket/update_api.go
@@ -0,0 +1,364 @@
+// Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Requirement: Any integration or derivative work must explicitly attribute
+// Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
+// documentation or user interface, as detailed in the NOTICE file.
+
+// Package websocket provides the HTTP API handlers for the AIG web server.
+package websocket
+
+import (
+	"archive/zip"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ---------------------------------------------------------------------------
+// Constants & package-level state
+// ---------------------------------------------------------------------------
+
+const (
+	defaultGitHubRepo   = "Tencent/AI-Infra-Guard"
+	defaultGitHubBranch = "main"
+	githubZipURLFmt     = "https://codeload.github.com/%s/zip/refs/heads/%s"
+	githubTagZipURLFmt  = "https://codeload.github.com/%s/zip/refs/tags/%s"
+
+	// dataDirsDefault is the comma-separated list of data/ sub-directories
+	// synced when UpdateDataRequest.Dirs is empty.
+	dataDirsDefault = "fingerprints,vuln,vuln_en,mcp,eval,agents"
+)
+
+// UpdateStatus holds the current state of a data-sync operation.
+type UpdateStatus struct {
+	Running   bool      `json:"running"`
+	Success   *bool     `json:"success,omitempty"`
+	StartedAt time.Time `json:"started_at,omitempty"`
+	FinishedAt *time.Time `json:"finished_at,omitempty"`
+	Message   string    `json:"message"`
+	// FilesUpdated is the number of files written to disk.
+	FilesUpdated int `json:"files_updated"`
+	// Ref is the branch or tag that was used.
+	Ref string `json:"ref,omitempty"`
+}
+
+var (
+	updateMu     sync.Mutex
+	updateStatus = &UpdateStatus{Message: "idle"}
+)
+
+// ---------------------------------------------------------------------------
+// Request / Response types
+// ---------------------------------------------------------------------------
+
+// UpdateDataRequest is the JSON body for POST /api/v1/system/update-data.
+//
+//	{
+//	  "ref":          "main",          // branch or tag, default: "main"
+//	  "is_tag":       false,           // set true when ref is a tag
+//	  "github_token": "",              // optional, avoids GitHub rate-limit (60 req/h anon)
+//	  "dirs":         "fingerprints,vuln,vuln_en,mcp,eval,agents"  // optional
+//	}
+type UpdateDataRequest struct {
+	Ref         string `json:"ref"`
+	IsTag       bool   `json:"is_tag"`
+	GithubToken string `json:"github_token"`
+	Dirs        string `json:"dirs"`
+}
+
+// ---------------------------------------------------------------------------
+// Handlers
+// ---------------------------------------------------------------------------
+
+// HandleGetUpdateStatus godoc
+//
+//	@Summary		Get data-sync status
+//	@Description	Returns the current (or last) status of the automatic data directory sync.
+//	@Tags			system
+//	@Produce		json
+//	@Success		200	{object}	UpdateStatus
+//	@Router			/api/v1/system/update-status [get]
+func HandleGetUpdateStatus(c *gin.Context) {
+	updateMu.Lock()
+	snap := *updateStatus
+	updateMu.Unlock()
+	c.JSON(http.StatusOK, snap)
+}
+
+// HandleTriggerDataUpdate godoc
+//
+//	@Summary		Trigger data directory sync from GitHub
+//	@Description	Downloads the repository archive from GitHub and overwrites the local
+//	@Description	data/ sub-directories (fingerprints, vuln, vuln_en, mcp, eval, agents).
+//	@Description	The operation runs asynchronously; poll GET /api/v1/system/update-status
+//	@Description	for progress.  Only one sync may run at a time.
+//	@Tags			system
+//	@Accept			json
+//	@Produce		json
+//	@Param			body	body		UpdateDataRequest	false	"Sync options"
+//	@Success		202	{object}	UpdateStatus		"Sync started"
+//	@Success		200	{object}	UpdateStatus		"Already running"
+//	@Failure		500	{object}	map[string]string	"Internal error"
+//	@Router			/api/v1/system/update-data [post]
+func HandleTriggerDataUpdate(c *gin.Context) {
+	var req UpdateDataRequest
+	// Body is optional; any bind error (empty or malformed JSON) is ignored and empty fields fall back to the defaults below.
+	_ = c.ShouldBindJSON(&req)
+
+	if req.Ref == "" {
+		req.Ref = defaultGitHubBranch
+	}
+	if req.Dirs == "" {
+		req.Dirs = dataDirsDefault
+	}
+
+	updateMu.Lock()
+	if updateStatus.Running {
+		snap := *updateStatus
+		updateMu.Unlock()
+		c.JSON(http.StatusOK, snap)
+		return
+	}
+	updateStatus = &UpdateStatus{
+		Running:   true,
+		StartedAt: time.Now(),
+		Message:   "downloading archive from GitHub…",
+		Ref:       req.Ref,
+	}
+	updateMu.Unlock()
+
+	go runDataUpdate(req)
+
+	updateMu.Lock()
+	snap := *updateStatus
+	updateMu.Unlock()
+	c.JSON(http.StatusAccepted, snap)
+}
+
+// ---------------------------------------------------------------------------
+// Core sync logic
+// ---------------------------------------------------------------------------
+
+func runDataUpdate(req UpdateDataRequest) {
+	setStatus := func(msg string, filesUpdated int) {
+		updateMu.Lock()
+		updateStatus.Message = msg
+		updateStatus.FilesUpdated = filesUpdated
+		updateMu.Unlock()
+	}
+
+	finish := func(success bool, msg string, filesUpdated int) {
+		now := time.Now()
+		updateMu.Lock()
+		b := success
+		updateStatus.Running = false
+		updateStatus.Success = &b
+		updateStatus.FinishedAt = &now
+		updateStatus.Message = msg
+		updateStatus.FilesUpdated = filesUpdated
+		updateMu.Unlock()
+	}
+
+	// 1. Build download URL
+	var downloadURL string
+	if req.IsTag {
+		downloadURL = fmt.Sprintf(githubTagZipURLFmt, defaultGitHubRepo, req.Ref)
+	} else {
+		downloadURL = fmt.Sprintf(githubZipURLFmt, defaultGitHubRepo, req.Ref)
+	}
+
+	// 2. Download archive
+	setStatus(fmt.Sprintf("downloading %s …", downloadURL), 0)
+	body, err := downloadArchive(downloadURL, req.GithubToken)
+	if err != nil {
+		finish(false, fmt.Sprintf("download failed: %v", err), 0)
+		return
+	}
+
+	// 3. Extract & overwrite
+	setStatus("extracting archive …", 0)
+	dirs := splitDirs(req.Dirs)
+	n, err := extractDataDirs(body, dirs)
+	if err != nil {
+		finish(false, fmt.Sprintf("extraction failed: %v", err), n)
+		return
+	}
+
+	finish(true, fmt.Sprintf("sync complete — %d file(s) updated from ref %q", n, req.Ref), n)
+}
+
+// downloadArchive GETs url (sending token as an Authorization header when non-empty) and returns the full response body; any non-200 status is an error.
+func downloadArchive(url, token string) ([]byte, error) {
+	client := &http.Client{Timeout: 5 * time.Minute}
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "token "+token)
+	}
+	req.Header.Set("User-Agent", "AI-Infra-Guard/data-updater")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url)
+	}
+
+	return io.ReadAll(resp.Body)
+}
+
+// extractDataDirs extracts the requested data sub-directories from the zip
+// archive and writes them to paths relative to the working directory,
+// returning the number of files written (also on error, as a partial count).
+//
+// The leading path segment of the first directory entry (GitHub uses "<repo>-<ref>/",
+// e.g. "AI-Infra-Guard-main/") is stripped; only entries under data/<dir>/ are written.
+func extractDataDirs(zipBytes []byte, dirs []string) (int, error) {
+	zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
+	if err != nil {
+		return 0, fmt.Errorf("invalid zip: %w", err)
+	}
+
+	// Find the top-level prefix (first directory entry).
+	prefix := ""
+	for _, f := range zr.File {
+		if f.FileInfo().IsDir() {
+			parts := strings.SplitN(f.Name, "/", 2)
+			prefix = parts[0] + "/"
+			break
+		}
+	}
+
+	// Build a quick lookup set for the requested dirs.
+	wantDir := make(map[string]bool, len(dirs))
+	for _, d := range dirs {
+		wantDir[strings.TrimSpace(d)] = true
+	}
+
+	filesWritten := 0
+	for _, f := range zr.File {
+		// Strip the top-level prefix.
+		rel := strings.TrimPrefix(f.Name, prefix)
+		// We only care about files under data/<wantDir>/
+		if !strings.HasPrefix(rel, "data/") {
+			continue
+		}
+		// rel is like "data/fingerprints/foo.yaml". NOTE(review): ".." segments in rel (or in dirs) are not rejected — confirm this is safe.
+		parts := strings.SplitN(rel, "/", 3) // ["data", "subdir", "rest"]
+		if len(parts) < 3 {
+			continue // skip "data/" itself and entries directly under data/ (no second "/")
+		}
+		subDir := parts[1]
+		if !wantDir[subDir] {
+			continue
+		}
+		if f.FileInfo().IsDir() {
+			if err := os.MkdirAll(rel, 0o755); err != nil {
+				return filesWritten, fmt.Errorf("mkdir %s: %w", rel, err)
+			}
+			continue
+		}
+
+		// Ensure parent directory exists.
+		if err := os.MkdirAll(filepath.Dir(rel), 0o755); err != nil {
+			return filesWritten, fmt.Errorf("mkdir %s: %w", filepath.Dir(rel), err)
+		}
+
+		// Write file.
+		rc, err := f.Open()
+		if err != nil {
+			return filesWritten, fmt.Errorf("open zip entry %s: %w", f.Name, err)
+		}
+		written, writeErr := writeFile(rel, rc)
+		rc.Close()
+		if writeErr != nil {
+			return filesWritten, fmt.Errorf("write %s: %w", rel, writeErr)
+		}
+		if written {
+			filesWritten++
+		}
+	}
+
+	return filesWritten, nil
+}
+
+// writeFile reads rc fully and writes it to path with mode 0o644 via os.WriteFile (not an atomic replace).
+// It reports whether the file was actually written (always true on success).
+func writeFile(path string, rc io.Reader) (bool, error) {
+	data, err := io.ReadAll(rc)
+	if err != nil {
+		return false, err
+	}
+	if err := os.WriteFile(path, data, 0o644); err != nil {
+		return false, err
+	}
+	return true, nil
+}
+
+// splitDirs splits a comma-separated list of directory names.
+func splitDirs(s string) []string {
+	parts := strings.Split(s, ",")
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		p = strings.TrimSpace(p)
+		if p != "" {
+			out = append(out, p)
+		}
+	}
+	return out
+}
+
+// ---------------------------------------------------------------------------
+// Swagger model helpers (needed by swaggo for the UpdateStatus pointer fields)
+// ---------------------------------------------------------------------------
+
+// updateStatusJSON mirrors the fields of UpdateStatus; UpdateStatus.MarshalJSON encodes through it.
+type updateStatusJSON struct {
+	Running    bool       `json:"running"`
+	Success    *bool      `json:"success,omitempty"`
+	StartedAt  time.Time  `json:"started_at,omitempty"`
+	FinishedAt *time.Time `json:"finished_at,omitempty"`
+	Message    string     `json:"message"`
+	FilesUpdated int      `json:"files_updated"`
+	Ref        string     `json:"ref,omitempty"`
+}
+
+// MarshalJSON implements json.Marshaler by copying every field into the
+// updateStatusJSON mirror type and marshalling that value.
+func (u UpdateStatus) MarshalJSON() ([]byte, error) {
+	return json.Marshal(updateStatusJSON{
+		Running:      u.Running,
+		Success:      u.Success,
+		StartedAt:    u.StartedAt,
+		FinishedAt:   u.FinishedAt,
+		Message:      u.Message,
+		FilesUpdated: u.FilesUpdated,
+		Ref:          u.Ref,
+	})
+}
diff --git a/common/websocket/update_api_test.go b/common/websocket/update_api_test.go
new file mode 100644
index 00000000..c6c34cd3
--- /dev/null
+++ b/common/websocket/update_api_test.go
@@ -0,0 +1,185 @@
+// Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Requirement: Any integration or derivative work must explicitly attribute
+// Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
+// documentation or user interface, as detailed in the NOTICE file.
+
+package websocket
+
+import (
+	"archive/zip"
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// buildTestZip creates an in-memory zip that mimics the GitHub archive layout:
+//
+//	AI-Infra-Guard-main/
+//	AI-Infra-Guard-main/data/fingerprints/foo.yaml
+//	AI-Infra-Guard-main/data/vuln/bar/CVE-2024-0001.yaml
+//	AI-Infra-Guard-main/data/mcp/tool.yaml
+//	AI-Infra-Guard-main/README.md   <- should NOT be extracted
+func buildTestZip(t *testing.T) []byte {
+	t.Helper()
+	buf := new(bytes.Buffer)
+	w := zip.NewWriter(buf)
+
+	entries := []struct {
+		name    string
+		content string
+		isDir   bool
+	}{
+		{"AI-Infra-Guard-main/", "", true},
+		{"AI-Infra-Guard-main/data/", "", true},
+		{"AI-Infra-Guard-main/data/fingerprints/", "", true},
+		{"AI-Infra-Guard-main/data/fingerprints/foo.yaml", "name: foo\n", false},
+		{"AI-Infra-Guard-main/data/vuln/", "", true},
+		{"AI-Infra-Guard-main/data/vuln/bar/", "", true},
+		{"AI-Infra-Guard-main/data/vuln/bar/CVE-2024-0001.yaml", "cve: CVE-2024-0001\n", false},
+		{"AI-Infra-Guard-main/data/mcp/", "", true},
+		{"AI-Infra-Guard-main/data/mcp/tool.yaml", "rule: test\n", false},
+		// files that should be ignored
+		{"AI-Infra-Guard-main/README.md", "# readme\n", false},
+		{"AI-Infra-Guard-main/cmd/main.go", "package main\n", false},
+	}
+
+	for _, e := range entries {
+		if e.isDir {
+			fh := &zip.FileHeader{Name: e.name, Method: zip.Deflate}
+			fh.SetMode(0o755 | os.ModeDir)
+			_, err := w.CreateHeader(fh)
+			if err != nil {
+				t.Fatalf("zip CreateHeader dir %s: %v", e.name, err)
+			}
+		} else {
+			f, err := w.Create(e.name)
+			if err != nil {
+				t.Fatalf("zip Create %s: %v", e.name, err)
+			}
+			_, _ = f.Write([]byte(e.content))
+		}
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("zip Close: %v", err)
+	}
+	return buf.Bytes()
+}
+
+func TestExtractDataDirs_selectiveDirs(t *testing.T) {
+	zipBytes := buildTestZip(t)
+	tmp := t.TempDir()
+
+	// Change working directory to tmp so relative paths resolve correctly.
+	orig, _ := os.Getwd()
+	if err := os.Chdir(tmp); err != nil {
+		t.Fatalf("Chdir: %v", err)
+	}
+	defer os.Chdir(orig)
+
+	dirs := []string{"fingerprints", "vuln"}
+	n, err := extractDataDirs(zipBytes, dirs)
+	if err != nil {
+		t.Fatalf("extractDataDirs: %v", err)
+	}
+
+	// Expect 2 files: foo.yaml and CVE-2024-0001.yaml
+	if n != 2 {
+		t.Errorf("expected 2 files written, got %d", n)
+	}
+
+	// Verify fingerprints file exists and has correct content.
+	fpPath := filepath.Join("data", "fingerprints", "foo.yaml")
+	data, err := os.ReadFile(fpPath)
+	if err != nil {
+		t.Fatalf("ReadFile %s: %v", fpPath, err)
+	}
+	if strings.TrimSpace(string(data)) != "name: foo" {
+		t.Errorf("unexpected content in %s: %q", fpPath, string(data))
+	}
+
+	// Verify vuln sub-directory file exists.
+	vulnPath := filepath.Join("data", "vuln", "bar", "CVE-2024-0001.yaml")
+	if _, err := os.Stat(vulnPath); err != nil {
+		t.Errorf("expected %s to exist: %v", vulnPath, err)
+	}
+
+	// Verify mcp was NOT extracted (not in dirs list).
+	mcpPath := filepath.Join("data", "mcp", "tool.yaml")
+	if _, err := os.Stat(mcpPath); !os.IsNotExist(err) {
+		t.Errorf("expected %s to NOT exist", mcpPath)
+	}
+
+	// Verify README.md was NOT extracted.
+	readmePath := "README.md"
+	if _, err := os.Stat(readmePath); !os.IsNotExist(err) {
+		t.Errorf("expected %s to NOT exist", readmePath)
+	}
+}
+
+func TestExtractDataDirs_allDirs(t *testing.T) {
+	zipBytes := buildTestZip(t)
+	tmp := t.TempDir()
+
+	orig, _ := os.Getwd()
+	if err := os.Chdir(tmp); err != nil {
+		t.Fatalf("Chdir: %v", err)
+	}
+	defer os.Chdir(orig)
+
+	dirs := splitDirs(dataDirsDefault)
+	n, err := extractDataDirs(zipBytes, dirs)
+	if err != nil {
+		t.Fatalf("extractDataDirs: %v", err)
+	}
+
+	// Test zip has 3 data files (foo.yaml, CVE-2024-0001.yaml, tool.yaml).
+	if n != 3 {
+		t.Errorf("expected 3 files written, got %d", n)
+	}
+}
+
+func TestExtractDataDirs_invalidZip(t *testing.T) {
+	_, err := extractDataDirs([]byte("this is not a zip"), []string{"fingerprints"})
+	if err == nil {
+		t.Error("expected error for invalid zip, got nil")
+	}
+}
+
+func TestSplitDirs(t *testing.T) {
+	cases := []struct {
+		input string
+		want  []string
+	}{
+		{"fingerprints,vuln", []string{"fingerprints", "vuln"}},
+		{" fingerprints , vuln_en ", []string{"fingerprints", "vuln_en"}},
+		{"", []string{}},
+		{"mcp", []string{"mcp"}},
+	}
+	for _, tc := range cases {
+		got := splitDirs(tc.input)
+		if len(got) != len(tc.want) {
+			t.Errorf("splitDirs(%q): got %v, want %v", tc.input, got, tc.want)
+			continue
+		}
+		for i := range got {
+			if got[i] != tc.want[i] {
+				t.Errorf("splitDirs(%q)[%d]: got %q, want %q", tc.input, i, got[i], tc.want[i])
+			}
+		}
+	}
+}
diff --git a/docs/api_data_update.md b/docs/api_data_update.md
new file mode 100644
index 00000000..9cf6745e
--- /dev/null
+++ b/docs/api_data_update.md
@@ -0,0 +1,170 @@
+# Data Auto-Sync API
+
+> **Base URL**: `http://<host>:8088/api/v1`
+>
+> All endpoints require the same authentication as the rest of the AIG API
+> (session cookie / `X-Token` header set during login).
+
+---
+
+## Overview
+
+AIG's detection rules live in the `data/` directory on disk:
+
+| Sub-directory | Contents |
+|---|---|
+| `data/fingerprints/` | YAML fingerprint rules for AI components |
+| `data/vuln/` | Chinese CVE/GHSA vulnerability rules |
+| `data/vuln_en/` | English CVE/GHSA vulnerability rules |
+| `data/mcp/` | MCP security detection rules |
+| `data/eval/` | Jailbreak / prompt-security evaluation datasets |
+| `data/agents/` | Agent scan configuration |
+
+The **data auto-sync** feature lets you pull the latest rules from the
+official GitHub repository (`Tencent/AI-Infra-Guard`) without restarting
+the server or rebuilding the Docker image.
+
+---
+
+## Endpoints
+
+### POST `/api/v1/system/update-data`
+
+Trigger an asynchronous sync of the `data/` directory from GitHub.
+
+Only **one sync** can run at a time. If a sync is already in progress the
+endpoint returns `200 OK` with the current status instead of starting a new
+one.
+
+#### Request Body (JSON, optional)
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `ref` | `string` | `"main"` | Branch name or tag to sync from |
+| `is_tag` | `bool` | `false` | Set `true` when `ref` is a Git tag (e.g. `"v4.1.3"`) |
+| `github_token` | `string` | `""` | Personal access token — avoids GitHub's anonymous rate limit (60 req/h) |
+| `dirs` | `string` | `"fingerprints,vuln,vuln_en,mcp,eval,agents"` | Comma-separated list of `data/` sub-directories to sync |
+
+#### Response — `202 Accepted` (sync started) or `200 OK` (already running)
+
+```json
+{
+  "running": true,
+  "started_at": "2026-04-10T17:20:00Z",
+  "message": "downloading archive from GitHub…",
+  "files_updated": 0,
+  "ref": "main"
+}
+```
+`success` and `finished_at` are omitted from the response until the sync finishes.
+
+#### Examples
+
+**Sync latest `main` (anonymous)**
+```bash
+curl -X POST http://localhost:8088/api/v1/system/update-data \
+  -H "Content-Type: application/json" \
+  -d '{}'
+```
+
+**Sync a specific release tag**
+```bash
+curl -X POST http://localhost:8088/api/v1/system/update-data \
+  -H "Content-Type: application/json" \
+  -d '{
+    "ref": "v4.1.3",
+    "is_tag": true
+  }'
+```
+
+**Sync only vulnerability rules (authenticated)**
+```bash
+curl -X POST http://localhost:8088/api/v1/system/update-data \
+  -H "Content-Type: application/json" \
+  -d '{
+    "ref": "main",
+    "github_token": "ghp_xxxxxxxxxxxx",
+    "dirs": "vuln,vuln_en"
+  }'
+```
+
+---
+
+### GET `/api/v1/system/update-status`
+
+Return the status of the current (or most recent) sync operation.
+
+#### Response — `200 OK`
+
+```json
+{
+  "running": false,
+  "success": true,
+  "started_at": "2026-04-10T17:20:00Z",
+  "finished_at": "2026-04-10T17:20:42Z",
+  "message": "sync complete — 312 file(s) updated from ref \"main\"",
+  "files_updated": 312,
+  "ref": "main"
+}
+```
+
+#### Response Fields
+
+| Field | Type | Description |
+|---|---|---|
+| `running` | `bool` | `true` while a sync is in progress |
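+| `success` | `bool` (optional) | `true` = completed OK, `false` = error; omitted while a sync is running or before the first sync |
+| `started_at` | `string (RFC3339)` | When the current/last sync started (the zero time `0001-01-01T00:00:00Z` before the first sync) |
+| `finished_at` | `string (RFC3339)` (optional) | When it finished; omitted while a sync is running or before the first sync |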
+| `message` | `string` | Human-readable status/error description |
+| `files_updated` | `int` | Number of files written to disk |
+| `ref` | `string` | Branch or tag used |
+
+#### Example — poll until done
+```bash
+while true; do
+  STATUS=$(curl -s http://localhost:8088/api/v1/system/update-status)
+  echo "$STATUS"
+  RUNNING=$(echo "$STATUS" | python3 -c "import sys,json; print(json.load(sys.stdin)['running'])")
+  [ "$RUNNING" = "False" ] && break
+  sleep 3
+done
+```
+
+---
+
+## Workflow
+
+```
+Client                          AIG Server                    GitHub
+  |                                 |                            |
+  |-- POST /system/update-data ---> |                            |
+  |<-- 202 Accepted (running=true)  |                            |
+  |                                 |-- GET codeload.github.com -->|
+  |                                 |<-- zip archive --------------|
+  |                                 | (unzip + overwrite data/)   |
+  |                                 |                            |
+  |-- GET /system/update-status --> |                            |
+  |<-- 200 OK (running=false,       |                            |
+  |            success=true)        |                            |
+```
+
+---
+
+## Error Cases
+
+| Scenario | `success` | `message` example |
+|---|---|---|
+| GitHub unreachable / timeout | `false` | `"download failed: Get … context deadline exceeded"` |
+| Invalid ref / 404 | `false` | `"download failed: HTTP 404 from …"` |
+| Disk write error | `false` | `"extraction failed: write data/vuln/…: permission denied"` |
+| Rate limited (anonymous) | `false` | `"download failed: HTTP 429 from …"` — use `github_token` |
+
+---
+
+## Notes
+
+- The sync **overwrites** matching files in `data/` but does **not delete** files that no longer exist in the upstream repo. To do a full clean sync, remove the `data/` sub-directories manually before triggering the update.
+- The server does **not** need to restart after a sync — rule files are read from disk at scan time.
+- In-progress scans are not interrupted; they will use the new rules on the next run.
+- The `github_token` field value is **never logged or stored**.

From 0a200100e277f5de3427be4f804067dff16d052d Mon Sep 17 00:00:00 2001
From: zhuque <zhuque@tencent.com>
Date: Mon, 13 Apr 2026 17:20:16 +0800
Subject: [PATCH 2/2] docs: add SECURITY.md with trust model and vulnerability
 disclosure policy

---
 SECURITY.md | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 SECURITY.md

diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..d907a1cc
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,160 @@
+# Security Policy
+
+If you believe you've found a security issue in AI-Infra-Guard, please report it responsibly.
+
+## Reporting
+
+Report vulnerabilities via GitHub Security Advisories:
+
+- **Core scanner, agent scan, MCP scan, WebUI** — [Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard/security/advisories/new)
+- **Vulnerability rule database** — [Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard/security/advisories/new) (data/vuln, data/fingerprints)
+
+For issues that don't fit a specific category, open a GitHub Security Advisory or contact the maintainers at **[zhuquelab@tencent.com](mailto:zhuquelab@tencent.com)**.
+
+### Required in Reports
+
+1. **Title**
+2. **Severity Assessment** (Critical / High / Medium / Low)
+3. **Impact** — What can an attacker achieve?
+4. **Affected Component** — Which module, file, and function?
+5. **Technical Reproduction** — Step-by-step PoC
+6. **Demonstrated Impact** — Evidence the impact is real
+7. **Environment** — AIG version, OS, deployment method (binary/Docker)
+8. **Remediation Advice**
+
+Reports without reproduction steps, demonstrated impact, and remediation advice will be deprioritized.
+
+### Report Acceptance Gate (Triage Fast Path)
+
+For fastest triage, include all of the following:
+
+- Exact vulnerable path (file, function, and line range) on a current revision.
+- AIG version (`--version` output) and/or commit SHA.
+- Reproducible PoC against the latest `main` or latest released tag.
+- Demonstrated impact tied to AIG's documented trust boundaries.
+- Explicit statement that the report is **not** covered by the Out of Scope section below.
+
+### Common False-Positive Patterns
+
+These are frequently reported but are typically closed with no code change:
+
+- Reports that assume multi-user isolation exists. **AIG has no multi-user system.** There are no login accounts, sessions, or per-user permission boundaries. The WebUI is a single-operator interface. Reports about "user A accessing user B's data" do not apply — there is only one operator.
+- Prompt-injection-only chains (agent scan / MCP scan) without a boundary bypass. Prompt injection is expected behavior in an AI red-teaming tool; it is out of scope unless it crosses an OS/network/filesystem boundary.
+- Missing authentication on the WebUI (`:8088`) when deployed per documentation. AIG defaults to `127.0.0.1:8088` (loopback). Exposing it to a non-loopback address is an operator misconfiguration, not an AIG vulnerability.
+- Reports that only show AIG scanning itself or the host it runs on, without demonstrating an unauthorized path that triggers such a scan.
+- Scanner-only claims against stale or non-existent paths, or claims without a working reproduction.
+- Reports about TLS not being enforced on the default local loopback deployment.
+- DoS claims that require trusted operator input (e.g., crafted scan targets or rule files already under operator control).
+- Reports about the LLM model API key being stored in config when the operator deliberately configured it there.
+- Reports that only show AIG executing commands/probes against scan targets that the operator explicitly provided.
+
+## Trust Model
+
+AI-Infra-Guard is a **single-operator security tool**, not a multi-tenant platform.
+
+### Single-Operator Model
+
+- **There is no multi-user system in AIG.** AIG has no user accounts, no login/authentication for the WebUI, no per-user sessions, and no role-based access control.
+- Anyone with network access to the WebUI (`-ws-addr`) is treated as the operator. This is by design for a local security tool.
+- Security reports that assume a multi-user authorization boundary (e.g., "user A can view user B's scan results") are **not applicable** — there is only one operator per instance.
+- Recommended deployment: run AIG on your local machine or a dedicated scan host, accessible only to the operator.
+
+### Deployment Trust Boundaries
+
+- **CLI mode** (`aig -target ...`): No network exposure. Output goes to stdout/file. Full trust to the operator running the process.
+- **WebUI mode** (`aig -ws`): Binds to `127.0.0.1:8088` by default. Only the local operator should access it. Do not expose to the network.
+- **Docker mode**: The container exposes port `8088`. Use firewall rules, Docker network isolation, or a reverse proxy with authentication to restrict access to trusted operators only.
+
+### What AIG Trusts
+
+- The operator who launches AIG is fully trusted.
+- Scan targets provided by the operator are treated as external untrusted input.
+- LLM API responses (in agent scan / MCP scan) are treated as untrusted content — the scan engine processes them, not the host OS.
+- Rule YAML files (`data/fingerprints/`, `data/vuln/`) loaded at startup are treated as trusted operator-supplied data.
+
+### What AIG Does Not Trust
+
+- HTTP responses from scan targets: parsed defensively, never executed.
+- LLM-generated content during agent/MCP scan: treated as untrusted model output, not host commands.
+- User-provided target URLs: validated and sanitized before use.
+
+## Out of Scope
+
+- **Multi-user authorization issues** — AIG has no multi-user system. Reports about user isolation, session hijacking between users, or privilege escalation between accounts do not apply.
+- **Missing WebUI authentication** when deployed per documentation (loopback-only). If you expose `:8088` publicly and lack auth, that is an operator misconfiguration.
+- Prompt-injection-only attacks in agent scan / MCP scan that do not cross a host/network/filesystem boundary.
+- Scan results showing vulnerabilities in third-party software that AIG is scanning (those are findings, not AIG vulnerabilities).
+- Reports about LLM API keys stored in config files when the operator intentionally placed them there.
+- DoS via crafted scan targets that require the operator to deliberately target a malicious host.
+- Reports that only demonstrate AIG behaving correctly as a security scanner (e.g., "AIG sends HTTP probes to targets" — that is the intended functionality).
+- Reports that require physical or shell access to the machine running AIG (already within the trusted operator boundary).
+- Missing HTTPS on the default local loopback deployment.
+- Scanner-only claims without a working reproduction or against stale paths.
+- Reports that restate an already-fixed issue against later released versions without showing the vulnerable path still exists.
+
+## Operational Guidance
+
+### Network Exposure
+
+AIG WebUI defaults to `127.0.0.1:8088` (loopback only). **Do not expose it to the public internet.**
+
+If remote access is needed:
+- Use an SSH tunnel: `ssh -L 8088:127.0.0.1:8088 user@host`
+- Or deploy behind a reverse proxy (nginx/caddy) with authentication, accessible only over a VPN or trusted network.
+
+Do **not** bind to `0.0.0.0` without additional access controls.
+
+### Docker Deployment
+
+When running AIG via Docker:
+
+```bash
+# Restrict to loopback only
+docker run -p 127.0.0.1:8088:8088 zhuquelab/aig-server:latest
+
+# Further restrict with read-only filesystem where possible
+docker run -p 127.0.0.1:8088:8088 --read-only \
+  -v aig-data:/app/data \
+  zhuquelab/aig-server:latest
+```
+
+Avoid publishing port `8088` in production environments unless it is bound to loopback (`-p 127.0.0.1:8088:8088`).
+
+### API Key Protection
+
+AIG uses LLM API keys for agent scan and MCP scan. Protect them:
+
+- Store keys in environment variables or secure config files, not in version-controlled files.
+- Restrict file permissions: `chmod 600 <config-file>`.
+- Rotate keys immediately if accidentally committed to a repository.
+- Never log API keys — AIG is designed to mask them in logs, but verify this in your deployment.
+
+### Rule File Integrity
+
+AIG loads vulnerability rules from `data/fingerprints/` and `data/vuln/` at startup.
+
+- Ensure these directories are writable only by the operator.
+- When using the auto-update API (`POST /api/v1/system/update-data`), ensure the endpoint is accessible only to trusted operators.
+- Validate rule files with `aig -check-vul` after manual changes.
+
+## Vulnerability Disclosure Process
+
+1. Reporter submits via GitHub Security Advisories (private).
+2. Maintainers acknowledge within **5 business days**.
+3. Maintainers assess severity and reproduce the issue.
+4. Fix developed and tested, patch release prepared.
+5. CVE assigned if applicable.
+6. Public disclosure after patch is released (coordinated with reporter).
+
+We aim to resolve Critical/High severity issues within **14 days** of confirmed reproduction.
+
+## Bug Bounties
+
+AI-Infra-Guard is an open-source project. There is no formal bug bounty program. We deeply appreciate responsible disclosure — the best contribution is a clear report and, ideally, a pull request with a fix.
+
+## Maintainers
+
+Security reports are handled by the **Tencent Zhuque Lab** team.
+
+- GitHub: [@Tencent/AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard)
+- Contact: [zhuquelab@tencent.com](mailto:zhuquelab@tencent.com)