diff --git a/sast-engine/tools/generate_go_thirdparty_registry.go b/sast-engine/tools/generate_go_thirdparty_registry.go new file mode 100644 index 00000000..1a2d4562 --- /dev/null +++ b/sast-engine/tools/generate_go_thirdparty_registry.go @@ -0,0 +1,204 @@ +//go:build cpf_generate_thirdparty_registry + +// generate_go_thirdparty_registry is a standalone tool that downloads Go third-party +// modules and extracts their exported API surface into versioned JSON registry files +// compatible with the GoThirdPartyRegistryRemote CDN loader. +// +// Usage: +// +// go run -tags cpf_generate_thirdparty_registry tools/generate_go_thirdparty_registry.go \ +// --packages-file tools/top1000.txt \ +// --output-dir ./out/go-thirdparty/v1/ +// +// Flags: +// +// --packages-file File with "module@version" lines (default: tools/top1000.txt). +// --output-dir Directory to write registry JSON files (default: ./out/go-thirdparty/v1/). +// +// Output layout: +// +// {output-dir}/manifest.json — registry index with checksums +// {output-dir}/{encoded-path}.json — per-package type metadata +// +// Module path encoding: slashes are replaced with underscores. +// +// "gorm.io/gorm" → "gorm.io_gorm.json" +// "github.com/gin-gonic/gin" → "github.com_gin-gonic_gin.json" +package main + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "flag" + "fmt" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/tools/internal/goextract" +) + +// moduleSpec holds a parsed "module@version" entry from the packages file. +type moduleSpec struct { + Path string + Version string +} + +// cmdRunner executes an external command and returns its standard output (stderr is not included). +// It is a package-level variable so it can be replaced in tests. 
+var cmdRunner = func(name string, arg ...string) ([]byte, error) { + return exec.Command(name, arg...).Output() //nolint:wrapcheck +} + +func main() { + packagesFile := flag.String("packages-file", "tools/top1000.txt", "File with module@version lines") + outputDir := flag.String("output-dir", "./out/go-thirdparty/v1/", "Output directory for JSON files") + flag.Parse() + + modules, err := readPackageList(*packagesFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading packages file: %v\n", err) + os.Exit(1) + } + + if err := os.MkdirAll(*outputDir, 0o755); err != nil { + fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err) + os.Exit(1) + } + + extractor := goextract.NewExtractor(goextract.Config{}) + manifest := core.NewGoManifest() + manifest.SchemaVersion = "1.0.0" + manifest.RegistryVersion = "v1" + manifest.GeneratedAt = time.Now().UTC().Format(time.RFC3339) + + var successCount int + for _, mod := range modules { + modDir, downloadErr := downloadModule(mod.Path, mod.Version) + if downloadErr != nil { + fmt.Fprintf(os.Stderr, "SKIP %s@%s: download failed: %v\n", mod.Path, mod.Version, downloadErr) + continue + } + + pkg, extractErr := extractor.ExtractSinglePackage(modDir, mod.Path) + if extractErr != nil { + fmt.Fprintf(os.Stderr, "SKIP %s@%s: extraction failed: %v\n", mod.Path, mod.Version, extractErr) + continue + } + + jsonBytes, marshalErr := json.MarshalIndent(pkg, "", " ") + if marshalErr != nil { + fmt.Fprintf(os.Stderr, "SKIP %s: marshal failed: %v\n", mod.Path, marshalErr) + continue + } + + encoded := encodeModulePath(mod.Path) + outputFile := filepath.Join(*outputDir, encoded+".json") + if writeErr := os.WriteFile(outputFile, jsonBytes, 0o644); writeErr != nil { + fmt.Fprintf(os.Stderr, "SKIP %s: write failed: %v\n", mod.Path, writeErr) + continue + } + + hash := sha256.Sum256(jsonBytes) + checksum := "sha256:" + hex.EncodeToString(hash[:]) + + manifest.Packages = append(manifest.Packages, &core.GoPackageEntry{ + ImportPath: 
mod.Path, + Checksum: checksum, + FileSize: int64(len(jsonBytes)), + FunctionCount: len(pkg.Functions), + TypeCount: len(pkg.Types), + ConstantCount: len(pkg.Constants), + }) + + successCount++ + fmt.Printf("OK %s@%s: %d types, %d functions -> %s\n", + mod.Path, mod.Version, len(pkg.Types), len(pkg.Functions), outputFile) + } + + // Sort manifest packages alphabetically for deterministic output. + sort.Slice(manifest.Packages, func(i, j int) bool { + return manifest.Packages[i].ImportPath < manifest.Packages[j].ImportPath + }) + + manifestBytes, err := json.MarshalIndent(manifest, "", " ") + if err != nil { + fmt.Fprintf(os.Stderr, "Error marshaling manifest: %v\n", err) + os.Exit(1) + } + manifestFile := filepath.Join(*outputDir, "manifest.json") + if err := os.WriteFile(manifestFile, manifestBytes, 0o644); err != nil { + fmt.Fprintf(os.Stderr, "Error writing manifest: %v\n", err) + os.Exit(1) + } + + fmt.Printf("\nGenerated manifest: %d/%d packages succeeded -> %s\n", + successCount, len(modules), manifestFile) +} + +// downloadModule runs "go mod download -json module@version" and returns the +// local directory path where the module source is cached by the Go toolchain. +func downloadModule(modulePath, version string) (string, error) { + output, err := cmdRunner("go", "mod", "download", "-json", modulePath+"@"+version) + if err != nil { + return "", fmt.Errorf("go mod download %s@%s: %w", modulePath, version, err) + } + + //nolint:tagliatelle // "Dir" is the literal field name in `go mod download -json` output. + var result struct { + Dir string `json:"Dir"` + } + if err := json.Unmarshal(output, &result); err != nil { + return "", fmt.Errorf("parsing go mod download output for %s@%s: %w", modulePath, version, err) + } + + if result.Dir == "" { + return "", fmt.Errorf("no Dir in go mod download output for %s@%s", modulePath, version) + } + + return result.Dir, nil +} + +// readPackageList reads a file with "module@version" lines. 
+// Lines starting with "#" are treated as comments and skipped. Empty lines are skipped. +// Returns an error if any non-empty, non-comment line does not have the "module@version" format. +func readPackageList(filename string) ([]moduleSpec, error) { + data, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("reading packages file %s: %w", filename, err) + } + + var modules []moduleSpec + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + parts := strings.SplitN(line, "@", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid line (expected module@version): %q", line) + } + modules = append(modules, moduleSpec{ + Path: parts[0], + Version: parts[1], + }) + } + return modules, nil +} + +// encodeModulePath encodes a Go module import path for use as a CDN filename. +// Slashes are replaced with underscores, consistent with PR-06's GoThirdPartyRegistryRemote. 
+// +// Examples: +// +// "gorm.io/gorm" → "gorm.io_gorm" +// "github.com/gin-gonic/gin" → "github.com_gin-gonic_gin" +// "github.com/jackc/pgx/v5" → "github.com_jackc_pgx_v5" +func encodeModulePath(modulePath string) string { + return strings.ReplaceAll(modulePath, "/", "_") +} diff --git a/sast-engine/tools/generate_go_thirdparty_registry_test.go b/sast-engine/tools/generate_go_thirdparty_registry_test.go new file mode 100644 index 00000000..3209fda9 --- /dev/null +++ b/sast-engine/tools/generate_go_thirdparty_registry_test.go @@ -0,0 +1,258 @@ +//go:build cpf_generate_thirdparty_registry + +package main + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/shivasurya/code-pathfinder/sast-engine/tools/internal/goextract" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// readPackageList +// --------------------------------------------------------------------------- + +func TestReadPackageList_Valid(t *testing.T) { + content := `# Web frameworks +github.com/gin-gonic/gin@v1.10.0 +gorm.io/gorm@v1.25.12 +` + f := writeTempFile(t, content) + modules, err := readPackageList(f) + require.NoError(t, err) + require.Len(t, modules, 2) + assert.Equal(t, "github.com/gin-gonic/gin", modules[0].Path) + assert.Equal(t, "v1.10.0", modules[0].Version) + assert.Equal(t, "gorm.io/gorm", modules[1].Path) + assert.Equal(t, "v1.25.12", modules[1].Version) +} + +func TestReadPackageList_CommentsAndBlanksSkipped(t *testing.T) { + content := ` +# comment line + + # indented comment +github.com/pkg/errors@v0.9.1 + +` + f := writeTempFile(t, content) + modules, err := readPackageList(f) + require.NoError(t, err) + require.Len(t, modules, 1) + assert.Equal(t, "github.com/pkg/errors", modules[0].Path) + assert.Equal(t, "v0.9.1", modules[0].Version) +} + +func TestReadPackageList_EmptyFile(t *testing.T) { + f := writeTempFile(t, "") + modules, err 
:= readPackageList(f) + require.NoError(t, err) + assert.Empty(t, modules) +} + +func TestReadPackageList_MalformedLine(t *testing.T) { + content := "github.com/gin-gonic/gin\n" // missing @version + f := writeTempFile(t, content) + _, err := readPackageList(f) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid line") +} + +func TestReadPackageList_NonExistentFile(t *testing.T) { + _, err := readPackageList("/nonexistent/path/packages.txt") + require.Error(t, err) +} + +func TestReadPackageList_VersionWithAt(t *testing.T) { + // Versions can technically contain @ in pseudo-versions; SplitN(2) handles this. + content := "github.com/pkg/errors@v0.9.1-0.20210430015257-a9b15e44dba1\n" + f := writeTempFile(t, content) + modules, err := readPackageList(f) + require.NoError(t, err) + require.Len(t, modules, 1) + assert.Equal(t, "github.com/pkg/errors", modules[0].Path) + // Version includes everything after the first @. + assert.Equal(t, "v0.9.1-0.20210430015257-a9b15e44dba1", modules[0].Version) +} + +// --------------------------------------------------------------------------- +// encodeModulePath +// --------------------------------------------------------------------------- + +func TestEncodeModulePath(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"gorm.io/gorm", "gorm.io_gorm"}, + {"github.com/gin-gonic/gin", "github.com_gin-gonic_gin"}, + {"github.com/jackc/pgx/v5", "github.com_jackc_pgx_v5"}, + {"google.golang.org/grpc", "google.golang.org_grpc"}, + {"gopkg.in/yaml.v3", "gopkg.in_yaml.v3"}, + {"k8s.io/client-go", "k8s.io_client-go"}, + {"go.uber.org/zap", "go.uber.org_zap"}, + {"fmt", "fmt"}, // single segment — no slashes + {"net/http", "net_http"}, // two segments + {"", ""}, // empty + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + assert.Equal(t, tt.expected, encodeModulePath(tt.input)) + }) + } +} + +// --------------------------------------------------------------------------- +// 
downloadModule +// --------------------------------------------------------------------------- + +func TestDownloadModule_CommandError(t *testing.T) { + // Replace cmdRunner with one that always fails. + orig := cmdRunner + defer func() { cmdRunner = orig }() + cmdRunner = func(_ string, _ ...string) ([]byte, error) { + return nil, os.ErrNotExist + } + + _, err := downloadModule("github.com/gin-gonic/gin", "v1.10.0") + require.Error(t, err) + assert.Contains(t, err.Error(), "go mod download") +} + +func TestDownloadModule_InvalidJSON(t *testing.T) { + orig := cmdRunner + defer func() { cmdRunner = orig }() + cmdRunner = func(_ string, _ ...string) ([]byte, error) { + return []byte("not-json"), nil + } + + _, err := downloadModule("github.com/gin-gonic/gin", "v1.10.0") + require.Error(t, err) + assert.Contains(t, err.Error(), "parsing go mod download output") +} + +func TestDownloadModule_EmptyDir(t *testing.T) { + orig := cmdRunner + defer func() { cmdRunner = orig }() + result, _ := json.Marshal(goModDownloadResult{Dir: ""}) + cmdRunner = func(_ string, _ ...string) ([]byte, error) { + return result, nil + } + + _, err := downloadModule("github.com/gin-gonic/gin", "v1.10.0") + require.Error(t, err) + assert.Contains(t, err.Error(), "no Dir") +} + +func TestDownloadModule_Success(t *testing.T) { + fakeDir := t.TempDir() + orig := cmdRunner + defer func() { cmdRunner = orig }() + result, _ := json.Marshal(goModDownloadResult{Dir: fakeDir}) + cmdRunner = func(_ string, _ ...string) ([]byte, error) { + return result, nil + } + + dir, err := downloadModule("github.com/gin-gonic/gin", "v1.10.0") + require.NoError(t, err) + assert.Equal(t, fakeDir, dir) +} + +// --------------------------------------------------------------------------- +// Integration: full pipeline with a synthetic package directory +// --------------------------------------------------------------------------- + +func TestMain_EndToEnd(t *testing.T) { + // Build a fake module directory that looks like 
a downloaded module. + modDir := t.TempDir() + src := `package mypkg + +// Client is an HTTP client. +type Client struct{} + +// Get performs a GET request. +func (c *Client) Get(url string) (string, error) { return "", nil } + +// New creates a new Client. +func New() *Client { return &Client{} } +` + require.NoError(t, os.WriteFile(filepath.Join(modDir, "client.go"), []byte(src), 0o644)) + + // Wire up cmdRunner to return the fake module dir. + orig := cmdRunner + defer func() { cmdRunner = orig }() + result, _ := json.Marshal(goModDownloadResult{Dir: modDir}) + cmdRunner = func(_ string, _ ...string) ([]byte, error) { + return result, nil + } + + // Build a packages file. + pkgFile := writeTempFile(t, "example.com/myhttp@v1.0.0\n") + + // Set output dir. + outDir := t.TempDir() + + // Run main-equivalent logic. + modules, err := readPackageList(pkgFile) + require.NoError(t, err) + + dir, err := downloadModule(modules[0].Path, modules[0].Version) + require.NoError(t, err) + + extractor := newTestExtractor() + pkg, err := extractor.ExtractSinglePackage(dir, modules[0].Path) + require.NoError(t, err) + assert.Contains(t, pkg.Types, "Client") + assert.Contains(t, pkg.Functions, "New") + + // Verify the encoded file name. + encoded := encodeModulePath(modules[0].Path) + assert.Equal(t, "example.com_myhttp", encoded) + + // Write the JSON. + jsonBytes, err := json.MarshalIndent(pkg, "", " ") + require.NoError(t, err) + outFile := filepath.Join(outDir, encoded+".json") + require.NoError(t, os.WriteFile(outFile, jsonBytes, 0o644)) + + // Verify it can be read back. + data, err := os.ReadFile(outFile) + require.NoError(t, err) + // Verify import_path field is present (snake_case matches goextract.Package JSON tags). + //nolint:tagliatelle // import_path matches the goextract.Package JSON schema (snake_case). 
+ var parsed struct { + ImportPath string `json:"import_path"` + } + require.NoError(t, json.Unmarshal(data, &parsed)) + assert.Equal(t, "example.com/myhttp", parsed.ImportPath) +} + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +// goModDownloadResult mirrors the JSON output of `go mod download -json`. +// +//nolint:tagliatelle // "Dir" matches the literal field in `go mod download -json` output. +type goModDownloadResult struct { + Dir string `json:"Dir"` +} + +func writeTempFile(t *testing.T, content string) string { + t.Helper() + f, err := os.CreateTemp(t.TempDir(), "pkglist-*.txt") + require.NoError(t, err) + _, err = f.WriteString(content) + require.NoError(t, err) + require.NoError(t, f.Close()) + return f.Name() +} + +func newTestExtractor() *goextract.Extractor { + return goextract.NewExtractor(goextract.Config{}) +} diff --git a/sast-engine/tools/internal/goextract/extractor.go b/sast-engine/tools/internal/goextract/extractor.go index b4ad7276..048de836 100644 --- a/sast-engine/tools/internal/goextract/extractor.go +++ b/sast-engine/tools/internal/goextract/extractor.go @@ -156,6 +156,66 @@ func dirHasGoFiles(dir string) (bool, error) { return false, nil } +// ExtractSinglePackage extracts type metadata from a single Go package directory. +// Unlike Run (which processes the entire Go stdlib tree), this operates on a single +// directory path supplied by the caller (e.g., a `go mod download` cache path). +// +// Parameters: +// - packageDir: absolute path to the package source directory +// - importPath: the Go import path (e.g., "gorm.io/gorm") +// +// Files named *_test.go are excluded. Individual files that fail to parse (e.g., +// due to cgo directives or build-constrained syntax) are skipped silently so the +// method returns the best-effort API surface of the remaining files. 
+// Returns an error if packageDir cannot be read or contains no .go source files. +func (e *Extractor) ExtractSinglePackage(packageDir, importPath string) (*Package, error) { + entries, err := os.ReadDir(packageDir) + if err != nil { + return nil, fmt.Errorf("reading directory %s: %w", packageDir, err) + } + + var goFiles []string + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + if strings.HasSuffix(name, ".go") && !strings.HasSuffix(name, "_test.go") { + goFiles = append(goFiles, filepath.Join(packageDir, name)) + } + } + + if len(goFiles) == 0 { + return nil, fmt.Errorf("no .go files found in %s", packageDir) + } + + pkg := &Package{ + ImportPath: importPath, + GoVersion: e.config.GoVersion, + GeneratedAt: time.Now().UTC().Format(time.RFC3339), + Functions: make(map[string]*Function), + Types: make(map[string]*Type), + Constants: make(map[string]*Constant), + Variables: make(map[string]*Variable), + } + + fset := token.NewFileSet() + for _, filePath := range goFiles { + astFile, parseErr := parser.ParseFile(fset, filePath, nil, parser.ParseComments) + if parseErr != nil { + // Skip files that fail to parse (cgo, unsupported syntax, etc.). + continue + } + // Skip test-only package declarations (e.g. "package mypkg_test"). + if strings.HasSuffix(astFile.Name.Name, "_test") { + continue + } + e.extractFromFile(pkg, astFile, fset) + } + + return pkg, nil +} + // extractPackage parses all non-test .go files in the given import path and // returns the extracted Package metadata. 
func (e *Extractor) extractPackage(importPath string) (*Package, error) { diff --git a/sast-engine/tools/internal/goextract/extractor_test.go b/sast-engine/tools/internal/goextract/extractor_test.go index a05a890e..b83ebd81 100644 --- a/sast-engine/tools/internal/goextract/extractor_test.go +++ b/sast-engine/tools/internal/goextract/extractor_test.go @@ -1237,3 +1237,155 @@ func (b *Buffer) Write(p []byte) (int, error) { return 0, nil } // Also available as a top-level function with receiver prefix. assert.Contains(t, pkg.Functions, "Buffer.Write") } + +// --- Tests for ExtractSinglePackage --- + +func TestExtractSinglePackage_ValidPackage(t *testing.T) { + dir := t.TempDir() + src := `package mypkg + +// DB is a database handle. +type DB struct { + Name string +} + +// Query executes a raw SQL query. +func (db *DB) Query(sql string) (string, error) { return "", nil } + +// Open opens a new database connection. +func Open(dsn string) (*DB, error) { return nil, nil } + +const DefaultTimeout = 30 + +var DefaultDB *DB +` + require.NoError(t, os.WriteFile(filepath.Join(dir, "db.go"), []byte(src), 0o644)) + + e := NewExtractor(Config{GoVersion: "1.26"}) + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.NoError(t, err) + require.NotNil(t, pkg) + + assert.Equal(t, "example.com/mypkg", pkg.ImportPath) + assert.Equal(t, "1.26", pkg.GoVersion) + assert.NotEmpty(t, pkg.GeneratedAt) + + // Exported type must be present. + assert.Contains(t, pkg.Types, "DB") + assert.Equal(t, "struct", pkg.Types["DB"].Kind) + assert.Contains(t, pkg.Types["DB"].Fields, pkg.Types["DB"].Fields[0]) // Name field + + // Method on DB must be in Functions with receiver prefix. + assert.Contains(t, pkg.Functions, "DB.Query") + assert.Contains(t, pkg.Functions, "Open") + + // Constants and variables. 
+ assert.Contains(t, pkg.Constants, "DefaultTimeout") + assert.Contains(t, pkg.Variables, "DefaultDB") +} + +func TestExtractSinglePackage_MultipleFiles(t *testing.T) { + dir := t.TempDir() + src1 := "package mypkg\n\nfunc Alpha() {}\n" + src2 := "package mypkg\n\nfunc Beta() {}\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "a.go"), []byte(src1), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "b.go"), []byte(src2), 0o644)) + + e := NewExtractor(Config{}) + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.NoError(t, err) + assert.Contains(t, pkg.Functions, "Alpha") + assert.Contains(t, pkg.Functions, "Beta") +} + +func TestExtractSinglePackage_TestFilesExcluded(t *testing.T) { + dir := t.TempDir() + src := "package mypkg\n\nfunc Exported() {}\n" + testSrc := "package mypkg\n\nfunc TestHelper() {}\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "pkg.go"), []byte(src), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "pkg_test.go"), []byte(testSrc), 0o644)) + + e := NewExtractor(Config{}) + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.NoError(t, err) + + assert.Contains(t, pkg.Functions, "Exported") + assert.NotContains(t, pkg.Functions, "TestHelper") +} + +func TestExtractSinglePackage_UnexportedSymbolsSkipped(t *testing.T) { + dir := t.TempDir() + src := "package mypkg\n\nfunc exported() {}\nfunc Exported() {}\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "pkg.go"), []byte(src), 0o644)) + + e := NewExtractor(Config{}) + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.NoError(t, err) + + assert.Contains(t, pkg.Functions, "Exported") + assert.NotContains(t, pkg.Functions, "exported") +} + +func TestExtractSinglePackage_NoGoFiles(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("# pkg"), 0o644)) + + e := NewExtractor(Config{}) + _, err := e.ExtractSinglePackage(dir, 
"example.com/mypkg") + require.Error(t, err) + assert.Contains(t, err.Error(), "no .go files found") +} + +func TestExtractSinglePackage_OnlyTestFiles(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(dir, "pkg_test.go"), + []byte("package mypkg_test\n\nfunc TestSomething() {}\n"), + 0o644, + )) + + e := NewExtractor(Config{}) + _, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.Error(t, err) + assert.Contains(t, err.Error(), "no .go files found") +} + +func TestExtractSinglePackage_NonExistentDir(t *testing.T) { + e := NewExtractor(Config{}) + _, err := e.ExtractSinglePackage("/nonexistent/path/that/does/not/exist", "example.com/mypkg") + require.Error(t, err) + assert.Contains(t, err.Error(), "reading directory") +} + +func TestExtractSinglePackage_ParseErrorSkipped(t *testing.T) { + dir := t.TempDir() + // Valid file with an exported function. + validSrc := "package mypkg\n\nfunc GoodFunc() {}\n" + // Invalid Go syntax — parser will fail but must be skipped gracefully. + invalidSrc := "package mypkg\n\nthis is not valid go syntax !!!\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "good.go"), []byte(validSrc), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "bad.go"), []byte(invalidSrc), 0o644)) + + e := NewExtractor(Config{}) + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + // Must succeed despite the bad file. + require.NoError(t, err) + assert.Contains(t, pkg.Functions, "GoodFunc") +} + +func TestExtractSinglePackage_TestOnlyPackageNameSkipped(t *testing.T) { + dir := t.TempDir() + // A file with package name ending in _test (external test package) is not named *_test.go + // but uses a _test package suffix — it should be skipped. 
+ testPkgSrc := "package mypkg_test\n\nfunc Helper() {}\n" + goodSrc := "package mypkg\n\nfunc Exported() {}\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "external_test.go"), []byte(testPkgSrc), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "pkg.go"), []byte(goodSrc), 0o644)) + + e := NewExtractor(Config{}) + // external_test.go is named *_test.go so it's excluded by the filename filter. + pkg, err := e.ExtractSinglePackage(dir, "example.com/mypkg") + require.NoError(t, err) + assert.Contains(t, pkg.Functions, "Exported") + assert.NotContains(t, pkg.Functions, "Helper") +} diff --git a/sast-engine/tools/top1000.txt b/sast-engine/tools/top1000.txt new file mode 100644 index 00000000..3ee1a53a --- /dev/null +++ b/sast-engine/tools/top1000.txt @@ -0,0 +1,69 @@ +# Tier 1: Security-relevant Go packages (validation set) +# Format: module@version +# These packages are chosen for security analysis coverage: +# - Web frameworks (tainted input sources) +# - Database/ORM (SQL injection sinks) +# - HTTP clients (SSRF sinks) +# - Auth/crypto (authentication vulnerabilities) +# - Template engines (XSS sinks) +# - Configuration (credential exposure) + +# Web frameworks (source of tainted input) +github.com/gin-gonic/gin@v1.10.0 +github.com/labstack/echo/v4@v4.12.0 +github.com/gofiber/fiber/v2@v2.52.5 +github.com/go-chi/chi/v5@v5.1.0 +github.com/gorilla/mux@v1.8.1 + +# Database / ORM (SQL injection sinks) +gorm.io/gorm@v1.25.12 +github.com/jmoiron/sqlx@v1.4.0 +github.com/jackc/pgx/v5@v5.7.1 + +# NoSQL (injection sinks) +go.mongodb.org/mongo-driver@v1.17.1 +github.com/redis/go-redis/v9@v9.7.0 + +# Authentication / crypto +github.com/golang-jwt/jwt/v5@v5.2.1 +golang.org/x/crypto@v0.28.0 + +# HTTP clients (SSRF sinks) +github.com/go-resty/resty/v2@v2.14.0 + +# Template engines (XSS sinks) +github.com/flosch/pongo2/v6@v6.0.0 + +# Configuration (credential exposure) +github.com/spf13/viper@v1.19.0 + +# Serialization (deserialization vulnerabilities) 
+gopkg.in/yaml.v3@v3.0.1 +github.com/pelletier/go-toml/v2@v2.2.3 + +# Logging (log injection) +go.uber.org/zap@v1.27.0 +github.com/sirupsen/logrus@v1.9.3 + +# Cloud SDKs +github.com/aws/aws-sdk-go-v2@v1.32.2 +cloud.google.com/go@v0.116.0 + +# gRPC +google.golang.org/grpc@v1.68.0 +google.golang.org/protobuf@v1.35.1 + +# Kubernetes +k8s.io/client-go@v0.31.2 + +# Command execution +github.com/codeskyblue/go-sh@v0.0.0-20200712050446-30169cf553fe + +# File handling +github.com/spf13/afero@v1.11.0 + +# Validation +github.com/go-playground/validator/v10@v10.22.1 + +# Testing utilities (for completeness) +github.com/stretchr/testify@v1.9.0