Skip to content

Commit 4dffef2

Browse files
shivasurya and claude authored
feat: resolve stdlib call: attribute placeholders via CDN registry (#618)
* feat: resolve call: attribute placeholders via stdlib/thirdparty registry When ResolveAttributePlaceholders encounters a call: placeholder like 'call:sqlite3.connect' that doesn't match a project function, it now falls back to checking the stdlib and third-party CDN registries for the function's return type or constructor. This resolves attribute types for stdlib calls (e.g., sqlite3.connect → sqlite3.Connection) and constructors (e.g., configparser.ConfigParser), enabling deep chain resolution through stdlib intermediate types. Project ReturnTypes lookup still takes priority over stdlib fallback. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: 100% coverage for call: placeholder stdlib/thirdparty resolution Edge case tests for resolveCallPlaceholderViaRegistry and tryRegistryLookup: - nil typeEngine safety - single-part funcName (no dots) skipped - unknown/empty return type skipped - stdlib checked before thirdparty (ordering guarantee) - thirdparty constructor fallback - unknown module stays unresolved Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: extract dotted call types and use silentLogger for stdlib resolution Three fixes to complete the stdlib attribute type propagation: 1. extraction/attributes.go: Handle attribute nodes (dotted calls like sqlite3.connect) in inferFromFunctionCall, not just identifiers. This creates call:sqlite3.connect placeholders for self.conn = sqlite3.connect(). 2. resolution/attribute.go: Use a silentLogger (io.Discard writer) for stdlib/thirdparty registry lookups instead of nil, preventing panic when modules need to be lazy-loaded from CDN during attribute resolution. 3. registry/{stdlib,thirdparty}_remote.go: Add GetCachedModule() method for cache-only lookups without CDN downloads. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: 100% coverage for call: placeholder stdlib/thirdparty resolution Cover all lines flagged by Codecov: - extraction/attributes.go: dotted call extraction (sqlite3.connect) - registry/stdlib_remote.go: GetCachedModule - registry/thirdparty_remote.go: GetCachedModule Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 75a4b10 commit 4dffef2

8 files changed

Lines changed: 466 additions & 0 deletions

File tree

sast-engine/graph/callgraph/extraction/attributes.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,19 @@ func inferFromFunctionCall(node *sitter.Node, sourceCode []byte, _ *resolution.T
631631
}
632632
}
633633

634+
// Dotted function/constructor call: module.func() or module.Class()
635+
// Examples: sqlite3.connect(path), configparser.ConfigParser(), logging.getLogger(name)
636+
if funcNode.Type() == "attribute" {
637+
fullName := funcNode.Content(sourceCode)
638+
if len(fullName) > 0 && strings.Contains(fullName, ".") {
639+
return &core.TypeInfo{
640+
TypeFQN: "call:" + fullName,
641+
Confidence: 0.8,
642+
Source: "function_call_attribute",
643+
}
644+
}
645+
}
646+
634647
return nil
635648
}
636649

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package extraction
2+
3+
import (
4+
"testing"
5+
6+
"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core"
7+
"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/registry"
8+
"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution"
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestExtractClassAttributes_DottedCallPlaceholder(t *testing.T) {
14+
source := []byte(`
15+
import sqlite3
16+
import configparser
17+
18+
class DbWrapper:
19+
def __init__(self, path):
20+
self.conn = sqlite3.connect(path)
21+
22+
class ConfigWrapper:
23+
def __init__(self):
24+
self.parser = configparser.ConfigParser()
25+
26+
class SimpleWrapper:
27+
def __init__(self):
28+
self.data = {}
29+
self.name = "test"
30+
self.items = []
31+
`)
32+
33+
moduleRegistry := core.NewModuleRegistry()
34+
typeEngine := resolution.NewTypeInferenceEngine(moduleRegistry)
35+
typeEngine.Attributes = registry.NewAttributeRegistry()
36+
37+
err := ExtractClassAttributes("test.py", source, "test_module", typeEngine, typeEngine.Attributes)
38+
require.NoError(t, err)
39+
40+
// Dotted call: sqlite3.connect → call:sqlite3.connect
41+
attr := typeEngine.Attributes.GetAttribute("test_module.DbWrapper", "conn")
42+
require.NotNil(t, attr, "conn attribute should be extracted")
43+
assert.Equal(t, "call:sqlite3.connect", attr.Type.TypeFQN)
44+
45+
// Dotted constructor: configparser.ConfigParser → call:configparser.ConfigParser
46+
attr = typeEngine.Attributes.GetAttribute("test_module.ConfigWrapper", "parser")
47+
require.NotNil(t, attr, "parser attribute should be extracted")
48+
assert.Equal(t, "call:configparser.ConfigParser", attr.Type.TypeFQN)
49+
50+
// Literal dict
51+
attr = typeEngine.Attributes.GetAttribute("test_module.SimpleWrapper", "data")
52+
require.NotNil(t, attr)
53+
assert.Equal(t, "builtins.dict", attr.Type.TypeFQN)
54+
}

sast-engine/graph/callgraph/registry/stdlib_remote.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,14 @@ func (r *StdlibRegistryRemote) HasModule(moduleName string) bool {
214214
return false
215215
}
216216

217+
// GetCachedModule retrieves a module from the in-memory cache without triggering a CDN download.
218+
// Returns nil if the module is not cached. Safe to call without a logger.
219+
func (r *StdlibRegistryRemote) GetCachedModule(moduleName string) *core.StdlibModule {
220+
r.CacheMutex.RLock()
221+
defer r.CacheMutex.RUnlock()
222+
return r.ModuleCache[moduleName]
223+
}
224+
217225
// GetFunction retrieves a function from a module, downloading the module if needed.
218226
//
219227
// Parameters:

sast-engine/graph/callgraph/registry/stdlib_remote_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,3 +981,27 @@ func TestStdlibRegistryRemote_FindClassMethodAlias_Inherited(t *testing.T) {
981981
assert.Equal(t, "Base", className)
982982
assert.Equal(t, "builtins.bytes", method.ReturnType)
983983
}
984+
985+
func TestStdlibRegistryRemote_GetCachedModule(t *testing.T) {
986+
remote := NewStdlibRegistryRemote("https://example.com", "3.14")
987+
remote.Manifest = &core.Manifest{
988+
Modules: []*core.ModuleEntry{{Name: "os"}},
989+
}
990+
991+
// Not cached — returns nil without downloading
992+
result := remote.GetCachedModule("os")
993+
assert.Nil(t, result, "should return nil when module not in cache")
994+
995+
// Pre-populate cache
996+
module := &core.StdlibModule{Module: "os", Functions: map[string]*core.StdlibFunction{}}
997+
remote.ModuleCache["os"] = module
998+
999+
// Now cached — returns the module
1000+
result = remote.GetCachedModule("os")
1001+
assert.NotNil(t, result)
1002+
assert.Equal(t, "os", result.Module)
1003+
1004+
// Different module — still nil
1005+
result = remote.GetCachedModule("sys")
1006+
assert.Nil(t, result)
1007+
}

sast-engine/graph/callgraph/registry/thirdparty_remote.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,14 @@ func (r *ThirdPartyRegistryRemote) HasModule(moduleName string) bool {
171171
return false
172172
}
173173

174+
// GetCachedModule retrieves a module from the in-memory cache without triggering a CDN download.
175+
// Returns nil if the module is not cached. Safe to call without a logger.
176+
func (r *ThirdPartyRegistryRemote) GetCachedModule(moduleName string) *core.StdlibModule {
177+
r.CacheMutex.RLock()
178+
defer r.CacheMutex.RUnlock()
179+
return r.ModuleCache[moduleName]
180+
}
181+
174182
// GetFunction retrieves a function from a module, downloading the module if needed.
175183
func (r *ThirdPartyRegistryRemote) GetFunction(moduleName, functionName string, logger *output.Logger) *core.StdlibFunction {
176184
module, err := r.GetModule(moduleName, logger)

sast-engine/graph/callgraph/registry/thirdparty_remote_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,3 +598,27 @@ func TestVerifyThirdPartyChecksum_Deterministic(t *testing.T) {
598598
assert.True(t, verifyThirdPartyChecksum(data2, checksum),
599599
"checksum of first marshal must verify against second marshal")
600600
}
601+
602+
func TestThirdPartyRegistryRemote_GetCachedModule(t *testing.T) {
603+
remote := NewThirdPartyRegistryRemote("https://example.com")
604+
remote.Manifest = &core.Manifest{
605+
Modules: []*core.ModuleEntry{{Name: "flask"}},
606+
}
607+
608+
// Not cached — returns nil without downloading
609+
result := remote.GetCachedModule("flask")
610+
assert.Nil(t, result, "should return nil when module not in cache")
611+
612+
// Pre-populate cache
613+
module := &core.StdlibModule{Module: "flask", Functions: map[string]*core.StdlibFunction{}}
614+
remote.ModuleCache["flask"] = module
615+
616+
// Now cached — returns the module
617+
result = remote.GetCachedModule("flask")
618+
assert.NotNil(t, result)
619+
assert.Equal(t, "flask", result.Module)
620+
621+
// Different module — still nil
622+
result = remote.GetCachedModule("django")
623+
assert.Nil(t, result)
624+
}

sast-engine/graph/callgraph/resolution/attribute.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ package resolution
22

33
import (
44
"fmt"
5+
"io"
56
"slices"
67
"strings"
78

89
"github.com/shivasurya/code-pathfinder/sast-engine/graph"
910
"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core"
1011
"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/registry"
12+
"github.com/shivasurya/code-pathfinder/sast-engine/output"
1113
)
1214

15+
// silentLogger is a shared logger that discards all output.
16+
// Used for registry lookups during attribute resolution where no logger is available.
17+
var silentLogger = output.NewLoggerWithWriter(output.VerbosityDefault, io.Discard)
18+
1319
// FailureStats tracks why attribute chain resolution fails.
1420
type FailureStats struct {
1521
TotalAttempts int
@@ -539,7 +545,11 @@ func ResolveAttributePlaceholders(
539545
attr.Type.TypeFQN = returnType.TypeFQN
540546
attr.Type.Confidence = returnType.Confidence * 0.8 // Decay confidence
541547
attr.Type.Source = "function_call_attribute"
548+
break
542549
}
550+
551+
// Fallback: try stdlib/thirdparty registry for calls like "sqlite3.connect"
552+
resolveCallPlaceholderViaRegistry(funcName, attr, typeEngine)
543553
case strings.HasPrefix(originalType, "param:"):
544554
// param:User → resolve type annotation
545555
typeName := strings.TrimPrefix(originalType, "param:")
@@ -556,6 +566,74 @@ func ResolveAttributePlaceholders(
556566
}
557567
}
558568

569+
// resolveCallPlaceholderViaRegistry resolves a "call:" placeholder by checking
570+
// the stdlib and third-party CDN registries for the function's return type.
571+
// For example, "sqlite3.connect" → checks stdlib for return type → "sqlite3.Connection".
572+
// Also handles constructor calls like "configparser.ConfigParser" → "configparser.ConfigParser".
573+
func resolveCallPlaceholderViaRegistry(funcName string, attr *core.ClassAttribute, typeEngine *TypeInferenceEngine) {
574+
if typeEngine == nil {
575+
return
576+
}
577+
578+
// Split "sqlite3.connect" → module="sqlite3", name="connect".
579+
lastDot := strings.LastIndex(funcName, ".")
580+
if lastDot < 0 {
581+
return
582+
}
583+
moduleName := funcName[:lastDot]
584+
name := funcName[lastDot+1:]
585+
586+
tryRegistryLookup(moduleName, name, attr, typeEngine)
587+
}
588+
589+
// tryRegistryLookup checks stdlib then thirdparty for a function or constructor.
590+
// Uses silentLogger for registry lookups that may trigger lazy module downloads.
591+
func tryRegistryLookup(moduleName, name string, attr *core.ClassAttribute, typeEngine *TypeInferenceEngine) bool {
592+
// Check stdlib
593+
if typeEngine.StdlibRemote != nil {
594+
if loader, ok := typeEngine.StdlibRemote.(*registry.StdlibRegistryRemote); ok && loader.HasModule(moduleName) {
595+
// Try as function (e.g., sqlite3.connect → returns sqlite3.Connection)
596+
fn := loader.GetFunction(moduleName, name, silentLogger)
597+
if fn != nil && fn.ReturnType != "" && fn.ReturnType != "unknown" {
598+
attr.Type.TypeFQN = fn.ReturnType
599+
attr.Type.Confidence = fn.Confidence * 0.85
600+
attr.Type.Source = "stdlib_function_call_attribute"
601+
return true
602+
}
603+
// Try as constructor (e.g., configparser.ConfigParser → type is configparser.ConfigParser)
604+
cls := loader.GetClass(moduleName, name, silentLogger)
605+
if cls != nil {
606+
attr.Type.TypeFQN = moduleName + "." + name
607+
attr.Type.Confidence = 0.9
608+
attr.Type.Source = "stdlib_constructor_attribute"
609+
return true
610+
}
611+
}
612+
}
613+
614+
// Check thirdparty
615+
if typeEngine.ThirdPartyRemote != nil {
616+
if loader, ok := typeEngine.ThirdPartyRemote.(*registry.ThirdPartyRegistryRemote); ok && loader.HasModule(moduleName) {
617+
fn := loader.GetFunction(moduleName, name, silentLogger)
618+
if fn != nil && fn.ReturnType != "" && fn.ReturnType != "unknown" {
619+
attr.Type.TypeFQN = fn.ReturnType
620+
attr.Type.Confidence = fn.Confidence * 0.85
621+
attr.Type.Source = "thirdparty_function_call_attribute"
622+
return true
623+
}
624+
cls := loader.GetClass(moduleName, name, silentLogger)
625+
if cls != nil {
626+
attr.Type.TypeFQN = moduleName + "." + name
627+
attr.Type.Confidence = 0.9
628+
attr.Type.Source = "thirdparty_constructor_attribute"
629+
return true
630+
}
631+
}
632+
}
633+
634+
return false
635+
}
636+
559637
// resolveClassName resolves a class name to its fully qualified name.
560638
// Uses module registry, code graph, and ImportMap to find the class definition.
561639
//

0 commit comments

Comments
 (0)