Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
83cdc20
Make initial changes to the final json files.
jess-lowe Mar 11, 2026
80e9eac
cache canonicalizing link
jess-lowe Mar 12, 2026
258139d
add interoperability with cve5 records too
jess-lowe Mar 16, 2026
be4008f
add database specific
jess-lowe Mar 16, 2026
380f9f6
Move MergeRangesAndCreateAffected to common
jess-lowe Mar 16, 2026
dbe777b
handle when introduced the same as lessthan or equal meaning its not …
jess-lowe Mar 16, 2026
f2d5477
handle unresolved signatures
jess-lowe Mar 16, 2026
2d34d0d
fix nested unresolved ranges and duplicate unresolved ranges.
jess-lowe Mar 16, 2026
f2e66a7
fix linter errors
jess-lowe Mar 22, 2026
dc3ec8b
reduce duplication of logic
jess-lowe Mar 22, 2026
030e8f9
fix linting errors
jess-lowe Mar 22, 2026
099130d
condense unresolved_ranges output
jess-lowe Mar 23, 2026
3ed1854
Make ToRangeWithMetadata a normal function
jess-lowe Mar 23, 2026
cc40dc5
Add source information to metadata
jess-lowe Mar 23, 2026
9a2515c
Added grouping tests
jess-lowe Mar 23, 2026
7d30cdf
make sure utility output is deterministic
jess-lowe Mar 23, 2026
79d46e6
make sure that if canonical link 429s, it sets outcome to Error
jess-lowe Mar 23, 2026
ccdeba3
remove skiponcloudbuild bc of vcr
jess-lowe Mar 23, 2026
348c95b
have resolved ranges also use metadata
jess-lowe Mar 23, 2026
8bd4a8c
add snapshot tests
jess-lowe Mar 27, 2026
4bdffea
Merge remote-tracking branch 'upstream/master' into feat/nvd/rebasing-db
jess-lowe Mar 27, 2026
c68f2b3
collect ranges by CPE
jess-lowe Mar 27, 2026
4f8c828
Update snaps
jess-lowe Mar 29, 2026
eb2afdd
Add testdata
jess-lowe Mar 29, 2026
b2baf76
fix lint + update snaps
jess-lowe Mar 29, 2026
d0044b0
Merge branch 'master' into feat/nvd/rebasing-db
jess-lowe Mar 29, 2026
6643a1b
add more relevant files
jess-lowe Mar 30, 2026
2252b2e
add nvd records
jess-lowe Mar 30, 2026
3c16fe6
add indenting
jess-lowe Mar 30, 2026
07a8e73
fixlint
jess-lowe Mar 31, 2026
e3bb5bc
remove combine-to-osv snaps for now
jess-lowe Mar 31, 2026
6747147
change unresolved_ranges "versions" to be "extracted_events"
jess-lowe Apr 7, 2026
e5f635b
update snaps
jess-lowe Apr 7, 2026
35c85fd
fix commits from refs range grouping issue
jess-lowe Apr 7, 2026
383a2fd
separate different git ranges into different affected packages
jess-lowe Apr 7, 2026
742437b
fix duplicate line
jess-lowe Apr 7, 2026
fa9189a
use reflect instead of string comparison
jess-lowe Apr 7, 2026
bdb3509
return link on err for FindCanonicalLink
jess-lowe Apr 7, 2026
4b75080
added more comments
jess-lowe Apr 7, 2026
669674e
addressing more nits
jess-lowe Apr 7, 2026
4fa502b
update todo
jess-lowe Apr 7, 2026
73c9ca6
Merge branch 'master' into feat/nvd/rebasing-db
jess-lowe Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions vulnfeeds/cmd/combine-to-osv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,7 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []*
}

if c5Intro != "" || c5Fixed != "" {
newRange := conversion.BuildVersionRange(c5Intro, "", c5Fixed)
newRange.Repo = repo
newRange.Type = osvschema.Range_GIT // Preserve the repo
newRange := conversion.BuildGitVersionRange(c5Intro, "", c5Fixed, repo)
newAffectedRanges = append(newAffectedRanges, newRange)
} else {
newAffectedRanges = cveRanges
Expand Down
262 changes: 243 additions & 19 deletions vulnfeeds/conversion/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"io/fs"
"log/slog"
"net/http"
"os"
"path/filepath"
"slices"
Expand All @@ -21,6 +22,7 @@ import (
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/google/osv/vulnfeeds/vulns"
"github.com/ossf/osv-schema/bindings/go/osvschema"
"google.golang.org/protobuf/types/known/structpb"
)

// AddAffected adds an osvschema.Affected to a vulnerability, ensuring that no duplicate ranges are added.
Expand Down Expand Up @@ -64,6 +66,7 @@ func AddAffected(v *vulns.Vulnerability, aff *osvschema.Affected, metrics *model
}

func DeduplicateRefs(refs []models.Reference) []models.Reference {
refs = slices.Clone(refs)
// Deduplicate references by URL.
refs = slices.Clone(refs)
slices.SortStableFunc(refs, func(a, b models.Reference) int {
Expand Down Expand Up @@ -175,8 +178,8 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e

// GitVersionsToCommits examines repos and tries to convert versions to commits by treating them as Git tags.
// Returns the resolved ranges, unresolved ranges, and successful repos involved.
func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) {
var newVersionRanges []*osvschema.Range
func GitVersionsToCommits(versionRanges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) {
var newVersionRanges []models.RangeWithMetadata
unresolvedRanges := versionRanges
var successfulRepos []string

Expand All @@ -187,6 +190,18 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr
if cache.IsInvalid(repo) {
continue
}

repo, err := git.FindCanonicalLink(repo, http.DefaultClient, cache)
if err != nil {
metrics.AddNote("Failed to find canonical link - %s %v", repo, err)
if errors.Is(err, git.ErrRateLimit) || strings.Contains(err.Error(), "429") {
metrics.Outcome = models.Error
return nil, nil, nil
}

continue
}

normalizedTags, err := git.NormalizeRepoTags(repo, cache)
if err != nil {
if errors.Is(err, git.ErrRateLimit) || strings.Contains(err.Error(), "429") {
Expand All @@ -198,10 +213,10 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr
continue
}

var stillUnresolvedRanges []*osvschema.Range
var stillUnresolvedRanges []models.RangeWithMetadata
for _, vr := range unresolvedRanges {
var introduced, fixed, lastAffected string
for _, e := range vr.GetEvents() {
for _, e := range vr.Range.GetEvents() {
if e.GetIntroduced() != "" {
introduced = e.GetIntroduced()
}
Expand Down Expand Up @@ -235,23 +250,33 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr
var newVR *osvschema.Range

if fixedCommit != "" {
newVR = BuildVersionRange(introducedCommit, "", fixedCommit)
newVR = BuildGitVersionRange(introducedCommit, "", fixedCommit, repo)
} else {
newVR = BuildVersionRange(introducedCommit, lastAffectedCommit, "")
newVR = BuildGitVersionRange(introducedCommit, lastAffectedCommit, "", repo)
}
successfulRepos = append(successfulRepos, repo)
newVR.Repo = repo
newVR.Type = osvschema.Range_GIT
if len(vr.GetEvents()) > 0 {
databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()})
if len(vr.Range.GetEvents()) > 0 {
dbSpecificMap := map[string]any{
"versions": vr.Range.GetEvents(),
}
if vr.Metadata.CPE != "" {
dbSpecificMap["cpe"] = vr.Metadata.CPE
}
if string(vr.Metadata.Source) != "" {
dbSpecificMap["source"] = string(vr.Metadata.Source)
}
databaseSpecific, err := utility.NewStructpbFromMap(dbSpecificMap)
if err != nil {
metrics.AddNote("failed to make database specific: %v", err)
} else {
newVR.DatabaseSpecific = databaseSpecific
}
}

newVersionRanges = append(newVersionRanges, newVR)
newVersionRanges = append(newVersionRanges, models.RangeWithMetadata{
Range: newVR,
Metadata: vr.Metadata,
})
} else {
stillUnresolvedRanges = append(stillUnresolvedRanges, vr)
}
Expand Down Expand Up @@ -287,6 +312,14 @@ func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Ra
return &versionRange
}

// BuildGitVersionRange builds a range from the given introduced, last-affected
// and fixed values via BuildVersionRange, then marks it as a GIT range that
// belongs to repo.
func BuildGitVersionRange(intro string, lastAff string, fixed string, repo string) *osvschema.Range {
	gitRange := BuildVersionRange(intro, lastAff, fixed)
	gitRange.Type = osvschema.Range_GIT
	gitRange.Repo = repo

	return gitRange
}

// MergeTwoRanges combines two osvschema.Range objects into a single range.
// It merges the events and the DatabaseSpecific fields. If the ranges are
// not for the same repository or are of different types, it returns an error.
Expand Down Expand Up @@ -324,7 +357,7 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) {
for k, v := range db2.GetFields() {
val2 := v.AsInterface()
if existing, ok := mergedMap[k]; ok {
mergedVal, err := mergeDatabaseSpecificValues(existing, val2)
mergedVal, err := MergeDatabaseSpecificValues(existing, val2)
if err != nil {
logger.Info("Failed to merge database specific key", "key", k, "err", err)
}
Expand All @@ -346,18 +379,26 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) {
return mergedRange, nil
}

// mergeDatabaseSpecificValues is a helper function that recursively merges two
// MergeDatabaseSpecificValues is a helper function that recursively merges two
// values from a DatabaseSpecific field. It handles lists (by appending), maps
// (by recursively merging keys), and simple strings (by creating a list if they
// differ). It returns an error if the types of the two values do not match.
func mergeDatabaseSpecificValues(val1, val2 any) (any, error) {
func MergeDatabaseSpecificValues(val1, val2 any) (any, error) {
switch v1 := val1.(type) {
case []any:
if v2, ok := val2.([]any); ok {
return append(v1, v2...), nil
return deduplicateList(append(v1, v2...)), nil
}

return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2)
// Check if the list contains elements of the same type as val2
if len(v1) > 0 {
if fmt.Sprintf("%T", v1[0]) != fmt.Sprintf("%T", val2) {
return nil, fmt.Errorf("mismatching types: list of %T and %T", v1[0], val2)
}
}

// Append single value to list
return deduplicateList(append(v1, val2)), nil
case map[string]any:
if v2, ok := val2.(map[string]any); ok {
merged := make(map[string]any)
Expand All @@ -366,7 +407,7 @@ func mergeDatabaseSpecificValues(val1, val2 any) (any, error) {
}
for k, v := range v2 {
if existing, ok := merged[k]; ok {
mergedVal, err := mergeDatabaseSpecificValues(existing, v)
mergedVal, err := MergeDatabaseSpecificValues(existing, v)
if err != nil {
return nil, err
}
Expand All @@ -386,18 +427,201 @@ func mergeDatabaseSpecificValues(val1, val2 any) (any, error) {
return v1, nil
}

return []any{v1, v2}, nil
return deduplicateList([]any{v1, v2}), nil
}
if v2, ok := val2.([]any); ok {
if len(v2) > 0 {
if _, isString := v2[0].(string); !isString {
return nil, fmt.Errorf("mismatching types: string and list of %T", v2[0])
}
}

return deduplicateList(append([]any{v1}, v2...)), nil
}

return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2)
default:
if v2, ok := val2.([]any); ok {
if len(v2) > 0 {
if fmt.Sprintf("%T", val1) != fmt.Sprintf("%T", v2[0]) {
return nil, fmt.Errorf("mismatching types: %T and list of %T", val1, v2[0])
}
}

return deduplicateList(append([]any{val1}, v2...)), nil
}
if fmt.Sprintf("%T", val1) != fmt.Sprintf("%T", val2) {
return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2)
}
if val1 == val2 {
return val1, nil
}

return []any{val1, val2}, nil
return deduplicateList([]any{val1, val2}), nil
}
}

// deduplicateList returns the elements of list in their original order with
// repeated scalar values dropped; only the first occurrence of each scalar is
// kept. Scalars are string, int, int32, int64, float32, float64 and bool, and
// two values are only considered equal when they have the same dynamic type.
// Elements of any other type (lists, maps, nil, ...) are always kept, even if
// repeated. The result is nil when nothing is kept.
func deduplicateList(list []any) []any {
	var result []any
	visited := make(map[any]struct{})
	for _, elem := range list {
		switch elem.(type) {
		case string, int, int32, int64, float32, float64, bool:
			if _, dup := visited[elem]; dup {
				// Already emitted this scalar; skip to the next element.
				continue
			}
			visited[elem] = struct{}{}
		}
		result = append(result, elem)
	}

	return result
}

// ToRangeWithMetadata wraps every range in r in a models.RangeWithMetadata
// whose Metadata.Source is s. The result has the same length and order as r
// and is never nil.
func ToRangeWithMetadata(r []*osvschema.Range, s models.VersionSource) []models.RangeWithMetadata {
	wrapped := make([]models.RangeWithMetadata, len(r))
	for i := range r {
		wrapped[i].Range = r[i]
		wrapped[i].Metadata.Source = s
	}

	return wrapped
}

// CreateUnresolvedRanges condenses unresolved ranges into a structpb list with
// one entry per metadata source, ordered by the source string.
//
// Each entry contains:
//   - "versions": the events of every range from that source, in input order,
//     keeping an event only if it has a fixed value, a last_affected value, or
//     an introduced value other than "" and "0".
//   - "metadata" (only when non-empty): "source" when the source string is not
//     empty, plus "cpe" when exactly one CPE was collected or "cpes" (sorted,
//     adjacent duplicates removed) when more than one was collected.
//
// It returns nil when unresolvedRanges is empty, or when the conversion to
// structpb fails (the failure is logged as a warning).
func CreateUnresolvedRanges(unresolvedRanges []models.RangeWithMetadata) *structpb.ListValue {
	if len(unresolvedRanges) == 0 {
		return nil
	}

	// Bucket the ranges by their source, preserving input order inside a bucket.
	grouped := make(map[string][]models.RangeWithMetadata)
	for _, item := range unresolvedRanges {
		key := string(item.Metadata.Source)
		grouped[key] = append(grouped[key], item)
	}

	// Map iteration order is random, so sort the keys for deterministic output.
	orderedSources := make([]string, 0, len(grouped))
	for key := range grouped {
		orderedSources = append(orderedSources, key)
	}
	slices.Sort(orderedSources)

	entries := make([]any, 0, len(orderedSources))
	for _, src := range orderedSources {
		var (
			cpeList []string
			kept    []*osvschema.Event
		)

		for _, item := range grouped[src] {
			if cpe := item.Metadata.CPE; cpe != "" {
				cpeList = append(cpeList, cpe)
			}

			for _, ev := range item.Range.GetEvents() {
				intro := ev.GetIntroduced()
				hasIntro := intro != "" && intro != "0"
				if hasIntro || ev.GetLastAffected() != "" || ev.GetFixed() != "" {
					kept = append(kept, ev)
				}
			}
		}

		meta := make(map[string]any)
		switch {
		case len(cpeList) > 1:
			slices.Sort(cpeList)
			meta["cpes"] = slices.Compact(cpeList)
		case len(cpeList) == 1:
			meta["cpe"] = cpeList[0]
		}
		if src != "" {
			meta["source"] = src
		}

		entry := make(map[string]any)
		if len(meta) > 0 {
			entry["metadata"] = meta
		}
		entry["versions"] = kept

		entries = append(entries, entry)
	}

	// NewStructpbFromMap works on maps, so wrap the list under a temporary key
	// and unwrap it again after conversion.
	converted, err := utility.NewStructpbFromMap(map[string]any{
		"list": entries,
	})
	if err != nil {
		logger.Warn("failed to convert unresolved ranges to structpb", "err", err)
		return nil
	}

	return converted.GetFields()["list"].GetListValue()
}

// AddFieldToDatabaseSpecific stores value under field in ds, refusing to
// overwrite an existing entry.
//
// A *structpb.Value is stored as-is, a *structpb.Struct or *structpb.ListValue
// is wrapped in a Value, and anything else is converted with structpb.NewValue.
//
// It returns an error when ds is nil, when ds.Fields is nil, when field already
// holds a non-nil value, or when structpb.NewValue cannot convert value. ds is
// left unmodified in every error case.
func AddFieldToDatabaseSpecific(ds *structpb.Struct, field string, value any) error {
	switch {
	case ds == nil:
		return errors.New("database specific is nil")
	case ds.Fields == nil:
		return errors.New("database specific fields is nil")
	case ds.GetFields()[field] != nil:
		return fmt.Errorf("field %s already exists", field)
	}

	var wrapped *structpb.Value
	switch typed := value.(type) {
	case *structpb.Value:
		wrapped = typed
	case *structpb.Struct:
		wrapped = structpb.NewStructValue(typed)
	case *structpb.ListValue:
		wrapped = structpb.NewListValue(typed)
	default:
		converted, err := structpb.NewValue(typed)
		if err != nil {
			return fmt.Errorf("failed to create structpb value: %w", err)
		}
		wrapped = converted
	}
	ds.Fields[field] = wrapped

	return nil
}

// ProcessRanges runs GitVersionsToCommits over ranges and records the result in
// metrics: the resolved and unresolved counts are increased, the outcome is set
// to Successful when at least one range resolved or to NoCommitRanges when only
// unresolved ranges remain, and source is appended to metrics.VersionSources.
//
// It returns the resolved ranges, the unresolved ranges and the repos reported
// by GitVersionsToCommits. When ranges is empty it returns three nils and does
// not touch metrics.
func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache, source models.VersionSource) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) {
	if len(ranges) == 0 {
		return nil, nil, nil
	}

	resolved, unresolved, okRepos := GitVersionsToCommits(ranges, repos, metrics, cache)

	anyResolved := len(resolved) > 0
	if anyResolved {
		metrics.ResolvedRangesCount += len(resolved)
		metrics.SetOutcome(models.Successful)
	}

	if len(unresolved) > 0 {
		metrics.UnresolvedRangesCount += len(unresolved)
		if !anyResolved {
			metrics.SetOutcome(models.NoCommitRanges)
		}
	}

	metrics.VersionSources = append(metrics.VersionSources, source)

	return resolved, unresolved, okRepos
}
Loading
Loading