github.com/khulnasoft-lab/tunnel-db@v0.0.0-20231117205118-74e1113bd007/pkg/vulnsrc/osv/osv.go (about) 1 package osv 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io" 7 "path/filepath" 8 "strings" 9 "time" 10 11 "github.com/goark/go-cvss/v3/metric" 12 "github.com/samber/lo" 13 bolt "go.etcd.io/bbolt" 14 "go.uber.org/zap" 15 "golang.org/x/exp/maps" 16 "golang.org/x/xerrors" 17 18 "github.com/khulnasoft-lab/tunnel-db/pkg/db" 19 "github.com/khulnasoft-lab/tunnel-db/pkg/log" 20 "github.com/khulnasoft-lab/tunnel-db/pkg/types" 21 "github.com/khulnasoft-lab/tunnel-db/pkg/utils" 22 "github.com/khulnasoft-lab/tunnel-db/pkg/vulnsrc/bucket" 23 "github.com/khulnasoft-lab/tunnel-db/pkg/vulnsrc/vulnerability" 24 ) 25 26 type Advisory struct { 27 Ecosystem types.Ecosystem 28 PkgName string 29 VulnerabilityID string 30 Aliases []string 31 32 // Advisory detail 33 VulnerableVersions []string 34 PatchedVersions []string 35 36 // Vulnerability detail 37 Severity types.Severity 38 Title string 39 Description string 40 References []string 41 CVSSScoreV3 float64 42 CVSSVectorV3 string 43 } 44 45 type OSV struct { 46 dir string 47 dbc db.Operation 48 sourceID types.SourceID 49 dataSources map[types.Ecosystem]types.DataSource 50 transformer Transformer 51 } 52 53 type Transformer interface { 54 TransformAdvisories([]Advisory, Entry) ([]Advisory, error) 55 } 56 57 type defaultTransformer struct{} 58 59 func (t *defaultTransformer) TransformAdvisories(advs []Advisory, _ Entry) ([]Advisory, error) { 60 return advs, nil 61 } 62 63 func New(dir string, sourceID types.SourceID, dataSources map[types.Ecosystem]types.DataSource, transformer Transformer) OSV { 64 if transformer == nil { 65 transformer = &defaultTransformer{} 66 } 67 return OSV{ 68 dir: dir, 69 dbc: db.Config{}, 70 sourceID: sourceID, 71 dataSources: dataSources, 72 transformer: transformer, 73 } 74 } 75 76 func (o OSV) Name() types.SourceID { 77 return o.sourceID 78 } 79 80 func (o OSV) Update(root string) error { 81 rootDir := filepath.Join(root, o.dir) 82 83 var entries []Entry 84 err := utils.FileWalk(rootDir, func(r io.Reader, path string) error { 85 if filepath.Ext(path) != ".json" { 86 return nil 87 } 88 var entry Entry 89 if err := json.NewDecoder(r).Decode(&entry); err != nil { 90 return xerrors.Errorf("JSON decode error (%s): %w", path, err) 91 } 92 entries = append(entries, entry) 93 return nil 94 }) 95 if err != nil { 96 return xerrors.Errorf("walk error: %w", err) 97 } 98 99 if err = o.save(entries); err != nil { 100 return xerrors.Errorf("save error: %w", err) 101 } 102 103 return nil 104 } 105 106 func (o OSV) save(entries []Entry) error { 107 err := o.dbc.BatchUpdate(func(tx *bolt.Tx) error { 108 for _, entry := range entries { 109 if err := o.commit(tx, entry); err != nil { 110 return err 111 } 112 } 113 return nil 114 }) 115 if err != nil { 116 return xerrors.Errorf("batch update error: %w", err) 117 } 118 return nil 119 } 120 121 func (o OSV) commit(tx *bolt.Tx, entry Entry) error { 122 if entry.Withdrawn != nil && entry.Withdrawn.Before(time.Now()) { 123 return nil 124 } 125 126 // Group IDs into primary vulnerability IDs and aliases. 127 vulnIDs, aliases := groupVulnIDs(entry.ID, entry.Aliases) 128 129 references := lo.Map(entry.References, func(ref Reference, _ int) string { 130 return ref.URL 131 }) 132 133 // Parse []affected 134 advisories, err := parseAffected(entry, vulnIDs, aliases, references) 135 if err != nil { 136 return xerrors.Errorf("failed to parse affected: %w", err) 137 } 138 139 // Transform advisories 140 advisories, err = o.transformer.TransformAdvisories(advisories, entry) 141 if err != nil { 142 return xerrors.Errorf("failed to transform advisories: %w", err) 143 } 144 145 for _, adv := range advisories { 146 dataSource, ok := o.dataSources[adv.Ecosystem] 147 if !ok { 148 continue 149 } 150 bktName := bucket.Name(adv.Ecosystem, dataSource.Name) 151 152 if err = o.dbc.PutDataSource(tx, bktName, dataSource); err != nil { 153 return xerrors.Errorf("failed to put data source: %w", err) 154 } 155 156 // Store advisories 157 advisory := types.Advisory{ 158 VendorIDs: adv.Aliases, 159 VulnerableVersions: adv.VulnerableVersions, 160 PatchedVersions: adv.PatchedVersions, 161 } 162 if err = o.dbc.PutAdvisoryDetail(tx, adv.VulnerabilityID, adv.PkgName, []string{bktName}, advisory); err != nil { 163 return xerrors.Errorf("failed to save OSV advisory: %w", err) 164 } 165 166 // Store vulnerability details 167 vuln := types.VulnerabilityDetail{ 168 Severity: adv.Severity, 169 References: adv.References, 170 Title: adv.Title, 171 Description: adv.Description, 172 CvssScoreV3: adv.CVSSScoreV3, 173 CvssVectorV3: adv.CVSSVectorV3, 174 } 175 176 if err = o.dbc.PutVulnerabilityDetail(tx, adv.VulnerabilityID, o.sourceID, vuln); err != nil { 177 return xerrors.Errorf("failed to put vulnerability detail (%s): %w", adv.VulnerabilityID, err) 178 } 179 180 if err = o.dbc.PutVulnerabilityID(tx, adv.VulnerabilityID); err != nil { 181 return xerrors.Errorf("failed to put vulnerability id (%s): %w", adv.VulnerabilityID, err) 182 } 183 } 184 return nil 185 } 186 187 func groupVulnIDs(id string, aliases []string) ([]string, []string) { 188 var cveIDs, nonCVEIDs []string 189 for _, a := range append(aliases, id) { 190 if strings.HasPrefix(a, "CVE-") { 191 cveIDs = append(cveIDs, a) 192 } else { 193 nonCVEIDs = append(nonCVEIDs, a) 194 } 195 } 196 if len(cveIDs) == 0 { 197 // Use the original vulnerability ID 198 // e.g. PYSEC-2021-335 and GHSA-wjx8-cgrm-hh8p 199 return []string{id}, aliases 200 } 201 return cveIDs, nonCVEIDs 202 } 203 204 // parseAffected parses the affected fields 205 // cf. https://ossf.github.io/osv-schema/#affected-fields 206 func parseAffected(entry Entry, vulnIDs, aliases, references []string) ([]Advisory, error) { 207 // Severities can be found both in severity and affected[].severity fields. 208 cvssVectorV3, cvssScoreV3, err := parseSeverity(entry.Severities) 209 if err != nil { 210 return nil, xerrors.Errorf("failed to decode CVSS vector (%s): %w", entry.ID, err) 211 } 212 213 uniqAdvisories := map[string]Advisory{} 214 for _, affected := range entry.Affected { 215 ecosystem := convertEcosystem(affected.Package.Ecosystem) 216 if ecosystem == vulnerability.Unknown { 217 continue 218 } 219 pkgName := vulnerability.NormalizePkgName(ecosystem, affected.Package.Name) 220 221 vulnerableVersions, patchedVersions, err := parseAffectedVersions(affected) 222 if err != nil { 223 return nil, xerrors.Errorf("failed to parse affected: %w", err) 224 } 225 226 // Parse affected[].severity 227 if vecV3, scoreV3, err := parseSeverity(affected.Severities); err != nil { 228 return nil, xerrors.Errorf("failed to decode CVSS vector (%s): %w", entry.ID, err) 229 } else if vecV3 != "" { 230 // Overwrite the CVSS vector and score if affected[].severity is set 231 cvssVectorV3, cvssScoreV3 = vecV3, scoreV3 232 } 233 234 key := fmt.Sprintf("%s/%s", ecosystem, pkgName) 235 for _, vulnID := range vulnIDs { 236 if adv, ok := uniqAdvisories[key]; ok { 237 // The same package could be repeated with different version ranges. 238 // cf. https://github.com/github/advisory-database/blob/0996f81ca6f1b65ba25f8e71fba263cb1e54ced5/advisories/github-reviewed/2019/12/GHSA-wjx8-cgrm-hh8p/GHSA-wjx8-cgrm-hh8p.json 239 adv.VulnerableVersions = append(adv.VulnerableVersions, vulnerableVersions...) 240 adv.PatchedVersions = append(adv.PatchedVersions, patchedVersions...) 241 uniqAdvisories[key] = adv 242 } else { 243 uniqAdvisories[key] = Advisory{ 244 Ecosystem: ecosystem, 245 PkgName: pkgName, 246 VulnerabilityID: vulnID, 247 Aliases: aliases, 248 VulnerableVersions: vulnerableVersions, 249 PatchedVersions: patchedVersions, 250 Title: entry.Summary, 251 Description: entry.Details, 252 References: references, 253 CVSSVectorV3: cvssVectorV3, 254 CVSSScoreV3: cvssScoreV3, 255 } 256 } 257 } 258 } 259 return maps.Values(uniqAdvisories), nil 260 } 261 262 // parseAffectedVersions parses the affected.versions and affected.ranges fields 263 // cf. 264 // - https://ossf.github.io/osv-schema/#affectedversions-field 265 // - https://ossf.github.io/osv-schema/#affectedranges-field 266 func parseAffectedVersions(affected Affected) ([]string, []string, error) { 267 var patchedVersions, vulnerableVersions []string 268 var affectedRanges []VersionRange 269 for _, affects := range affected.Ranges { 270 if affects.Type == RangeTypeGit { 271 continue 272 } 273 274 var index int 275 for _, event := range affects.Events { 276 switch { 277 // Each "introduced" event implies a new version range 278 // e.g. {"introduced": "1.2.0"}, {"introduced": "2.2.0"} 279 case event.Introduced != "": 280 affectedRanges = append(affectedRanges, NewVersionRange(affected.Package.Ecosystem, event.Introduced)) 281 index = len(affectedRanges) - 1 282 // e.g. {"introduced": "1.2.0"}, {"fixed": "1.2.5"} 283 case event.Fixed != "": 284 affectedRanges[index].SetFixed(event.Fixed) 285 patchedVersions = append(patchedVersions, event.Fixed) 286 // e.g. {"introduced": "1.2.0"}, {"last_affected": "1.2.5"} 287 case event.LastAffected != "": 288 affectedRanges[index].SetLastAffected(event.LastAffected) 289 } 290 } 291 } 292 293 for _, r := range affectedRanges { 294 vulnerableVersions = append(vulnerableVersions, r.String()) 295 } 296 297 for _, v := range affected.Versions { 298 // We don't need to add the versions that are already included in the ranges 299 ok, err := versionContains(affectedRanges, v) 300 if err != nil { 301 log.Logger.Errorw("Version comparison error", 302 zap.String("ecosystem", string(affected.Package.Ecosystem)), 303 zap.String("package", affected.Package.Name), 304 zap.Error(err), 305 ) 306 } 307 if !ok { 308 vulnerableVersions = append(vulnerableVersions, fmt.Sprintf("=%s", v)) 309 } 310 } 311 312 return vulnerableVersions, patchedVersions, nil 313 } 314 315 // parseSeverity parses the severity field and returns CVSSv3 vector and score 316 // cf. 317 // - https://ossf.github.io/osv-schema/#severity-field 318 // - https://ossf.github.io/osv-schema/#affectedseverity-field 319 func parseSeverity(severities []Severity) (string, float64, error) { 320 for _, s := range severities { 321 if s.Type == "CVSS_V3" && s.Score != "" { 322 // CVSS vectors possibly have `/` suffix 323 // e.g. https://github.com/github/advisory-database/blob/2d3bc73d2117893b217233aeb95b9236c7b93761/advisories/github-reviewed/2019/05/GHSA-j59f-6m4q-62h6/GHSA-j59f-6m4q-62h6.json#L14 324 // Trim the suffix to avoid errors 325 cvssVectorV3 := strings.TrimSuffix(s.Score, "/") 326 metrics, err := metric.NewTemporal().Decode(cvssVectorV3) 327 if err != nil { 328 return "", 0, xerrors.Errorf("failed to decode CVSSv3 vector: %w", err) 329 } 330 cvssScoreV3 := metrics.Score() 331 return cvssVectorV3, cvssScoreV3, nil 332 } 333 } 334 return "", 0, nil 335 } 336 337 func convertEcosystem(eco Ecosystem) types.Ecosystem { 338 // cf. https://ossf.github.io/osv-schema/#affectedpackage-field 339 switch strings.ToLower(string(eco)) { 340 case "go": 341 return vulnerability.Go 342 case "npm": 343 return vulnerability.Npm 344 case "pypi": 345 return vulnerability.Pip 346 case "rubygems": 347 return vulnerability.RubyGems 348 case "crates.io": 349 return vulnerability.Cargo 350 case "packagist": 351 return vulnerability.Composer 352 case "maven": 353 return vulnerability.Maven 354 case "nuget": 355 return vulnerability.NuGet 356 case "hex": 357 return vulnerability.Erlang 358 case "pub": 359 return vulnerability.Pub 360 case "swifturl", "purl-type:swift": 361 // GHSA still uses "purl-type:swift" for Swift advisories. 362 // cf. https://github.com/github/advisory-database/blob/db1cdfb553e48f18aa27d7e929d200563451391a/advisories/github-reviewed/2023/07/GHSA-jq43-q8mx-r7mq/GHSA-jq43-q8mx-r7mq.json#L20 363 return vulnerability.Swift 364 case "bitnami": 365 return vulnerability.Bitnami 366 case "kubernetes": 367 return vulnerability.Kubernetes 368 default: 369 return vulnerability.Unknown 370 } 371 } 372 373 func versionContains(ranges []VersionRange, version string) (bool, error) { 374 for _, r := range ranges { 375 if ok, err := r.Contains(version); err != nil { 376 return false, err 377 } else if ok { 378 return true, nil 379 } 380 } 381 return false, nil 382 }