github.com/khulnasoft-lab/tunnel-db@v0.0.0-20231117205118-74e1113bd007/pkg/vulnsrc/osv/osv.go (about)

     1  package osv
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io"
     7  	"path/filepath"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/goark/go-cvss/v3/metric"
    12  	"github.com/samber/lo"
    13  	bolt "go.etcd.io/bbolt"
    14  	"go.uber.org/zap"
    15  	"golang.org/x/exp/maps"
    16  	"golang.org/x/xerrors"
    17  
    18  	"github.com/khulnasoft-lab/tunnel-db/pkg/db"
    19  	"github.com/khulnasoft-lab/tunnel-db/pkg/log"
    20  	"github.com/khulnasoft-lab/tunnel-db/pkg/types"
    21  	"github.com/khulnasoft-lab/tunnel-db/pkg/utils"
    22  	"github.com/khulnasoft-lab/tunnel-db/pkg/vulnsrc/bucket"
    23  	"github.com/khulnasoft-lab/tunnel-db/pkg/vulnsrc/vulnerability"
    24  )
    25  
    26  type Advisory struct {
    27  	Ecosystem       types.Ecosystem
    28  	PkgName         string
    29  	VulnerabilityID string
    30  	Aliases         []string
    31  
    32  	// Advisory detail
    33  	VulnerableVersions []string
    34  	PatchedVersions    []string
    35  
    36  	// Vulnerability detail
    37  	Severity     types.Severity
    38  	Title        string
    39  	Description  string
    40  	References   []string
    41  	CVSSScoreV3  float64
    42  	CVSSVectorV3 string
    43  }
    44  
// OSV is a vulnerability source that decodes OSV JSON entries found under a
// directory and stores the advisories derived from them.
type OSV struct {
	// dir is joined to the root passed to Update to locate the JSON files.
	dir string
	// dbc performs the database writes.
	dbc db.Operation
	// sourceID is returned by Name and used when storing vulnerability details.
	sourceID types.SourceID
	// dataSources maps each supported ecosystem to its data source; advisories
	// whose ecosystem is missing from this map are not stored.
	dataSources map[types.Ecosystem]types.DataSource
	// transformer post-processes the parsed advisories before they are stored.
	transformer Transformer
}
    52  
    53  type Transformer interface {
    54  	TransformAdvisories([]Advisory, Entry) ([]Advisory, error)
    55  }
    56  
    57  type defaultTransformer struct{}
    58  
    59  func (t *defaultTransformer) TransformAdvisories(advs []Advisory, _ Entry) ([]Advisory, error) {
    60  	return advs, nil
    61  }
    62  
    63  func New(dir string, sourceID types.SourceID, dataSources map[types.Ecosystem]types.DataSource, transformer Transformer) OSV {
    64  	if transformer == nil {
    65  		transformer = &defaultTransformer{}
    66  	}
    67  	return OSV{
    68  		dir:         dir,
    69  		dbc:         db.Config{},
    70  		sourceID:    sourceID,
    71  		dataSources: dataSources,
    72  		transformer: transformer,
    73  	}
    74  }
    75  
// Name returns the source ID this OSV source was created with.
func (o OSV) Name() types.SourceID {
	return o.sourceID
}
    79  
    80  func (o OSV) Update(root string) error {
    81  	rootDir := filepath.Join(root, o.dir)
    82  
    83  	var entries []Entry
    84  	err := utils.FileWalk(rootDir, func(r io.Reader, path string) error {
    85  		if filepath.Ext(path) != ".json" {
    86  			return nil
    87  		}
    88  		var entry Entry
    89  		if err := json.NewDecoder(r).Decode(&entry); err != nil {
    90  			return xerrors.Errorf("JSON decode error (%s): %w", path, err)
    91  		}
    92  		entries = append(entries, entry)
    93  		return nil
    94  	})
    95  	if err != nil {
    96  		return xerrors.Errorf("walk error: %w", err)
    97  	}
    98  
    99  	if err = o.save(entries); err != nil {
   100  		return xerrors.Errorf("save error: %w", err)
   101  	}
   102  
   103  	return nil
   104  }
   105  
   106  func (o OSV) save(entries []Entry) error {
   107  	err := o.dbc.BatchUpdate(func(tx *bolt.Tx) error {
   108  		for _, entry := range entries {
   109  			if err := o.commit(tx, entry); err != nil {
   110  				return err
   111  			}
   112  		}
   113  		return nil
   114  	})
   115  	if err != nil {
   116  		return xerrors.Errorf("batch update error: %w", err)
   117  	}
   118  	return nil
   119  }
   120  
   121  func (o OSV) commit(tx *bolt.Tx, entry Entry) error {
   122  	if entry.Withdrawn != nil && entry.Withdrawn.Before(time.Now()) {
   123  		return nil
   124  	}
   125  
   126  	// Group IDs into primary vulnerability IDs and aliases.
   127  	vulnIDs, aliases := groupVulnIDs(entry.ID, entry.Aliases)
   128  
   129  	references := lo.Map(entry.References, func(ref Reference, _ int) string {
   130  		return ref.URL
   131  	})
   132  
   133  	// Parse []affected
   134  	advisories, err := parseAffected(entry, vulnIDs, aliases, references)
   135  	if err != nil {
   136  		return xerrors.Errorf("failed to parse affected: %w", err)
   137  	}
   138  
   139  	// Transform advisories
   140  	advisories, err = o.transformer.TransformAdvisories(advisories, entry)
   141  	if err != nil {
   142  		return xerrors.Errorf("failed to transform advisories: %w", err)
   143  	}
   144  
   145  	for _, adv := range advisories {
   146  		dataSource, ok := o.dataSources[adv.Ecosystem]
   147  		if !ok {
   148  			continue
   149  		}
   150  		bktName := bucket.Name(adv.Ecosystem, dataSource.Name)
   151  
   152  		if err = o.dbc.PutDataSource(tx, bktName, dataSource); err != nil {
   153  			return xerrors.Errorf("failed to put data source: %w", err)
   154  		}
   155  
   156  		// Store advisories
   157  		advisory := types.Advisory{
   158  			VendorIDs:          adv.Aliases,
   159  			VulnerableVersions: adv.VulnerableVersions,
   160  			PatchedVersions:    adv.PatchedVersions,
   161  		}
   162  		if err = o.dbc.PutAdvisoryDetail(tx, adv.VulnerabilityID, adv.PkgName, []string{bktName}, advisory); err != nil {
   163  			return xerrors.Errorf("failed to save OSV advisory: %w", err)
   164  		}
   165  
   166  		// Store vulnerability details
   167  		vuln := types.VulnerabilityDetail{
   168  			Severity:     adv.Severity,
   169  			References:   adv.References,
   170  			Title:        adv.Title,
   171  			Description:  adv.Description,
   172  			CvssScoreV3:  adv.CVSSScoreV3,
   173  			CvssVectorV3: adv.CVSSVectorV3,
   174  		}
   175  
   176  		if err = o.dbc.PutVulnerabilityDetail(tx, adv.VulnerabilityID, o.sourceID, vuln); err != nil {
   177  			return xerrors.Errorf("failed to put vulnerability detail (%s): %w", adv.VulnerabilityID, err)
   178  		}
   179  
   180  		if err = o.dbc.PutVulnerabilityID(tx, adv.VulnerabilityID); err != nil {
   181  			return xerrors.Errorf("failed to put vulnerability id (%s): %w", adv.VulnerabilityID, err)
   182  		}
   183  	}
   184  	return nil
   185  }
   186  
   187  func groupVulnIDs(id string, aliases []string) ([]string, []string) {
   188  	var cveIDs, nonCVEIDs []string
   189  	for _, a := range append(aliases, id) {
   190  		if strings.HasPrefix(a, "CVE-") {
   191  			cveIDs = append(cveIDs, a)
   192  		} else {
   193  			nonCVEIDs = append(nonCVEIDs, a)
   194  		}
   195  	}
   196  	if len(cveIDs) == 0 {
   197  		// Use the original vulnerability ID
   198  		// e.g. PYSEC-2021-335 and GHSA-wjx8-cgrm-hh8p
   199  		return []string{id}, aliases
   200  	}
   201  	return cveIDs, nonCVEIDs
   202  }
   203  
   204  // parseAffected parses the affected fields
   205  // cf. https://ossf.github.io/osv-schema/#affected-fields
   206  func parseAffected(entry Entry, vulnIDs, aliases, references []string) ([]Advisory, error) {
   207  	// Severities can be found both in severity and affected[].severity fields.
   208  	cvssVectorV3, cvssScoreV3, err := parseSeverity(entry.Severities)
   209  	if err != nil {
   210  		return nil, xerrors.Errorf("failed to decode CVSS vector (%s): %w", entry.ID, err)
   211  	}
   212  
   213  	uniqAdvisories := map[string]Advisory{}
   214  	for _, affected := range entry.Affected {
   215  		ecosystem := convertEcosystem(affected.Package.Ecosystem)
   216  		if ecosystem == vulnerability.Unknown {
   217  			continue
   218  		}
   219  		pkgName := vulnerability.NormalizePkgName(ecosystem, affected.Package.Name)
   220  
   221  		vulnerableVersions, patchedVersions, err := parseAffectedVersions(affected)
   222  		if err != nil {
   223  			return nil, xerrors.Errorf("failed to parse affected: %w", err)
   224  		}
   225  
   226  		// Parse affected[].severity
   227  		if vecV3, scoreV3, err := parseSeverity(affected.Severities); err != nil {
   228  			return nil, xerrors.Errorf("failed to decode CVSS vector (%s): %w", entry.ID, err)
   229  		} else if vecV3 != "" {
   230  			// Overwrite the CVSS vector and score if affected[].severity is set
   231  			cvssVectorV3, cvssScoreV3 = vecV3, scoreV3
   232  		}
   233  
   234  		key := fmt.Sprintf("%s/%s", ecosystem, pkgName)
   235  		for _, vulnID := range vulnIDs {
   236  			if adv, ok := uniqAdvisories[key]; ok {
   237  				// The same package could be repeated with different version ranges.
   238  				// cf. https://github.com/github/advisory-database/blob/0996f81ca6f1b65ba25f8e71fba263cb1e54ced5/advisories/github-reviewed/2019/12/GHSA-wjx8-cgrm-hh8p/GHSA-wjx8-cgrm-hh8p.json
   239  				adv.VulnerableVersions = append(adv.VulnerableVersions, vulnerableVersions...)
   240  				adv.PatchedVersions = append(adv.PatchedVersions, patchedVersions...)
   241  				uniqAdvisories[key] = adv
   242  			} else {
   243  				uniqAdvisories[key] = Advisory{
   244  					Ecosystem:          ecosystem,
   245  					PkgName:            pkgName,
   246  					VulnerabilityID:    vulnID,
   247  					Aliases:            aliases,
   248  					VulnerableVersions: vulnerableVersions,
   249  					PatchedVersions:    patchedVersions,
   250  					Title:              entry.Summary,
   251  					Description:        entry.Details,
   252  					References:         references,
   253  					CVSSVectorV3:       cvssVectorV3,
   254  					CVSSScoreV3:        cvssScoreV3,
   255  				}
   256  			}
   257  		}
   258  	}
   259  	return maps.Values(uniqAdvisories), nil
   260  }
   261  
   262  // parseAffectedVersions parses the affected.versions and affected.ranges fields
   263  // cf.
   264  // - https://ossf.github.io/osv-schema/#affectedversions-field
   265  // - https://ossf.github.io/osv-schema/#affectedranges-field
   266  func parseAffectedVersions(affected Affected) ([]string, []string, error) {
   267  	var patchedVersions, vulnerableVersions []string
   268  	var affectedRanges []VersionRange
   269  	for _, affects := range affected.Ranges {
   270  		if affects.Type == RangeTypeGit {
   271  			continue
   272  		}
   273  
   274  		var index int
   275  		for _, event := range affects.Events {
   276  			switch {
   277  			// Each "introduced" event implies a new version range
   278  			// e.g. {"introduced": "1.2.0"}, {"introduced": "2.2.0"}
   279  			case event.Introduced != "":
   280  				affectedRanges = append(affectedRanges, NewVersionRange(affected.Package.Ecosystem, event.Introduced))
   281  				index = len(affectedRanges) - 1
   282  			// e.g. {"introduced": "1.2.0"}, {"fixed": "1.2.5"}
   283  			case event.Fixed != "":
   284  				affectedRanges[index].SetFixed(event.Fixed)
   285  				patchedVersions = append(patchedVersions, event.Fixed)
   286  			// e.g. {"introduced": "1.2.0"}, {"last_affected": "1.2.5"}
   287  			case event.LastAffected != "":
   288  				affectedRanges[index].SetLastAffected(event.LastAffected)
   289  			}
   290  		}
   291  	}
   292  
   293  	for _, r := range affectedRanges {
   294  		vulnerableVersions = append(vulnerableVersions, r.String())
   295  	}
   296  
   297  	for _, v := range affected.Versions {
   298  		// We don't need to add the versions that are already included in the ranges
   299  		ok, err := versionContains(affectedRanges, v)
   300  		if err != nil {
   301  			log.Logger.Errorw("Version comparison error",
   302  				zap.String("ecosystem", string(affected.Package.Ecosystem)),
   303  				zap.String("package", affected.Package.Name),
   304  				zap.Error(err),
   305  			)
   306  		}
   307  		if !ok {
   308  			vulnerableVersions = append(vulnerableVersions, fmt.Sprintf("=%s", v))
   309  		}
   310  	}
   311  
   312  	return vulnerableVersions, patchedVersions, nil
   313  }
   314  
   315  // parseSeverity parses the severity field and returns CVSSv3 vector and score
   316  // cf.
   317  // - https://ossf.github.io/osv-schema/#severity-field
   318  // - https://ossf.github.io/osv-schema/#affectedseverity-field
   319  func parseSeverity(severities []Severity) (string, float64, error) {
   320  	for _, s := range severities {
   321  		if s.Type == "CVSS_V3" && s.Score != "" {
   322  			// CVSS vectors possibly have `/` suffix
   323  			// e.g. https://github.com/github/advisory-database/blob/2d3bc73d2117893b217233aeb95b9236c7b93761/advisories/github-reviewed/2019/05/GHSA-j59f-6m4q-62h6/GHSA-j59f-6m4q-62h6.json#L14
   324  			// Trim the suffix to avoid errors
   325  			cvssVectorV3 := strings.TrimSuffix(s.Score, "/")
   326  			metrics, err := metric.NewTemporal().Decode(cvssVectorV3)
   327  			if err != nil {
   328  				return "", 0, xerrors.Errorf("failed to decode CVSSv3 vector: %w", err)
   329  			}
   330  			cvssScoreV3 := metrics.Score()
   331  			return cvssVectorV3, cvssScoreV3, nil
   332  		}
   333  	}
   334  	return "", 0, nil
   335  }
   336  
   337  func convertEcosystem(eco Ecosystem) types.Ecosystem {
   338  	// cf. https://ossf.github.io/osv-schema/#affectedpackage-field
   339  	switch strings.ToLower(string(eco)) {
   340  	case "go":
   341  		return vulnerability.Go
   342  	case "npm":
   343  		return vulnerability.Npm
   344  	case "pypi":
   345  		return vulnerability.Pip
   346  	case "rubygems":
   347  		return vulnerability.RubyGems
   348  	case "crates.io":
   349  		return vulnerability.Cargo
   350  	case "packagist":
   351  		return vulnerability.Composer
   352  	case "maven":
   353  		return vulnerability.Maven
   354  	case "nuget":
   355  		return vulnerability.NuGet
   356  	case "hex":
   357  		return vulnerability.Erlang
   358  	case "pub":
   359  		return vulnerability.Pub
   360  	case "swifturl", "purl-type:swift":
   361  		// GHSA still uses "purl-type:swift" for Swift advisories.
   362  		// cf. https://github.com/github/advisory-database/blob/db1cdfb553e48f18aa27d7e929d200563451391a/advisories/github-reviewed/2023/07/GHSA-jq43-q8mx-r7mq/GHSA-jq43-q8mx-r7mq.json#L20
   363  		return vulnerability.Swift
   364  	case "bitnami":
   365  		return vulnerability.Bitnami
   366  	case "kubernetes":
   367  		return vulnerability.Kubernetes
   368  	default:
   369  		return vulnerability.Unknown
   370  	}
   371  }
   372  
   373  func versionContains(ranges []VersionRange, version string) (bool, error) {
   374  	for _, r := range ranges {
   375  		if ok, err := r.Contains(version); err != nil {
   376  			return false, err
   377  		} else if ok {
   378  			return true, nil
   379  		}
   380  	}
   381  	return false, nil
   382  }