github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/licensing/classifier.go (about)

     1  package licensing
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"sort"
     7  	"sync"
     8  
     9  	classifier "github.com/google/licenseclassifier/v2"
    10  	"github.com/google/licenseclassifier/v2/assets"
    11  	"golang.org/x/xerrors"
    12  
    13  	"github.com/devseccon/trivy/pkg/fanal/types"
    14  	"github.com/devseccon/trivy/pkg/log"
    15  )
    16  
    17  var (
    18  	cf             *classifier.Classifier
    19  	classifierOnce sync.Once
    20  	m              sync.Mutex
    21  )
    22  
    23  func initGoogleClassifier() error {
    24  	// Initialize the default classifier once.
    25  	// This loading is expensive and should be called only when the license classification is needed.
    26  	var err error
    27  	classifierOnce.Do(func() {
    28  		log.Logger.Debug("Loading the default license classifier...")
    29  		cf, err = assets.DefaultClassifier()
    30  	})
    31  	return err
    32  }
    33  
    34  // Classify detects and classifies the license found in a file
    35  func Classify(filePath string, r io.Reader, confidenceLevel float64) (*types.LicenseFile, error) {
    36  	content, err := io.ReadAll(r)
    37  	if err != nil {
    38  		return nil, xerrors.Errorf("unable to read a license file %q: %w", filePath, err)
    39  	}
    40  	if err = initGoogleClassifier(); err != nil {
    41  		return nil, err
    42  	}
    43  
    44  	var findings types.LicenseFindings
    45  	var matchType types.LicenseType
    46  	seen := make(map[string]struct{})
    47  
    48  	// cf.Match is not thread safe
    49  	m.Lock()
    50  
    51  	// Use 'github.com/google/licenseclassifier' to find licenses
    52  	result := cf.Match(cf.Normalize(content))
    53  
    54  	m.Unlock()
    55  
    56  	for _, match := range result.Matches {
    57  		if match.Confidence <= confidenceLevel {
    58  			continue
    59  		}
    60  		if _, ok := seen[match.Name]; ok {
    61  			continue
    62  		}
    63  
    64  		seen[match.Name] = struct{}{}
    65  
    66  		switch match.MatchType {
    67  		case "Header":
    68  			matchType = types.LicenseTypeHeader
    69  		case "License":
    70  			matchType = types.LicenseTypeFile
    71  		}
    72  		licenseLink := fmt.Sprintf("https://spdx.org/licenses/%s.html", match.Name)
    73  
    74  		findings = append(findings, types.LicenseFinding{
    75  			Name:       match.Name,
    76  			Confidence: match.Confidence,
    77  			Link:       licenseLink,
    78  		})
    79  	}
    80  	sort.Sort(findings)
    81  	return &types.LicenseFile{
    82  		Type:     matchType,
    83  		FilePath: filePath,
    84  		Findings: findings,
    85  	}, nil
    86  }