github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/analyzer/licensing/license.go (about)

     1  package licensing
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"math"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	"golang.org/x/exp/slices"
    12  	"golang.org/x/xerrors"
    13  
    14  	dio "github.com/aquasecurity/go-dep-parser/pkg/io"
    15  	"github.com/devseccon/trivy/pkg/fanal/analyzer"
    16  	"github.com/devseccon/trivy/pkg/fanal/log"
    17  	"github.com/devseccon/trivy/pkg/fanal/types"
    18  	"github.com/devseccon/trivy/pkg/licensing"
    19  )
    20  
    21  const version = 1
    22  
    23  var (
    24  	skipDirs = []string{
    25  		"node_modules/",  // node scan will pick these up
    26  		"usr/share/doc/", // dpkg will pick these up
    27  
    28  		// Some heuristic exclusion
    29  		"usr/lib",
    30  		"usr/local/include",
    31  		"usr/include",
    32  		"usr/lib/python",
    33  		"usr/local/go",
    34  		"opt/yarn",
    35  		"usr/lib/gems",
    36  		"usr/src/wordpress",
    37  	}
    38  
    39  	acceptedExtensions = []string{
    40  		".asp", ".aspx", ".bas", ".bat", ".b", ".c", ".cue", ".cgi", ".cs", ".css", ".fish", ".html", ".h", ".ini",
    41  		".java", ".js", ".jsx", ".markdown", ".md", ".py", ".php", ".pl", ".r", ".rb", ".sh", ".sql", ".ts",
    42  		".tsx", ".txt", ".vue", ".zsh",
    43  	}
    44  
    45  	acceptedFileNames = []string{
    46  		"license", "licence", "copyright",
    47  	}
    48  )
    49  
    50  func init() {
    51  	analyzer.RegisterAnalyzer(&licenseFileAnalyzer{})
    52  }
    53  
    54  // licenseFileAnalyzer is an analyzer for file headers and license files
    55  type licenseFileAnalyzer struct {
    56  	classifierConfidenceLevel float64
    57  }
    58  
    59  func (a licenseFileAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
    60  	log.Logger.Debugf("License scanning: %s", input.FilePath)
    61  
    62  	// need files to be text based, readable files
    63  	readable, err := isHumanReadable(input.Content, input.Info.Size())
    64  	if err != nil || !readable {
    65  		return nil, nil
    66  	}
    67  	lf, err := licensing.Classify(input.FilePath, input.Content, a.classifierConfidenceLevel)
    68  	if err != nil {
    69  		return nil, xerrors.Errorf("license classification error: %w", err)
    70  	} else if len(lf.Findings) == 0 {
    71  		return nil, nil
    72  	}
    73  
    74  	return &analyzer.AnalysisResult{
    75  		Licenses: []types.LicenseFile{*lf},
    76  	}, nil
    77  }
    78  
    79  func (a *licenseFileAnalyzer) Init(opt analyzer.AnalyzerOptions) error {
    80  	a.classifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel
    81  	return nil
    82  }
    83  
    84  func (a licenseFileAnalyzer) Required(filePath string, _ os.FileInfo) bool {
    85  	for _, skipDir := range skipDirs {
    86  		if strings.Contains(filePath, skipDir) {
    87  			return false
    88  		}
    89  	}
    90  	ext := strings.ToLower(filepath.Ext(filePath))
    91  	if slices.Contains(acceptedExtensions, ext) {
    92  		return true
    93  	}
    94  
    95  	baseName := strings.ToLower(filepath.Base(filePath))
    96  	return slices.Contains(acceptedFileNames, baseName)
    97  }
    98  
    99  func isHumanReadable(content dio.ReadSeekerAt, fileSize int64) (bool, error) {
   100  	headSize := int(math.Min(float64(fileSize), 300))
   101  	head := make([]byte, headSize)
   102  	if _, err := content.Read(head); err != nil {
   103  		return false, err
   104  	}
   105  	if _, err := content.Seek(0, io.SeekStart); err != nil {
   106  		return false, err
   107  	}
   108  
   109  	// cf. https://github.com/file/file/blob/f2a6e7cb7db9b5fd86100403df6b2f830c7f22ba/src/encoding.c#L151-L228
   110  	for _, b := range head {
   111  		if b < 7 || b == 11 || (13 < b && b < 27) || (27 < b && b < 0x20) || b == 0x7f {
   112  			return false, nil
   113  		}
   114  	}
   115  
   116  	return true, nil
   117  }
   118  
   119  func (a licenseFileAnalyzer) Type() analyzer.Type {
   120  	return analyzer.TypeLicenseFile
   121  }
   122  
   123  func (a licenseFileAnalyzer) Version() int {
   124  	return version
   125  }