github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/analyzer/licensing/license.go (about) 1 package licensing 2 3 import ( 4 "context" 5 "io" 6 "math" 7 "os" 8 "path/filepath" 9 "strings" 10 11 "golang.org/x/exp/slices" 12 "golang.org/x/xerrors" 13 14 dio "github.com/aquasecurity/go-dep-parser/pkg/io" 15 "github.com/devseccon/trivy/pkg/fanal/analyzer" 16 "github.com/devseccon/trivy/pkg/fanal/log" 17 "github.com/devseccon/trivy/pkg/fanal/types" 18 "github.com/devseccon/trivy/pkg/licensing" 19 ) 20 21 const version = 1 22 23 var ( 24 skipDirs = []string{ 25 "node_modules/", // node scan will pick these up 26 "usr/share/doc/", // dpkg will pick these up 27 28 // Some heuristic exclusion 29 "usr/lib", 30 "usr/local/include", 31 "usr/include", 32 "usr/lib/python", 33 "usr/local/go", 34 "opt/yarn", 35 "usr/lib/gems", 36 "usr/src/wordpress", 37 } 38 39 acceptedExtensions = []string{ 40 ".asp", ".aspx", ".bas", ".bat", ".b", ".c", ".cue", ".cgi", ".cs", ".css", ".fish", ".html", ".h", ".ini", 41 ".java", ".js", ".jsx", ".markdown", ".md", ".py", ".php", ".pl", ".r", ".rb", ".sh", ".sql", ".ts", 42 ".tsx", ".txt", ".vue", ".zsh", 43 } 44 45 acceptedFileNames = []string{ 46 "license", "licence", "copyright", 47 } 48 ) 49 50 func init() { 51 analyzer.RegisterAnalyzer(&licenseFileAnalyzer{}) 52 } 53 54 // licenseFileAnalyzer is an analyzer for file headers and license files 55 type licenseFileAnalyzer struct { 56 classifierConfidenceLevel float64 57 } 58 59 func (a licenseFileAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) { 60 log.Logger.Debugf("License scanning: %s", input.FilePath) 61 62 // need files to be text based, readable files 63 readable, err := isHumanReadable(input.Content, input.Info.Size()) 64 if err != nil || !readable { 65 return nil, nil 66 } 67 lf, err := licensing.Classify(input.FilePath, input.Content, a.classifierConfidenceLevel) 68 if err != nil { 69 return nil, xerrors.Errorf("license classification error: %w", err) 70 } else if len(lf.Findings) == 0 { 71 return nil, nil 72 } 73 74 return &analyzer.AnalysisResult{ 75 Licenses: []types.LicenseFile{*lf}, 76 }, nil 77 } 78 79 func (a *licenseFileAnalyzer) Init(opt analyzer.AnalyzerOptions) error { 80 a.classifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel 81 return nil 82 } 83 84 func (a licenseFileAnalyzer) Required(filePath string, _ os.FileInfo) bool { 85 for _, skipDir := range skipDirs { 86 if strings.Contains(filePath, skipDir) { 87 return false 88 } 89 } 90 ext := strings.ToLower(filepath.Ext(filePath)) 91 if slices.Contains(acceptedExtensions, ext) { 92 return true 93 } 94 95 baseName := strings.ToLower(filepath.Base(filePath)) 96 return slices.Contains(acceptedFileNames, baseName) 97 } 98 99 func isHumanReadable(content dio.ReadSeekerAt, fileSize int64) (bool, error) { 100 headSize := int(math.Min(float64(fileSize), 300)) 101 head := make([]byte, headSize) 102 if _, err := content.Read(head); err != nil { 103 return false, err 104 } 105 if _, err := content.Seek(0, io.SeekStart); err != nil { 106 return false, err 107 } 108 109 // cf. https://github.com/file/file/blob/f2a6e7cb7db9b5fd86100403df6b2f830c7f22ba/src/encoding.c#L151-L228 110 for _, b := range head { 111 if b < 7 || b == 11 || (13 < b && b < 27) || (27 < b && b < 0x20) || b == 0x7f { 112 return false, nil 113 } 114 } 115 116 return true, nil 117 } 118 119 func (a licenseFileAnalyzer) Type() analyzer.Type { 120 return analyzer.TypeLicenseFile 121 } 122 123 func (a licenseFileAnalyzer) Version() int { 124 return version 125 }