github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/haskell/stacklock/stacklock.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package stacklock extracts stack.yaml.lock files from haskell projects. 16 package stacklock 17 18 import ( 19 "bufio" 20 "context" 21 "fmt" 22 "path/filepath" 23 "regexp" 24 "strings" 25 26 "github.com/google/osv-scalibr/extractor" 27 "github.com/google/osv-scalibr/extractor/filesystem" 28 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/plugin" 31 "github.com/google/osv-scalibr/purl" 32 "github.com/google/osv-scalibr/stats" 33 ) 34 35 const ( 36 // Name is the unique name of this extractor. 37 Name = "haskell/stacklock" 38 39 // defaultMaxFileSizeBytes is the maximum file size an extractor will unmarshal. 40 // If Extract gets a bigger file, it will return an error. 41 defaultMaxFileSizeBytes = 30 * units.MiB 42 ) 43 44 // Config is the configuration for the Extractor. 45 type Config struct { 46 // Stats is a stats collector for reporting metrics. 47 Stats stats.Collector 48 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 49 // `FileRequired` gets a bigger file, it will return false, 50 MaxFileSizeBytes int64 51 } 52 53 // DefaultConfig returns the default configuration for the extractor. 54 func DefaultConfig() Config { 55 return Config{ 56 MaxFileSizeBytes: defaultMaxFileSizeBytes, 57 Stats: nil, 58 } 59 } 60 61 // Extractor extracts stacklock package info from stack.yaml.lock files. 62 type Extractor struct { 63 stats stats.Collector 64 maxFileSizeBytes int64 65 } 66 67 // New returns a haskell stacklock extractor. 68 func New(cfg Config) *Extractor { 69 return &Extractor{ 70 stats: cfg.Stats, 71 maxFileSizeBytes: cfg.MaxFileSizeBytes, 72 } 73 } 74 75 // NewDefault returns an extractor with the default config settings. 76 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 77 78 // Config returns the configuration of the extractor. 79 func (e Extractor) Config() Config { 80 return Config{ 81 Stats: e.stats, 82 MaxFileSizeBytes: e.maxFileSizeBytes, 83 } 84 } 85 86 // Name of the extractor 87 func (e Extractor) Name() string { return Name } 88 89 // Version of the extractor 90 func (e Extractor) Version() int { return 0 } 91 92 // Requirements of the extractor. 93 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 94 95 // FileRequired return true if the specified file matched the stack.yaml.lock file pattern. 96 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 97 path := api.Path() 98 99 if filepath.Base(path) != "stack.yaml.lock" { 100 return false 101 } 102 103 fileinfo, err := api.Stat() 104 if err != nil { 105 return false 106 } 107 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 108 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 109 return false 110 } 111 112 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 113 return true 114 } 115 116 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 117 if e.stats == nil { 118 return 119 } 120 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 121 Path: path, 122 Result: result, 123 FileSizeBytes: fileSizeBytes, 124 }) 125 } 126 127 // Extract extracts packages from the stack.yaml.lock file. 128 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 129 pkgs, err := e.extractFromInput(ctx, input) 130 131 if e.stats != nil { 132 var fileSizeBytes int64 133 if input.Info != nil { 134 fileSizeBytes = input.Info.Size() 135 } 136 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 137 Path: input.Path, 138 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 139 FileSizeBytes: fileSizeBytes, 140 }) 141 } 142 return inventory.Inventory{Packages: pkgs}, err 143 } 144 145 var packageVersionRe = regexp.MustCompile(`hackage:\s*([a-zA-Z0-9\-]+)-([0-9.]+)@`) 146 147 func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) { 148 s := bufio.NewScanner(input.Reader) 149 packages := []*extractor.Package{} 150 151 for s.Scan() { 152 // Return if canceled or exceeding deadline. 153 if err := ctx.Err(); err != nil { 154 return packages, fmt.Errorf("%s halted due to context error: %w", e.Name(), err) 155 } 156 157 line := strings.TrimSpace(s.Text()) 158 159 if line == "" { 160 continue 161 } 162 163 matches := packageVersionRe.FindStringSubmatch(line) 164 if len(matches) == 3 { 165 pkgName := matches[1] 166 pkgVersion := matches[2] 167 168 p := &extractor.Package{ 169 Name: pkgName, 170 Version: pkgVersion, 171 PURLType: purl.TypeHaskell, 172 Locations: []string{input.Path}, 173 } 174 175 packages = append(packages, p) 176 } 177 178 if s.Err() != nil { 179 return packages, fmt.Errorf("error while scanning cabal.project.freeze file: %w", s.Err()) 180 } 181 } 182 183 return packages, nil 184 }