// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package gemspec extracts *.gemspec files.
package gemspec

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"io/fs"
	"path/filepath"
	"regexp"

	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/log"
	"github.com/google/osv-scalibr/plugin"
	"github.com/google/osv-scalibr/purl"
	"github.com/google/osv-scalibr/stats"
)

const (
	// Name is the unique name of this extractor.
	Name = "ruby/gemspec"
)

// Regex expressions used for extracting gemspec package name and version.
var (
	// reSpec matches a line that begins with `Gem::Specification.new`. The
	// pattern is anchored at the start of the line, so leading whitespace or a
	// preceding assignment prevents a match.
	reSpec = regexp.MustCompile(`^Gem::Specification\.new`)
	// reName matches `<ident>.name = "<value>"` with single or double quotes.
	// Group 1 is the quoted value.
	reName = regexp.MustCompile(`\s*\w+\.name\s*=\s*["']([^"']+)["']`)
	// reVerLiteral matches `<ident>.version = "<value>"` with single or double
	// quotes. Group 1 is the quoted value.
	reVerLiteral = regexp.MustCompile(`\s*\w+\.version\s*=\s*["']([^"']+)["']`)
	// reVerConst matches `<ident>.version = <token>` where the token consists
	// of letters, digits, underscores and colons (for example Foo::VERSION).
	// Group 1 is the token.
	reVerConst = regexp.MustCompile(`\s*\w+\.version\s*=\s*([A-Za-z0-9_:]+)`)
	// reRequireRel matches a line starting (after optional whitespace) with
	// `require_relative "<path>"`. Group 1 is the quoted path.
	reRequireRel = regexp.MustCompile(`^\s*require_relative\s+["']([^"']+)["']`)
	// reRequireLiteral matches a line starting (after optional whitespace) with
	// `require "<path>"`. Group 1 is the quoted path.
	reRequireLiteral = regexp.MustCompile(`^\s*require\s+["']([^"']+)["']`)
	// reConstAssignment matches an identifier starting with an uppercase letter
	// that is assigned a quoted string, optionally followed by `.freeze`.
	// Group 1 is the identifier, group 2 the value when single-quoted and
	// group 3 the value when double-quoted.
	reConstAssignment = regexp.MustCompile(`\b([A-Z][A-Za-z0-9_]*)\s*=\s*(?:'([^']+)'|"([^"]+)")(?:\s*\.freeze)?`)
)

// Config is the configuration for the Extractor.
53 type Config struct { 54 // Stats is a stats collector for reporting metrics. 55 Stats stats.Collector 56 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 57 // `FileRequired` gets a bigger file, it will return false, 58 MaxFileSizeBytes int64 59 } 60 61 // DefaultConfig returns the default configuration for the extractor. 62 func DefaultConfig() Config { 63 return Config{ 64 Stats: nil, 65 MaxFileSizeBytes: 0, 66 } 67 } 68 69 // Extractor extracts RubyGem package info from *.gemspec files. 70 type Extractor struct { 71 stats stats.Collector 72 maxFileSizeBytes int64 73 } 74 75 // New returns a Ruby gemspec extractor. 76 // 77 // For most use cases, initialize with: 78 // ``` 79 // e := New(DefaultConfig()) 80 // ``` 81 func New(cfg Config) *Extractor { 82 return &Extractor{ 83 stats: cfg.Stats, 84 maxFileSizeBytes: cfg.MaxFileSizeBytes, 85 } 86 } 87 88 // NewDefault returns an extractor with the default config settings. 89 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 90 91 // Name of the extractor 92 func (e Extractor) Name() string { return Name } 93 94 // Version of the extractor 95 func (e Extractor) Version() int { return 0 } 96 97 // Requirements of the extractor. 98 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 99 100 // FileRequired return true if the specified file matched the .gemspec file 101 // pattern. 
102 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 103 path := api.Path() 104 if filepath.Ext(path) != ".gemspec" { 105 return false 106 } 107 108 fileinfo, err := api.Stat() 109 if err != nil { 110 return false 111 } 112 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 113 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 114 return false 115 } 116 117 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 118 return true 119 } 120 121 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 122 if e.stats == nil { 123 return 124 } 125 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 126 Path: path, 127 Result: result, 128 FileSizeBytes: fileSizeBytes, 129 }) 130 } 131 132 // Extract extracts packages from the .gemspec file. 133 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 134 p, err := extract(input.Path, input.FS, input.Reader) 135 e.reportFileExtracted(input.Path, input.Info, filesystem.ExtractorErrorToFileExtractedResult(err)) 136 if err != nil { 137 return inventory.Inventory{}, fmt.Errorf("gemspec.parse: %w", err) 138 } 139 if p == nil { 140 return inventory.Inventory{}, nil 141 } 142 143 p.Locations = []string{input.Path} 144 return inventory.Inventory{Packages: []*extractor.Package{p}}, nil 145 } 146 147 func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, result stats.FileExtractedResult) { 148 if e.stats == nil { 149 return 150 } 151 var fileSizeBytes int64 152 if fileinfo != nil { 153 fileSizeBytes = fileinfo.Size() 154 } 155 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 156 Path: path, 157 Result: result, 158 FileSizeBytes: fileSizeBytes, 159 }) 160 } 161 162 // extract searches for the required name and version lines in the gemspec 163 // file using regex. 
It handles version strings defined either inline or via a 164 // constant populated through require_relative. 165 // Based on: https://guides.rubygems.org/specification-reference/ 166 func extract(path string, fsys fs.FS, r io.Reader) (*extractor.Package, error) { 167 buf := bufio.NewScanner(r) 168 gemName, gemVer := "", "" 169 foundStart := false 170 var ( 171 requirePaths []string 172 versionConst string 173 inlineConstants = make(map[string]string) 174 ) 175 reqAccum := &requireAccumulator{} 176 177 for buf.Scan() { 178 line := buf.Text() 179 180 requirePaths = appendUnique(requirePaths, reqAccum.Add(line)...) 181 182 if matches := reConstAssignment.FindStringSubmatch(line); len(matches) > 1 { 183 if val := constantValueFromMatch(matches); val != "" { 184 inlineConstants[matches[1]] = val 185 } 186 } 187 188 if !foundStart { 189 start := reSpec.FindString(line) 190 if start != "" { 191 foundStart = true 192 } 193 continue 194 } 195 if gemName != "" && gemVer != "" { 196 break 197 } 198 if gemName == "" { 199 nameArr := reName.FindStringSubmatch(line) 200 if len(nameArr) > 1 { 201 gemName = nameArr[1] 202 continue 203 } 204 } 205 if gemVer == "" { 206 if verArr := reVerLiteral.FindStringSubmatch(line); len(verArr) > 1 { 207 gemVer = verArr[1] 208 continue 209 } 210 if versionConst == "" { 211 if constMatch := reVerConst.FindStringSubmatch(line); len(constMatch) > 1 { 212 versionConst = constMatch[1] 213 } 214 } 215 } 216 } 217 218 if err := buf.Err(); err != nil { 219 log.Warnf("error scanning gemspec file %s: %v", path, err) 220 } 221 requirePaths = appendUnique(requirePaths, reqAccum.Flush()...) 222 223 // This was likely a marshalled gemspec. Not a readable text file. 
224 if !foundStart { 225 log.Warnf("error scanning gemspec (%s) could not find start of spec definition", path) 226 return nil, nil 227 } 228 229 if gemVer == "" && versionConst != "" { 230 if constName, ok := versionConstantName(versionConst); ok { 231 if v, ok := inlineConstants[constName]; ok { 232 gemVer = v 233 } else if resolved, err := resolveVersionFromRequires(fsys, path, requirePaths, constName); err == nil { 234 gemVer = resolved 235 } else { 236 log.Debugf("unable to resolve version constant %q in gemspec %s: %v", versionConst, path, err) 237 } 238 } 239 } 240 241 if gemName == "" || gemVer == "" { 242 return nil, fmt.Errorf("failed to parse gemspec name (%v) and version (%v)", gemName, gemVer) 243 } 244 245 return &extractor.Package{ 246 Name: gemName, 247 Version: gemVer, 248 PURLType: purl.TypeGem, 249 }, nil 250 }