github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/ruby/gemspec/gemspec.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package gemspec extracts *.gemspec files.
    16  package gemspec
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"fmt"
    22  	"io"
    23  	"io/fs"
    24  	"path/filepath"
    25  	"regexp"
    26  
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/log"
    31  	"github.com/google/osv-scalibr/plugin"
    32  	"github.com/google/osv-scalibr/purl"
    33  	"github.com/google/osv-scalibr/stats"
    34  )
    35  
    36  const (
    37  	// Name is the unique name of this extractor.
    38  	Name = "ruby/gemspec"
    39  )
    40  
    41  // Regex expressions used for extracting gemspec package name and version.
    42  var (
    43  	reSpec            = regexp.MustCompile(`^Gem::Specification\.new`)
    44  	reName            = regexp.MustCompile(`\s*\w+\.name\s*=\s*["']([^"']+)["']`)
    45  	reVerLiteral      = regexp.MustCompile(`\s*\w+\.version\s*=\s*["']([^"']+)["']`)
    46  	reVerConst        = regexp.MustCompile(`\s*\w+\.version\s*=\s*([A-Za-z0-9_:]+)`)
    47  	reRequireRel      = regexp.MustCompile(`^\s*require_relative\s+["']([^"']+)["']`)
    48  	reRequireLiteral  = regexp.MustCompile(`^\s*require\s+["']([^"']+)["']`)
    49  	reConstAssignment = regexp.MustCompile(`\b([A-Z][A-Za-z0-9_]*)\s*=\s*(?:'([^']+)'|"([^"]+)")(?:\s*\.freeze)?`)
    50  )
    51  
    52  // Config is the configuration for the Extractor.
    53  type Config struct {
    54  	// Stats is a stats collector for reporting metrics.
    55  	Stats stats.Collector
    56  	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If
    57  	// `FileRequired` gets a bigger file, it will return false,
    58  	MaxFileSizeBytes int64
    59  }
    60  
    61  // DefaultConfig returns the default configuration for the extractor.
    62  func DefaultConfig() Config {
    63  	return Config{
    64  		Stats:            nil,
    65  		MaxFileSizeBytes: 0,
    66  	}
    67  }
    68  
    69  // Extractor extracts RubyGem package info from *.gemspec files.
    70  type Extractor struct {
    71  	stats            stats.Collector
    72  	maxFileSizeBytes int64
    73  }
    74  
    75  // New returns a Ruby gemspec extractor.
    76  //
    77  // For most use cases, initialize with:
    78  // ```
    79  // e := New(DefaultConfig())
    80  // ```
    81  func New(cfg Config) *Extractor {
    82  	return &Extractor{
    83  		stats:            cfg.Stats,
    84  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    85  	}
    86  }
    87  
    88  // NewDefault returns an extractor with the default config settings.
    89  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    90  
    91  // Name of the extractor
    92  func (e Extractor) Name() string { return Name }
    93  
    94  // Version of the extractor
    95  func (e Extractor) Version() int { return 0 }
    96  
    97  // Requirements of the extractor.
    98  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    99  
   100  // FileRequired return true if the specified file matched the .gemspec file
   101  // pattern.
   102  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   103  	path := api.Path()
   104  	if filepath.Ext(path) != ".gemspec" {
   105  		return false
   106  	}
   107  
   108  	fileinfo, err := api.Stat()
   109  	if err != nil {
   110  		return false
   111  	}
   112  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   113  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   114  		return false
   115  	}
   116  
   117  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   118  	return true
   119  }
   120  
   121  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   122  	if e.stats == nil {
   123  		return
   124  	}
   125  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   126  		Path:          path,
   127  		Result:        result,
   128  		FileSizeBytes: fileSizeBytes,
   129  	})
   130  }
   131  
   132  // Extract extracts packages from the .gemspec file.
   133  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   134  	p, err := extract(input.Path, input.FS, input.Reader)
   135  	e.reportFileExtracted(input.Path, input.Info, filesystem.ExtractorErrorToFileExtractedResult(err))
   136  	if err != nil {
   137  		return inventory.Inventory{}, fmt.Errorf("gemspec.parse: %w", err)
   138  	}
   139  	if p == nil {
   140  		return inventory.Inventory{}, nil
   141  	}
   142  
   143  	p.Locations = []string{input.Path}
   144  	return inventory.Inventory{Packages: []*extractor.Package{p}}, nil
   145  }
   146  
   147  func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, result stats.FileExtractedResult) {
   148  	if e.stats == nil {
   149  		return
   150  	}
   151  	var fileSizeBytes int64
   152  	if fileinfo != nil {
   153  		fileSizeBytes = fileinfo.Size()
   154  	}
   155  	e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   156  		Path:          path,
   157  		Result:        result,
   158  		FileSizeBytes: fileSizeBytes,
   159  	})
   160  }
   161  
   162  // extract searches for the required name and version lines in the gemspec
   163  // file using regex. It handles version strings defined either inline or via a
   164  // constant populated through require_relative.
   165  // Based on: https://guides.rubygems.org/specification-reference/
   166  func extract(path string, fsys fs.FS, r io.Reader) (*extractor.Package, error) {
   167  	buf := bufio.NewScanner(r)
   168  	gemName, gemVer := "", ""
   169  	foundStart := false
   170  	var (
   171  		requirePaths    []string
   172  		versionConst    string
   173  		inlineConstants = make(map[string]string)
   174  	)
   175  	reqAccum := &requireAccumulator{}
   176  
   177  	for buf.Scan() {
   178  		line := buf.Text()
   179  
   180  		requirePaths = appendUnique(requirePaths, reqAccum.Add(line)...)
   181  
   182  		if matches := reConstAssignment.FindStringSubmatch(line); len(matches) > 1 {
   183  			if val := constantValueFromMatch(matches); val != "" {
   184  				inlineConstants[matches[1]] = val
   185  			}
   186  		}
   187  
   188  		if !foundStart {
   189  			start := reSpec.FindString(line)
   190  			if start != "" {
   191  				foundStart = true
   192  			}
   193  			continue
   194  		}
   195  		if gemName != "" && gemVer != "" {
   196  			break
   197  		}
   198  		if gemName == "" {
   199  			nameArr := reName.FindStringSubmatch(line)
   200  			if len(nameArr) > 1 {
   201  				gemName = nameArr[1]
   202  				continue
   203  			}
   204  		}
   205  		if gemVer == "" {
   206  			if verArr := reVerLiteral.FindStringSubmatch(line); len(verArr) > 1 {
   207  				gemVer = verArr[1]
   208  				continue
   209  			}
   210  			if versionConst == "" {
   211  				if constMatch := reVerConst.FindStringSubmatch(line); len(constMatch) > 1 {
   212  					versionConst = constMatch[1]
   213  				}
   214  			}
   215  		}
   216  	}
   217  
   218  	if err := buf.Err(); err != nil {
   219  		log.Warnf("error scanning gemspec file %s: %v", path, err)
   220  	}
   221  	requirePaths = appendUnique(requirePaths, reqAccum.Flush()...)
   222  
   223  	// This was likely a marshalled gemspec. Not a readable text file.
   224  	if !foundStart {
   225  		log.Warnf("error scanning gemspec (%s) could not find start of spec definition", path)
   226  		return nil, nil
   227  	}
   228  
   229  	if gemVer == "" && versionConst != "" {
   230  		if constName, ok := versionConstantName(versionConst); ok {
   231  			if v, ok := inlineConstants[constName]; ok {
   232  				gemVer = v
   233  			} else if resolved, err := resolveVersionFromRequires(fsys, path, requirePaths, constName); err == nil {
   234  				gemVer = resolved
   235  			} else {
   236  				log.Debugf("unable to resolve version constant %q in gemspec %s: %v", versionConst, path, err)
   237  			}
   238  		}
   239  	}
   240  
   241  	if gemName == "" || gemVer == "" {
   242  		return nil, fmt.Errorf("failed to parse gemspec name (%v) and version (%v)", gemName, gemVer)
   243  	}
   244  
   245  	return &extractor.Package{
   246  		Name:     gemName,
   247  		Version:  gemVer,
   248  		PURLType: purl.TypeGem,
   249  	}, nil
   250  }