github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/ruby/gemfilelock/gemfilelock.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package gemfilelock extracts Gemfile.lock files.
    16  package gemfilelock
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"path/filepath"
    24  	"regexp"
    25  	"slices"
    26  	"strings"
    27  
    28  	"github.com/google/osv-scalibr/extractor"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  	"github.com/google/osv-scalibr/inventory"
    31  	"github.com/google/osv-scalibr/log"
    32  	"github.com/google/osv-scalibr/plugin"
    33  	"github.com/google/osv-scalibr/purl"
    34  )
    35  
    36  const (
    37  	// Name is the unique name of this extractor.
    38  	Name = "ruby/gemfilelock"
    39  )
    40  
    41  var (
    42  	// Gemfile.lock spec lines follow the format "name (version)"
    43  	nameVersionRegexp = regexp.MustCompile(`^(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
    44  	indentRegexp      = regexp.MustCompile(`^( +)`)
    45  )
    46  
// Extractor extracts package info from Gemfile.lock files.
// It is an empty struct and carries no configuration or state.
type Extractor struct{}
    49  
    50  // New returns a new instance of the extractor.
    51  func New() filesystem.Extractor { return &Extractor{} }
    52  
    53  // Name of the extractor.
    54  func (e Extractor) Name() string { return Name }
    55  
    56  // Version of the extractor.
    57  func (e Extractor) Version() int { return 0 }
    58  
    59  // Requirements of the extractor.
    60  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    61  
    62  // FileRequired return true if the specified file is a Gemfile.lock file.
    63  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    64  	return slices.Contains([]string{"Gemfile.lock", "gems.locked"}, filepath.Base(api.Path()))
    65  }
    66  
    67  type gemlockSection struct {
    68  	name     string
    69  	revision string
    70  	specs    []string
    71  }
    72  
    73  func parseLockfileSections(input *filesystem.ScanInput) ([]*gemlockSection, error) {
    74  	sections := []*gemlockSection{}
    75  	var currentSection *gemlockSection
    76  	scanner := bufio.NewScanner(input.Reader)
    77  	for scanner.Scan() {
    78  		if err := scanner.Err(); err != nil {
    79  			return nil, fmt.Errorf("error while scanning: %w", err)
    80  		}
    81  		line := scanner.Text()
    82  		if len(line) == 0 {
    83  			// Skip empty lines.
    84  			continue
    85  		}
    86  		m := indentRegexp.FindStringSubmatch(line)
    87  		if m == nil { // No spaces at the start, this is a new section.
    88  			if currentSection != nil {
    89  				sections = append(sections, currentSection)
    90  			}
    91  			currentSection = &gemlockSection{name: line}
    92  		} else if len(m[0]) == 4 {
    93  			// Indented with 4 spaces: This line contains a top-level spec for the current section.
    94  			if currentSection == nil {
    95  				return nil, errors.New("invalid lockfile: specs entry before a section declaration")
    96  			}
    97  			currentSection.specs = append(currentSection.specs, strings.TrimPrefix(line, "    "))
    98  		} else if strings.HasPrefix(line, "  revision: ") {
    99  			// The commit for the given section. Always stored at an indentation level of 2.
   100  			if currentSection == nil {
   101  				return nil, errors.New("invalid lockfile: revision entry before a section declaration")
   102  			}
   103  			currentSection.revision = strings.TrimPrefix(line, "  revision: ")
   104  		}
   105  		// We don't store info about any other entries at the moment.
   106  	}
   107  	// Append the trailing section too.
   108  	if currentSection != nil {
   109  		sections = append(sections, currentSection)
   110  	}
   111  	return sections, nil
   112  }
   113  
   114  // Extract extracts packages from the Gemfile.lock file.
   115  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   116  	sections, err := parseLockfileSections(input)
   117  	if err != nil {
   118  		return inventory.Inventory{}, fmt.Errorf("error parsing: %w", err)
   119  	}
   120  
   121  	pkgs := []*extractor.Package{}
   122  	for _, section := range sections {
   123  		if !slices.Contains([]string{"GIT", "GEM", "PATH", "PLUGIN SOURCE"}, section.name) {
   124  			// Not a source section.
   125  			continue
   126  		}
   127  		for _, s := range section.specs {
   128  			m := nameVersionRegexp.FindStringSubmatch(s)
   129  			if len(m) < 3 || m[1] == "" || m[2] == "" {
   130  				log.Errorf("Invalid spec line: %s", s)
   131  				continue
   132  			}
   133  			name, version := m[1], m[2]
   134  			p := &extractor.Package{
   135  				Name:      name,
   136  				Version:   version,
   137  				PURLType:  purl.TypeGem,
   138  				Locations: []string{input.Path},
   139  			}
   140  			if section.revision != "" {
   141  				p.SourceCode = &extractor.SourceCodeIdentifier{
   142  					Commit: section.revision,
   143  				}
   144  			}
   145  			pkgs = append(pkgs, p)
   146  		}
   147  	}
   148  	return inventory.Inventory{Packages: pkgs}, nil
   149  }
   150  
// Compile-time check that Extractor (used by value) implements filesystem.Extractor.
var _ filesystem.Extractor = Extractor{}