github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/haskell/stacklock/stacklock.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package stacklock extracts stack.yaml.lock files from haskell projects.
    16  package stacklock
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"fmt"
    22  	"path/filepath"
    23  	"regexp"
    24  	"strings"
    25  
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem"
    28  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/plugin"
    31  	"github.com/google/osv-scalibr/purl"
    32  	"github.com/google/osv-scalibr/stats"
    33  )
    34  
const (
	// Name is the unique name of this extractor.
	Name = "haskell/stacklock"

	// defaultMaxFileSizeBytes is the file size limit used by DefaultConfig.
	// FileRequired returns false for files larger than the configured limit,
	// so such files are never handed to Extract.
	defaultMaxFileSizeBytes = 30 * units.MiB
)
    43  
// Config is the configuration for the Extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics. It may be nil, in
	// which case no metrics are reported.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will process.
	// If `FileRequired` gets a bigger file, it will return false. A value
	// less than or equal to zero disables the size check.
	MaxFileSizeBytes int64
}
    52  
    53  // DefaultConfig returns the default configuration for the extractor.
    54  func DefaultConfig() Config {
    55  	return Config{
    56  		MaxFileSizeBytes: defaultMaxFileSizeBytes,
    57  		Stats:            nil,
    58  	}
    59  }
    60  
// Extractor extracts stacklock package info from stack.yaml.lock files.
type Extractor struct {
	// stats receives FileRequired/Extract metrics; nil disables reporting.
	stats stats.Collector
	// maxFileSizeBytes is the size limit enforced by FileRequired; values
	// less than or equal to zero disable the check.
	maxFileSizeBytes int64
}
    66  
    67  // New returns a haskell stacklock extractor.
    68  func New(cfg Config) *Extractor {
    69  	return &Extractor{
    70  		stats:            cfg.Stats,
    71  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    72  	}
    73  }
    74  
    75  // NewDefault returns an extractor with the default config settings.
    76  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    77  
    78  // Config returns the configuration of the extractor.
    79  func (e Extractor) Config() Config {
    80  	return Config{
    81  		Stats:            e.stats,
    82  		MaxFileSizeBytes: e.maxFileSizeBytes,
    83  	}
    84  }
    85  
    86  // Name of the extractor
    87  func (e Extractor) Name() string { return Name }
    88  
    89  // Version of the extractor
    90  func (e Extractor) Version() int { return 0 }
    91  
    92  // Requirements of the extractor.
    93  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    94  
    95  // FileRequired return true if the specified file matched the stack.yaml.lock file pattern.
    96  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    97  	path := api.Path()
    98  
    99  	if filepath.Base(path) != "stack.yaml.lock" {
   100  		return false
   101  	}
   102  
   103  	fileinfo, err := api.Stat()
   104  	if err != nil {
   105  		return false
   106  	}
   107  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   108  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   109  		return false
   110  	}
   111  
   112  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   113  	return true
   114  }
   115  
   116  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   117  	if e.stats == nil {
   118  		return
   119  	}
   120  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   121  		Path:          path,
   122  		Result:        result,
   123  		FileSizeBytes: fileSizeBytes,
   124  	})
   125  }
   126  
   127  // Extract extracts packages from the stack.yaml.lock file.
   128  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   129  	pkgs, err := e.extractFromInput(ctx, input)
   130  
   131  	if e.stats != nil {
   132  		var fileSizeBytes int64
   133  		if input.Info != nil {
   134  			fileSizeBytes = input.Info.Size()
   135  		}
   136  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   137  			Path:          input.Path,
   138  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   139  			FileSizeBytes: fileSizeBytes,
   140  		})
   141  	}
   142  	return inventory.Inventory{Packages: pkgs}, err
   143  }
   144  
// packageVersionRe matches a `hackage: <name>-<version>@` entry. Capture
// group 1 is the package name (letters, digits and dashes) and group 2 is the
// version (digits and dots).
var packageVersionRe = regexp.MustCompile(`hackage:\s*([a-zA-Z0-9\-]+)-([0-9.]+)@`)
   146  
   147  func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) {
   148  	s := bufio.NewScanner(input.Reader)
   149  	packages := []*extractor.Package{}
   150  
   151  	for s.Scan() {
   152  		// Return if canceled or exceeding deadline.
   153  		if err := ctx.Err(); err != nil {
   154  			return packages, fmt.Errorf("%s halted due to context error: %w", e.Name(), err)
   155  		}
   156  
   157  		line := strings.TrimSpace(s.Text())
   158  
   159  		if line == "" {
   160  			continue
   161  		}
   162  
   163  		matches := packageVersionRe.FindStringSubmatch(line)
   164  		if len(matches) == 3 {
   165  			pkgName := matches[1]
   166  			pkgVersion := matches[2]
   167  
   168  			p := &extractor.Package{
   169  				Name:      pkgName,
   170  				Version:   pkgVersion,
   171  				PURLType:  purl.TypeHaskell,
   172  				Locations: []string{input.Path},
   173  			}
   174  
   175  			packages = append(packages, p)
   176  		}
   177  
   178  		if s.Err() != nil {
   179  			return packages, fmt.Errorf("error while scanning cabal.project.freeze file: %w", s.Err())
   180  		}
   181  	}
   182  
   183  	return packages, nil
   184  }