github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/os/pacman/pacman.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package pacman extracts installed packages from Arch Linux pacman desc files.
    16  package pacman
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"path/filepath"
    25  	"strings"
    26  
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/os/osrelease"
    31  	pacmanmeta "github.com/google/osv-scalibr/extractor/filesystem/os/pacman/metadata"
    32  	"github.com/google/osv-scalibr/inventory"
    33  	"github.com/google/osv-scalibr/log"
    34  	"github.com/google/osv-scalibr/plugin"
    35  	"github.com/google/osv-scalibr/purl"
    36  	"github.com/google/osv-scalibr/stats"
    37  )
    38  
    39  const (
    40  	// Name is the unique name of this extractor.
    41  	Name = "os/pacman"
    42  
    43  	// defaultMaxFileSizeBytes is the maximum file size an extractor will unmarshal.
    44  	// If Extract gets a bigger file, it will return an error.
    45  	defaultMaxFileSizeBytes = 100 * units.MiB
    46  )
    47  
// Config is the configuration for the Extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics. It may be nil, in which
	// case no metrics are reported.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will process. If
	// `FileRequired` gets a bigger file, it returns false. A value <= 0 disables
	// the size check.
	MaxFileSizeBytes int64
}
    56  
    57  // DefaultConfig returns the default configuration for the pacman extractor.
    58  func DefaultConfig() Config {
    59  	return Config{
    60  		Stats:            nil,
    61  		MaxFileSizeBytes: defaultMaxFileSizeBytes,
    62  	}
    63  }
    64  
// Extractor extracts pacman packages from /var/lib/pacman/local/<package>/desc files.
//
// stats is an optional metrics collector; when nil, no metrics are reported.
// maxFileSizeBytes is the size limit applied by FileRequired; a value <= 0
// disables the check.
type Extractor struct {
	stats            stats.Collector
	maxFileSizeBytes int64
}
    70  
    71  // New returns a pacman extractor.
    72  func New(cfg Config) *Extractor {
    73  	return &Extractor{
    74  		stats:            cfg.Stats,
    75  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    76  	}
    77  }
    78  
    79  // NewDefault returns an extractor with the default config settings.
    80  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    81  
    82  // Config returns the configuration of the extractor.
    83  func (e Extractor) Config() Config {
    84  	return Config{
    85  		Stats:            e.stats,
    86  		MaxFileSizeBytes: e.maxFileSizeBytes,
    87  	}
    88  }
    89  
    90  // Name of the extractor.
    91  func (e Extractor) Name() string { return Name }
    92  
    93  // Version of the extractor.
    94  func (e Extractor) Version() int { return 0 }
    95  
    96  // Requirements of the extractor.
    97  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    98  
    99  // FileRequired returns true if the specified file matches the "desc" file patterns.
   100  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   101  	// archPrefix and archSuffix are used to match the right file and location.
   102  	archPrefix := "var/lib/pacman/local/"
   103  	archSuffix := "desc"
   104  	path := api.Path()
   105  
   106  	if !strings.HasPrefix(path, archPrefix) || filepath.Base(path) != archSuffix {
   107  		return false
   108  	}
   109  
   110  	fileinfo, err := api.Stat()
   111  	if err != nil {
   112  		return false
   113  	}
   114  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   115  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   116  		return false
   117  	}
   118  
   119  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   120  	return true
   121  }
   122  
   123  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   124  	if e.stats == nil {
   125  		return
   126  	}
   127  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   128  		Path:          path,
   129  		Result:        result,
   130  		FileSizeBytes: fileSizeBytes,
   131  	})
   132  }
   133  
   134  // Extract extracts packages from "desc" files passed through the scan input.
   135  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   136  	pkgs, err := e.extractFromInput(ctx, input)
   137  
   138  	if e.stats != nil {
   139  		var fileSizeBytes int64
   140  		if input.Info != nil {
   141  			fileSizeBytes = input.Info.Size()
   142  		}
   143  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   144  			Path:          input.Path,
   145  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   146  			FileSizeBytes: fileSizeBytes,
   147  		})
   148  	}
   149  	return inventory.Inventory{Packages: pkgs}, err
   150  }
   151  
   152  func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) {
   153  	m, err := osrelease.GetOSRelease(input.FS)
   154  	if err != nil {
   155  		log.Errorf("osrelease.ParseOsRelease(): %v", err)
   156  	}
   157  
   158  	s := bufio.NewScanner(input.Reader)
   159  	var pkgName, pkgVersion, pkgDependencies string
   160  	packages := []*extractor.Package{}
   161  
   162  	for s.Scan() {
   163  		// Return if canceled or exceeding deadline.
   164  		if err := ctx.Err(); err != nil {
   165  			return packages, fmt.Errorf("%s halted due to context error: %w", e.Name(), err)
   166  		}
   167  
   168  		line := s.Text()
   169  		line = strings.TrimSpace(line)
   170  
   171  		if len(line) == 0 {
   172  			continue
   173  		}
   174  
   175  		if strings.HasPrefix(line, "%NAME%") {
   176  			pkgName, err = extractValue(s)
   177  		} else if strings.HasPrefix(line, "%VERSION%") {
   178  			pkgVersion, err = extractValue(s)
   179  		} else if strings.HasPrefix(line, "%DEPENDS%") {
   180  			pkgDependencies, err = extractValues(s)
   181  		}
   182  
   183  		if err != nil {
   184  			if errors.Is(err, io.EOF) {
   185  				log.Warnf("Reached EOF for desc file in %v", input.Path)
   186  				break
   187  			}
   188  			return packages, fmt.Errorf("%s halted: %w", e.Name(), err)
   189  		}
   190  	}
   191  
   192  	if pkgName != "" && pkgVersion != "" {
   193  		p := &extractor.Package{
   194  			Name:     pkgName,
   195  			Version:  pkgVersion,
   196  			PURLType: purl.TypePacman,
   197  			Metadata: &pacmanmeta.Metadata{
   198  				PackageName:    pkgName,
   199  				PackageVersion: pkgVersion,
   200  				OSID:           m["ID"],
   201  				OSVersionID:    m["VERSION_ID"],
   202  			},
   203  			Locations: []string{input.Path},
   204  		}
   205  
   206  		if len(pkgDependencies) != 0 {
   207  			p.Metadata.(*pacmanmeta.Metadata).PackageDependencies = pkgDependencies
   208  		}
   209  
   210  		packages = append(packages, p)
   211  	}
   212  
   213  	return packages, nil
   214  }
   215  
   216  func extractValue(scanner *bufio.Scanner) (string, error) {
   217  	if !scanner.Scan() {
   218  		if err := scanner.Err(); err != nil {
   219  			return "", err
   220  		}
   221  
   222  		// EOF
   223  		return "", io.EOF
   224  	}
   225  
   226  	return strings.TrimSpace(scanner.Text()), nil
   227  }
   228  
   229  func extractValues(scanner *bufio.Scanner) (string, error) {
   230  	var values []string
   231  
   232  	for {
   233  		if !scanner.Scan() {
   234  			if err := scanner.Err(); err != nil {
   235  				return "", err
   236  			}
   237  
   238  			// EOF
   239  			return strings.Join(values, ", "), io.EOF
   240  		}
   241  
   242  		line := strings.TrimSpace(scanner.Text())
   243  
   244  		if len(line) == 0 {
   245  			break
   246  		}
   247  
   248  		values = append(values, line)
   249  	}
   250  
   251  	return strings.Join(values, ", "), nil
   252  }