github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/os/rpm/rpm.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build !windows
    16  
    17  // Package rpm extracts packages from rpm database.
    18  package rpm
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"slices"
    26  	"time"
    27  
    28  	rpmdb "github.com/erikvarga/go-rpmdb/pkg"
    29  	"github.com/google/osv-scalibr/extractor"
    30  	"github.com/google/osv-scalibr/extractor/filesystem"
    31  	"github.com/google/osv-scalibr/extractor/filesystem/os/osrelease"
    32  	rpmmeta "github.com/google/osv-scalibr/extractor/filesystem/os/rpm/metadata"
    33  	"github.com/google/osv-scalibr/inventory"
    34  	"github.com/google/osv-scalibr/log"
    35  	"github.com/google/osv-scalibr/plugin"
    36  	"github.com/google/osv-scalibr/purl"
    37  	"github.com/google/osv-scalibr/stats"
    38  
    39  	// SQLite driver needed for parsing rpmdb.sqlite files.
    40  	_ "modernc.org/sqlite"
    41  )
    42  
    43  // Name is the name for the RPM extractor
    44  const Name = "os/rpm"
    45  
    46  const defaultTimeout = 5 * time.Minute
    47  
    48  var (
    49  	requiredDirectory = []string{
    50  		"usr/lib/sysimage/rpm/",
    51  		"var/lib/rpm/",
    52  		"usr/share/rpm/",
    53  	}
    54  
    55  	requiredFilename = []string{
    56  		// Berkley DB (old format)
    57  		"Packages",
    58  		// NDB (very rare alternative to sqlite)
    59  		"Packages.db",
    60  		// SQLite3 (new format)
    61  		"rpmdb.sqlite",
    62  	}
    63  )
    64  
// Config contains the configuration values specific to the RPM extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics. It may be nil, in which
	// case no metrics are reported.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If
	// `FileRequired` gets a bigger file, it will return false. The limit is only
	// enforced when the value is greater than 0.
	MaxFileSizeBytes int64
	// Timeout is the timeout duration for parsing the RPM database. A value of 0
	// disables the timeout.
	Timeout time.Duration
}
    75  
    76  // DefaultConfig returns the default configuration values for the RPM extractor.
    77  func DefaultConfig() Config {
    78  	return Config{
    79  		Stats:            nil,
    80  		MaxFileSizeBytes: 0,
    81  		Timeout:          defaultTimeout,
    82  	}
    83  }
    84  
// Extractor extracts rpm packages from rpm database.
//
// stats, when non-nil, receives the FileRequired and Extract metrics.
// maxFileSizeBytes, when greater than 0, is the size above which FileRequired
// rejects a file. Timeout, when non-zero, bounds the listing of packages from
// the database.
type Extractor struct {
	stats            stats.Collector
	maxFileSizeBytes int64
	Timeout          time.Duration
}
    91  
    92  // New returns an RPM extractor.
    93  //
    94  // For most use cases, initialize with:
    95  // ```
    96  // e := New(DefaultConfig())
    97  // ```
    98  func New(cfg Config) *Extractor {
    99  	return &Extractor{
   100  		stats:            cfg.Stats,
   101  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
   102  		Timeout:          cfg.Timeout,
   103  	}
   104  }
   105  
   106  // NewDefault returns an extractor with the default config settings.
   107  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
   108  
   109  // Name of the extractor.
   110  func (e Extractor) Name() string { return Name }
   111  
   112  // Version of the extractor.
   113  func (e Extractor) Version() int { return 0 }
   114  
   115  // Requirements of the extractor.
   116  func (e Extractor) Requirements() *plugin.Capabilities {
   117  	return &plugin.Capabilities{}
   118  }
   119  
   120  // FileRequired returns true if the specified file matches rpm status file pattern.
   121  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   122  	path := api.Path()
   123  	dir, filename := filepath.Split(filepath.ToSlash(path))
   124  	if !slices.Contains(requiredDirectory, dir) || !slices.Contains(requiredFilename, filename) {
   125  		return false
   126  	}
   127  
   128  	fileinfo, err := api.Stat()
   129  	if err != nil {
   130  		return false
   131  	}
   132  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   133  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   134  		return false
   135  	}
   136  
   137  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   138  	return true
   139  }
   140  
   141  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   142  	if e.stats == nil {
   143  		return
   144  	}
   145  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   146  		Path:          path,
   147  		Result:        result,
   148  		FileSizeBytes: fileSizeBytes,
   149  	})
   150  }
   151  
   152  // Extract extracts packages from rpm status files passed through the scan input.
   153  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   154  	pkgs, err := e.extractFromInput(ctx, input)
   155  	if e.stats != nil {
   156  		var fileSizeBytes int64
   157  		if input.Info != nil {
   158  			fileSizeBytes = input.Info.Size()
   159  		}
   160  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   161  			Path:          input.Path,
   162  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   163  			FileSizeBytes: fileSizeBytes,
   164  		})
   165  	}
   166  	return inventory.Inventory{Packages: pkgs}, err
   167  }
   168  
   169  func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) {
   170  	absPath, err := input.GetRealPath()
   171  	if err != nil {
   172  		return nil, fmt.Errorf("GetRealPath(%v): %w", input, err)
   173  	}
   174  	if input.Root == "" {
   175  		// The file got copied to a temporary dir, remove it at the end.
   176  		defer func() {
   177  			dir := filepath.Dir(absPath)
   178  			if err := os.RemoveAll(dir); err != nil {
   179  				log.Errorf("os.RemoveAll(%q): %v", dir, err)
   180  			}
   181  		}()
   182  	}
   183  	rpmPkgs, err := e.parseRPMDB(ctx, absPath)
   184  	if err != nil {
   185  		return nil, fmt.Errorf("ParseRPMDB(%s): %w", absPath, err)
   186  	}
   187  
   188  	m, err := osrelease.GetOSRelease(input.FS)
   189  	if err != nil {
   190  		log.Errorf("osrelease.ParseOsRelease(): %v", err)
   191  	}
   192  
   193  	pkgs := []*extractor.Package{}
   194  	for _, p := range rpmPkgs {
   195  		metadata := &rpmmeta.Metadata{
   196  			PackageName:  p.Name,
   197  			SourceRPM:    p.SourceRPM,
   198  			Epoch:        p.Epoch,
   199  			OSName:       m["NAME"],
   200  			OSPrettyName: m["PRETTY_NAME"],
   201  			OSID:         m["ID"],
   202  			OSVersionID:  m["VERSION_ID"],
   203  			OSBuildID:    m["BUILD_ID"],
   204  			Vendor:       p.Vendor,
   205  			Architecture: p.Architecture,
   206  		}
   207  
   208  		pkgs = append(pkgs, &extractor.Package{
   209  			Name:      p.Name,
   210  			Version:   fmt.Sprintf("%s-%s", p.Version, p.Release),
   211  			PURLType:  purl.TypeRPM,
   212  			Locations: []string{input.Path},
   213  			Metadata:  metadata,
   214  			Licenses:  []string{p.License},
   215  		})
   216  	}
   217  
   218  	return pkgs, nil
   219  }
   220  
   221  // parseRPMDB returns a slice of OS packages parsed from a RPM DB.
   222  func (e Extractor) parseRPMDB(ctx context.Context, path string) ([]rpmPackageInfo, error) {
   223  	db, err := rpmdb.Open(path)
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	defer db.Close()
   228  
   229  	var pkgs []*rpmdb.PackageInfo
   230  	if e.Timeout == 0 {
   231  		pkgs, err = db.ListPackages()
   232  		if err != nil {
   233  			return nil, err
   234  		}
   235  	} else {
   236  		ctx, cancelFunc := context.WithTimeout(ctx, e.Timeout)
   237  		defer cancelFunc()
   238  
   239  		// The timeout is only for corrupt bdb databases
   240  		pkgs, err = db.ListPackagesWithContext(ctx)
   241  		if err != nil {
   242  			return nil, err
   243  		}
   244  	}
   245  
   246  	var result []rpmPackageInfo
   247  	for _, pkg := range pkgs {
   248  		newPkg := rpmPackageInfo{
   249  			Name:         pkg.Name,
   250  			Version:      pkg.Version,
   251  			Release:      pkg.Release,
   252  			Epoch:        pkg.EpochNum(),
   253  			SourceRPM:    pkg.SourceRpm,
   254  			Vendor:       pkg.Vendor,
   255  			Architecture: pkg.Arch,
   256  			License:      pkg.License,
   257  		}
   258  
   259  		result = append(result, newPkg)
   260  	}
   261  
   262  	return result, nil
   263  }
   264  
// rpmPackageInfo holds the fields of a single RPM database entry that
// parseRPMDB copies out for conversion into an extractor.Package.
type rpmPackageInfo struct {
	Name         string
	Version      string
	Release      string
	Epoch        int
	SourceRPM    string
	Maintainer   string // Not populated by parseRPMDB.
	Vendor       string
	Architecture string
	License      string
}