github.com/google/osv-scalibr@v0.4.1/annotator/misc/dpkgsource/dpkgsource.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dpkgsource provides a way to annotate packages with repository source information.
    16  package dpkgsource
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"fmt"
    22  	"os/exec"
    23  	"strings"
    24  
    25  	"github.com/google/osv-scalibr/annotator"
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg/metadata"
    28  	"github.com/google/osv-scalibr/inventory"
    29  	"github.com/google/osv-scalibr/log"
    30  	"github.com/google/osv-scalibr/plugin"
    31  	"github.com/google/osv-scalibr/purl"
    32  )
    33  
    34  const (
    35  	// Name of the Annotator
    36  	Name = "misc/dpkg-source"
    37  )
    38  
// FetchAptCachePolicy is the function Annotate calls to obtain the mapping
// from package names to repository sources. It is a package-level variable,
// defaulting to aptCachePolicy, so that tests can replace it with a mock.
var FetchAptCachePolicy = aptCachePolicy
    41  
// Annotator adds repository source context for extracted Debian packages from dpkg extractor.
// It has no fields, so the zero value is ready to use.
type Annotator struct{}
    44  
    45  // New returns a new Annotator.
    46  func New() annotator.Annotator { return Annotator{} }
    47  
    48  // Name returns the name of the annotator.
    49  func (Annotator) Name() string { return Name }
    50  
    51  // Version returns the version of the annotator.
    52  func (Annotator) Version() int { return 0 }
    53  
    54  // Requirements returns the requirements of the annotator.
    55  func (Annotator) Requirements() *plugin.Capabilities {
    56  	return &plugin.Capabilities{OS: plugin.OSLinux, RunningSystem: true}
    57  }
    58  
    59  // Annotate adds repository source context for extracted Debian packages from dpkg extractor.
    60  func (a Annotator) Annotate(ctx context.Context, input *annotator.ScanInput, results *inventory.Inventory) error {
    61  	// Call apt-cache policy once with all packages.
    62  	dpkgToSources, err := FetchAptCachePolicy(ctx, results.Packages)
    63  	if err != nil {
    64  		return fmt.Errorf("%s halted while fetching apt-cache policy: %w", a.Name(), err)
    65  	}
    66  
    67  	// Update package metadata with source information.
    68  	for _, pkg := range results.Packages {
    69  		// Return if canceled or exceeding deadline.
    70  		if err := ctx.Err(); err != nil {
    71  			return fmt.Errorf("%s halted at %q because of context error: %w", a.Name(), input.ScanRoot.Path, err)
    72  		}
    73  		// Only annotate debian packages.
    74  		if pkg.PURLType != purl.TypeDebian {
    75  			continue
    76  		}
    77  		md, ok := pkg.Metadata.(*metadata.Metadata)
    78  		if !ok {
    79  			continue
    80  		}
    81  		// Update dpkg metadata PackageSource field.
    82  		if source, ok := dpkgToSources[pkg.Name]; ok {
    83  			md.PackageSource = source
    84  		} else {
    85  			md.PackageSource = "unknown"
    86  		}
    87  	}
    88  
    89  	return nil
    90  }
    91  
    92  func aptCachePolicy(ctx context.Context, packages []*extractor.Package) (map[string]string, error) {
    93  	// List all installed Debian package names.
    94  	var pkgNames []string
    95  	for _, pkg := range packages {
    96  		if pkg.PURLType != purl.TypeDebian {
    97  			continue
    98  		}
    99  		pkgNames = append(pkgNames, pkg.Metadata.(*metadata.Metadata).PackageName)
   100  	}
   101  
   102  	// Call apt-cache policy once with all package names.
   103  	args := append([]string{"policy"}, pkgNames...)
   104  	cmd := exec.CommandContext(ctx, "apt-cache", args...)
   105  	output, err := cmd.Output()
   106  	if err != nil {
   107  		return nil, fmt.Errorf("calling apt-cache policy failed: %w", err)
   108  	}
   109  
   110  	// Return packages mapped to package sources.
   111  	return MapPackageToSource(ctx, string(output))
   112  }
   113  
   114  // MapPackageToSource parses the output of "apt-cache policy" and returns a map
   115  // from package names to their repository sources.
   116  func MapPackageToSource(ctx context.Context, aptCacheOutput string) (map[string]string, error) {
   117  	// Parse apt-cache policy output and map package names to repository sources.
   118  	dpkgSource := make(map[string]string)
   119  	var pkgName string
   120  
   121  	scanner := bufio.NewScanner(strings.NewReader(aptCacheOutput))
   122  
   123  	for scanner.Scan() {
   124  		// Return if canceled or exceeding deadline.
   125  		if err := ctx.Err(); err != nil {
   126  			return nil, err
   127  		}
   128  
   129  		line := scanner.Text()
   130  		trimmedLine := strings.TrimSpace(line)
   131  		if trimmedLine == "" {
   132  			continue
   133  		}
   134  		// A new package block starts when a line is not indented and begins with package name.
   135  		if !strings.HasPrefix(line, " ") && !strings.HasPrefix(line, "\t") {
   136  			pkgName = strings.TrimSuffix(line, ":")
   137  		}
   138  		// Installed version is signified by leading '***'.
   139  		if pkgName != "" && strings.HasPrefix(trimmedLine, "***") {
   140  			// Advance scanner to next line to read the top priority source.
   141  			if !scanner.Scan() {
   142  				log.Warnf("dpkg-source: could not find source for package %q, unexpected end of apt-cache policy output", pkgName)
   143  				dpkgSource[pkgName] = "unknown"
   144  				pkgName = ""
   145  				continue
   146  			}
   147  			priorityLine := strings.TrimSpace(scanner.Text())
   148  			// Remove priority number and other information, return the repository source.
   149  			repoSource := strings.Split(priorityLine, " ")
   150  			if len(repoSource) < 2 {
   151  				log.Warnf("dpkg-source: could not parse source for package %q from line: %q", pkgName, priorityLine)
   152  				dpkgSource[pkgName] = "unknown"
   153  				pkgName = ""
   154  				continue
   155  			}
   156  			dpkgSource[pkgName] = repoSource[1]
   157  			// Reset package name string and continue scanning.
   158  			pkgName = ""
   159  			continue
   160  		}
   161  	}
   162  
   163  	if err := scanner.Err(); err != nil {
   164  		return nil, err
   165  	}
   166  	return dpkgSource, nil
   167  }