github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/dotnet/depsjson/depsjson.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package depsjson extracts packages from .NET deps.json files.
    16  package depsjson
    17  
import (
	"context"
	"encoding/json"
	"errors"
	"sort"
	"strings"

	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/log"
	"github.com/google/osv-scalibr/plugin"
	"github.com/google/osv-scalibr/purl"
	"github.com/google/osv-scalibr/stats"
)
    33  
const (
	// Name is the unique name of this extractor. It is the value returned by
	// Extractor.Name and the name passed to the stats collector.
	Name = "dotnet/depsjson"

	// defaultMaxFileSizeBytes is the file size limit that DefaultConfig puts
	// into Config.MaxFileSizeBytes.
	defaultMaxFileSizeBytes = 10 * units.MiB // 10 MiB
)
    41  
// Config is the configuration for the deps.json extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics. It may be nil, in which
	// case the extractor skips all metric reporting.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If
	// `FileRequired` gets a bigger file, it will return false. The limit is only
	// enforced when the value is greater than zero.
	MaxFileSizeBytes int64
}
    50  
    51  // DefaultConfig returns the default configuration for the deps.json extractor.
    52  func DefaultConfig() Config {
    53  	return Config{
    54  		MaxFileSizeBytes: defaultMaxFileSizeBytes,
    55  	}
    56  }
    57  
// Extractor structure for deps.json files.
type Extractor struct {
	// stats receives FileRequired and Extract metrics; nil disables reporting.
	stats stats.Collector
	// maxFileSizeBytes is the size limit checked in FileRequired; values that
	// are not greater than zero disable the check.
	maxFileSizeBytes int64
}
    63  
    64  // New returns a deps.json extractor.
    65  func New(cfg Config) *Extractor {
    66  	return &Extractor{
    67  		stats:            cfg.Stats,
    68  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    69  	}
    70  }
    71  
    72  // NewDefault returns an extractor with the default config settings.
    73  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    74  
    75  // Config returns the configuration of the extractor.
    76  func (e Extractor) Config() Config {
    77  	return Config{
    78  		Stats:            e.stats,
    79  		MaxFileSizeBytes: e.maxFileSizeBytes,
    80  	}
    81  }
    82  
    83  // Name of the extractor.
    84  func (e Extractor) Name() string { return Name }
    85  
    86  // Version of the extractor.
    87  func (e Extractor) Version() int { return 0 }
    88  
    89  // Requirements of the extractor.
    90  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    91  
    92  // FileRequired returns true if the specified file matches the deps.json pattern.
    93  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    94  	path := api.Path()
    95  	if !strings.HasSuffix(path, ".deps.json") {
    96  		return false
    97  	}
    98  
    99  	fileinfo, err := api.Stat()
   100  	if err != nil || (e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes) {
   101  		e.reportFileRequired(path, stats.FileRequiredResultSizeLimitExceeded)
   102  		return false
   103  	}
   104  
   105  	e.reportFileRequired(path, stats.FileRequiredResultOK)
   106  	return true
   107  }
   108  
   109  func (e Extractor) reportFileRequired(path string, result stats.FileRequiredResult) {
   110  	if e.stats == nil {
   111  		return
   112  	}
   113  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   114  		Path:   path,
   115  		Result: result,
   116  	})
   117  }
   118  
   119  // Extract parses the deps.json file to extract .NET package dependencies.
   120  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   121  	packages, err := e.extractFromInput(input)
   122  	if e.stats != nil {
   123  		var fileSizeBytes int64
   124  		if input.Info != nil {
   125  			fileSizeBytes = input.Info.Size()
   126  		}
   127  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   128  			Path:          input.Path,
   129  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   130  			FileSizeBytes: fileSizeBytes,
   131  		})
   132  	}
   133  	return inventory.Inventory{Packages: packages}, err
   134  }
   135  
// DepsJSON represents the structure of the deps.json file. Only the
// top-level "libraries" object is decoded.
type DepsJSON struct {
	// Libraries maps each library key to its entry. The extractor expects keys
	// in "package/version" form and skips keys that do not split into exactly
	// those two parts.
	//
	// Note: Libraries does not include transitive dependencies.
	// Targets is not currently extracted because it introduces significant
	// complexity and is not always necessary for basic dependency analysis.
	Libraries map[string]struct {
		// Version is decoded from the entry's "version" field. The extractor
		// takes the package version from the library key, not from this field.
		Version string `json:"version"`
		// Type represents the package type, if present. Examples of types include:
		// - "package": Indicates a standard NuGet package dependency.
		// - "project": Represents a project-level dependency, such as the main application or a locally developed library.
		Type string `json:"type"`
	} `json:"libraries"`
}
   149  
   150  func (e Extractor) extractFromInput(input *filesystem.ScanInput) ([]*extractor.Package, error) {
   151  	var deps DepsJSON
   152  	decoder := json.NewDecoder(input.Reader)
   153  	if err := decoder.Decode(&deps); err != nil {
   154  		log.Errorf("Error parsing deps.json: %v", err)
   155  		return nil, err
   156  	}
   157  
   158  	// Check if the decoded content is empty (i.e., no libraries)
   159  	if len(deps.Libraries) == 0 {
   160  		log.Warn("Empty deps.json file or no libraries found")
   161  		return nil, errors.New("empty deps.json file or no libraries found")
   162  	}
   163  
   164  	var packages []*extractor.Package
   165  	for nameVersion, library := range deps.Libraries {
   166  		// Split name and version from "package/version" format
   167  		name, version := splitNameAndVersion(nameVersion)
   168  		if name == "" || version == "" {
   169  			log.Warnf("Skipping library with missing name or version: %s", nameVersion)
   170  			continue
   171  		}
   172  		// If the library type is "project", this is the root/main package.
   173  		p := &extractor.Package{
   174  			Name:     name,
   175  			Version:  version,
   176  			PURLType: purl.TypeNuget,
   177  			Metadata: &Metadata{
   178  				PackageName:    name,
   179  				PackageVersion: version,
   180  				Type:           library.Type,
   181  			},
   182  			Locations: []string{input.Path},
   183  		}
   184  		packages = append(packages, p)
   185  	}
   186  
   187  	return packages, nil
   188  }
   189  
   190  // splitNameAndVersion splits the name and version from a "package/version" string.
   191  func splitNameAndVersion(nameVersion string) (string, string) {
   192  	parts := strings.Split(nameVersion, "/")
   193  	if len(parts) != 2 {
   194  		return "", ""
   195  	}
   196  	return parts[0], parts[1]
   197  }