github.com/google/osv-scalibr@v0.4.1/guidedremediation/internal/lockfile/npm/packagelockjsonv2.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package npm
    16  
    17  import (
    18  	"cmp"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"maps"
    23  	"path/filepath"
    24  	"slices"
    25  	"strings"
    26  
    27  	"deps.dev/util/resolve"
    28  	"deps.dev/util/resolve/dep"
    29  	"github.com/google/osv-scalibr/clients/datasource"
    30  	"github.com/google/osv-scalibr/internal/dependencyfile/packagelockjson"
    31  	"github.com/tidwall/gjson"
    32  	"github.com/tidwall/sjson"
    33  )
    34  
    35  // nodesFromPackages extracts graph from new-style (npm >= 7 / lockfileVersion 2+) structure
    36  // https://docs.npmjs.com/cli/v9/configuring-npm/package-lock-json
    37  // Installed packages are in the flat "packages" object, keyed by the install path
    38  // e.g. "node_modules/foo/node_modules/bar"
    39  // packages contain most information from their own manifests.
    40  func nodesFromPackages(lockJSON packagelockjson.LockFile) (*resolve.Graph, *nodeModule, error) {
    41  	g := &resolve.Graph{}
    42  	// Create graph nodes and reconstruct the node_modules folder structure in memory
    43  	root, ok := lockJSON.Packages[""]
    44  	if !ok {
    45  		return nil, nil, errors.New("missing root node")
    46  	}
    47  	nID := g.AddNode(resolve.VersionKey{
    48  		PackageKey: resolve.PackageKey{
    49  			System: resolve.NPM,
    50  			Name:   root.Name,
    51  		},
    52  		VersionType: resolve.Concrete,
    53  		Version:     root.Version,
    54  	})
    55  	nodeModuleTree := makeNodeModuleDeps(root, true)
    56  	nodeModuleTree.NodeID = nID
    57  
    58  	// paths for npm workspace subfolders, not inside root node_modules
    59  	workspaceModules := make(map[string]*nodeModule)
    60  	workspaceModules[""] = nodeModuleTree
    61  
    62  	// iterate keys by node_modules depth
    63  	for _, k := range packageNamesByNodeModuleDepth(lockJSON.Packages) {
    64  		if k == "" {
    65  			// skip the root node
    66  			continue
    67  		}
    68  		pkg, ok := lockJSON.Packages[k]
    69  		if !ok {
    70  			return nil, nil, fmt.Errorf("expected key %q not found in packages", k)
    71  		}
    72  		path := strings.Split(k, "node_modules/")
    73  		if len(path) == 1 {
    74  			// the path does not contain "node_modules/", assume this is a workspace directory
    75  			nID := g.AddNode(resolve.VersionKey{
    76  				PackageKey: resolve.PackageKey{
    77  					System: resolve.NPM,
    78  					Name:   path[0], // This will get replaced by the name from the symlink
    79  				},
    80  				VersionType: resolve.Concrete,
    81  				Version:     pkg.Version,
    82  			})
    83  			m := makeNodeModuleDeps(pkg, true) // NB: including the dev dependencies
    84  			m.NodeID = nID
    85  			workspaceModules[path[0]] = m
    86  
    87  			continue
    88  		}
    89  
    90  		if pkg.Link {
    91  			// This is the symlink to the workspace directory in node_modules
    92  			if len(path) != 2 || path[0] != "" {
    93  				// Not sure if this situation is actually possible.
    94  				return nil, nil, errors.New("found symlink in package-lock.json that's not in root node_modules directory")
    95  			}
    96  			m := workspaceModules[pkg.Resolved]
    97  			if m == nil {
    98  				// Not sure if this situation is actually possible.
    99  				return nil, nil, errors.New("symlink in package-lock.json processed before real directory")
   100  			}
   101  
   102  			// attach the workspace to the tree
   103  			pkgName := path[1]
   104  			nodeModuleTree.Children[pkgName] = m
   105  			if pkg.Resolved == "" {
   106  				// weird case: the root directory is symlinked into its own node_modules
   107  				continue
   108  			}
   109  			m.Parent = nodeModuleTree
   110  
   111  			// rename the node to the name it would be referred to as in package.json
   112  			g.Nodes[m.NodeID].Version.Name = pkgName
   113  			// add it as a dependency of the root node, so it's not orphaned
   114  			if _, ok := nodeModuleTree.Deps[pkgName]; !ok {
   115  				nodeModuleTree.Deps[pkgName] = dependencyVersionSpec{Version: "*"}
   116  			}
   117  
   118  			continue
   119  		}
   120  
   121  		// find the direct parent package by traversing the path
   122  		parent := nodeModuleTree
   123  		if path[0] != "" {
   124  			// jump to the corresponding workspace if package is in one
   125  			if parent, ok = workspaceModules[strings.TrimSuffix(path[0], "/")]; !ok {
   126  				// The package exists in a node_modules of a folder that doesn't belong to this project.
   127  				// npm seems to silently ignore these, so we will too.
   128  				continue
   129  			}
   130  		}
   131  
   132  		parentFound := true
   133  		for _, p := range path[1 : len(path)-1] { // skip root directory
   134  			p = strings.TrimSuffix(p, "/")
   135  			if parent, parentFound = parent.Children[p]; !parentFound {
   136  				break
   137  			}
   138  		}
   139  
   140  		if !parentFound {
   141  			// The package this supposed to be installed under is not installed.
   142  			// npm seems to silently ignore these, so we will too.
   143  			continue
   144  		}
   145  
   146  		name := path[len(path)-1]
   147  		nID := g.AddNode(resolve.VersionKey{
   148  			PackageKey: resolve.PackageKey{
   149  				System: resolve.NPM,
   150  				Name:   name,
   151  			},
   152  			VersionType: resolve.Concrete,
   153  			Version:     pkg.Version,
   154  		})
   155  		parent.Children[name] = makeNodeModuleDeps(pkg, false)
   156  		parent.Children[name].NodeID = nID
   157  		parent.Children[name].Parent = parent
   158  		parent.Children[name].ActualName = pkg.Name
   159  	}
   160  
   161  	return g, nodeModuleTree, nil
   162  }
   163  
   164  func makeNodeModuleDeps(pkg packagelockjson.Package, includeDev bool) *nodeModule {
   165  	nm := nodeModule{
   166  		Children: make(map[string]*nodeModule),
   167  		Deps:     make(map[string]dependencyVersionSpec),
   168  	}
   169  
   170  	// The order we process dependency types here is to match npm's behavior.
   171  	for name, version := range pkg.PeerDependencies {
   172  		var typ dep.Type
   173  		typ.AddAttr(dep.Scope, "peer")
   174  		if pkg.PeerDependenciesMeta[name].Optional {
   175  			typ.AddAttr(dep.Opt, "")
   176  		}
   177  		nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: typ}
   178  	}
   179  	for name, version := range pkg.Dependencies {
   180  		nm.Deps[name] = dependencyVersionSpec{Version: version}
   181  	}
   182  	for name, version := range pkg.OptionalDependencies {
   183  		nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: dep.NewType(dep.Opt)}
   184  	}
   185  	if includeDev {
   186  		for name, version := range pkg.DevDependencies {
   187  			nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: dep.NewType(dep.Dev)}
   188  		}
   189  	}
   190  	reVersionAliasedDeps(nm.Deps)
   191  
   192  	return &nm
   193  }
   194  
   195  func packageNamesByNodeModuleDepth(packages map[string]packagelockjson.Package) []string {
   196  	keys := slices.Collect(maps.Keys(packages))
   197  	slices.SortFunc(keys, func(a, b string) int {
   198  		aSplit := strings.Split(a, "node_modules/")
   199  		bSplit := strings.Split(b, "node_modules/")
   200  		if c := cmp.Compare(len(aSplit), len(bSplit)); c != 0 {
   201  			return c
   202  		}
   203  		// sort alphabetically if they're the same depth
   204  		return cmp.Compare(a, b)
   205  	})
   206  
   207  	return keys
   208  }
   209  
   210  // writePackages writes the patches to the "packages" section (v2+) of the lockfile (if it exists).
   211  func writePackages(lockf []byte, patchMap map[string]map[string]string, api *datasource.NPMRegistryAPIClient) ([]byte, error) {
   212  	// Check if the lockfile is using CRLF or LF by checking the first newline.
   213  	i := slices.Index(lockf, byte('\n'))
   214  	crlf := i > 0 && lockf[i-1] == '\r'
   215  	packages := gjson.GetBytes(lockf, "packages")
   216  	if !packages.Exists() {
   217  		return lockf, nil
   218  	}
   219  
   220  	for key, value := range packages.Map() {
   221  		parts := strings.Split(key, "node_modules/")
   222  		if len(parts) == 0 {
   223  			continue
   224  		}
   225  		pkg := parts[len(parts)-1]
   226  		if n := value.Get("name"); n.Exists() { // if this is an alias, use the real package as the name
   227  			pkg = n.String()
   228  		}
   229  		if upgrades, ok := patchMap[pkg]; ok {
   230  			if newVer, ok := upgrades[value.Get("version").String()]; ok {
   231  				fullPath := "packages." + gjson.Escape(key)
   232  				var err error
   233  				if lockf, err = updatePackage(lockf, fullPath, pkg, newVer, api, crlf); err != nil {
   234  					return lockf, err
   235  				}
   236  			}
   237  		}
   238  	}
   239  
   240  	return lockf, nil
   241  }
   242  
   243  func updatePackage(lockf []byte, fullPath string, pkg string, newVer string, api *datasource.NPMRegistryAPIClient, crlf bool) ([]byte, error) {
   244  	npmData, err := api.FullJSON(context.Background(), pkg, newVer)
   245  	if err != nil {
   246  		return lockf, err
   247  	}
   248  
   249  	// The "dependencies" returned from the registry may include both optional and regular dependencies,
   250  	// but the "optionalDependencies" are removed from "dependencies" in package-lock.json.
   251  	for _, opt := range npmData.Get("optionalDependencies|@keys").Array() {
   252  		depName := gjson.Escape(opt.String())
   253  		s, _ := sjson.Delete(npmData.Raw, "dependencies."+depName)
   254  		npmData = gjson.Parse(s)
   255  	}
   256  
   257  	if len(npmData.Get("dependencies").Map()) == 0 {
   258  		s, _ := sjson.Delete(npmData.Raw, "dependencies")
   259  		npmData = gjson.Parse(s)
   260  	}
   261  
   262  	pkgData := gjson.GetBytes(lockf, fullPath)
   263  	pkgText := pkgData.Raw
   264  
   265  	// There doesn't appear to be a consistent list of what fields should be included in package-lock.json packages.
   266  	// https://docs.npmjs.com/cli/v9/configuring-npm/package-lock-json#packages seems list some,
   267  	// but it's not exhaustive and some listed fields may be missing in package-lock files in the wild.
   268  	// It may depend on the npm version.
   269  	// Just modify the fields that are already present to avoid too much churn.
   270  	keyArray := pkgData.Get("@keys").Array()
   271  	// If dependency types were not previously present, we want to add them.
   272  	necessaryKeys := []string{"dependencies", "optionalDependencies", "peerDependencies"}
   273  	keys := make([]string, len(keyArray), len(keyArray)+len(necessaryKeys))
   274  	for i, key := range keyArray {
   275  		keys[i] = gjson.Escape(key.String())
   276  	}
   277  	for _, key := range necessaryKeys {
   278  		if npmData.Get(key).Exists() && !pkgData.Get(key).Exists() {
   279  			keys = append(keys, key)
   280  		}
   281  	}
   282  
   283  	// Write all the updated fields
   284  	for _, key := range keys {
   285  		// some keys require special handling.
   286  		switch key {
   287  		case "resolved":
   288  			pkgText, _ = sjson.Set(pkgText, "resolved", npmData.Get("dist.tarball").String())
   289  		case "integrity":
   290  			pkgText, _ = sjson.Set(pkgText, "integrity", npmData.Get("dist.integrity").String())
   291  		case "bin":
   292  			// the api formats the paths as "./path/to", while package-lock.json seem to use "path/to"
   293  			newVal := npmData.Get("bin")
   294  			if newVal.Exists() {
   295  				text := newVal.Raw
   296  				for k, v := range newVal.Map() {
   297  					text, _ = sjson.Set(text, k, filepath.Clean(v.String()))
   298  				}
   299  				pkgText, _ = sjson.SetRaw(pkgText, "bin", text)
   300  			} else {
   301  				// explicitly remove it if it's no longer present.
   302  				pkgText, _ = sjson.Delete(pkgText, "bin")
   303  			}
   304  		case "dependencies", "devDependencies", "peerDependencies", "optionalDependencies":
   305  			// If all dependencies of a type have been removed, explicitly remove the field.
   306  			// NB: devDependencies shouldn't be in the lockfile anyway.
   307  			if !npmData.Get(key).Exists() {
   308  				pkgText, _ = sjson.Delete(pkgText, key)
   309  				continue
   310  			}
   311  			fallthrough
   312  		default:
   313  			newVal := npmData.Get(key)
   314  			if newVal.Exists() {
   315  				pkgText, _ = sjson.SetRaw(pkgText, key, newVal.Raw)
   316  			}
   317  		}
   318  	}
   319  
   320  	// Pretty-print the JSON because setting nested fields break the formatting.
   321  	// setting prefix to match indentation at the level.
   322  	pkgText = gjson.Get(pkgText, "@this|@pretty:{\"prefix\": \"    \", \"intent\": \"  \"}").Raw
   323  	// Trim trailing newline that @pretty creates.
   324  	pkgText = strings.TrimSuffix(pkgText, "\n")
   325  	if crlf {
   326  		pkgText = strings.ReplaceAll(pkgText, "\n", "\r\n")
   327  	}
   328  
   329  	return sjson.SetRawBytes(lockf, fullPath, []byte(pkgText))
   330  }