github.com/google/osv-scalibr@v0.4.1/guidedremediation/internal/lockfile/npm/packagelockjsonv2.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package npm 16 17 import ( 18 "cmp" 19 "context" 20 "errors" 21 "fmt" 22 "maps" 23 "path/filepath" 24 "slices" 25 "strings" 26 27 "deps.dev/util/resolve" 28 "deps.dev/util/resolve/dep" 29 "github.com/google/osv-scalibr/clients/datasource" 30 "github.com/google/osv-scalibr/internal/dependencyfile/packagelockjson" 31 "github.com/tidwall/gjson" 32 "github.com/tidwall/sjson" 33 ) 34 35 // nodesFromPackages extracts graph from new-style (npm >= 7 / lockfileVersion 2+) structure 36 // https://docs.npmjs.com/cli/v9/configuring-npm/package-lock-json 37 // Installed packages are in the flat "packages" object, keyed by the install path 38 // e.g. "node_modules/foo/node_modules/bar" 39 // packages contain most information from their own manifests. 40 func nodesFromPackages(lockJSON packagelockjson.LockFile) (*resolve.Graph, *nodeModule, error) { 41 g := &resolve.Graph{} 42 // Create graph nodes and reconstruct the node_modules folder structure in memory 43 root, ok := lockJSON.Packages[""] 44 if !ok { 45 return nil, nil, errors.New("missing root node") 46 } 47 nID := g.AddNode(resolve.VersionKey{ 48 PackageKey: resolve.PackageKey{ 49 System: resolve.NPM, 50 Name: root.Name, 51 }, 52 VersionType: resolve.Concrete, 53 Version: root.Version, 54 }) 55 nodeModuleTree := makeNodeModuleDeps(root, true) 56 nodeModuleTree.NodeID = nID 57 58 // paths for npm workspace subfolders, not inside root node_modules 59 workspaceModules := make(map[string]*nodeModule) 60 workspaceModules[""] = nodeModuleTree 61 62 // iterate keys by node_modules depth 63 for _, k := range packageNamesByNodeModuleDepth(lockJSON.Packages) { 64 if k == "" { 65 // skip the root node 66 continue 67 } 68 pkg, ok := lockJSON.Packages[k] 69 if !ok { 70 return nil, nil, fmt.Errorf("expected key %q not found in packages", k) 71 } 72 path := strings.Split(k, "node_modules/") 73 if len(path) == 1 { 74 // the path does not contain "node_modules/", assume this is a workspace directory 75 nID := g.AddNode(resolve.VersionKey{ 76 PackageKey: resolve.PackageKey{ 77 System: resolve.NPM, 78 Name: path[0], // This will get replaced by the name from the symlink 79 }, 80 VersionType: resolve.Concrete, 81 Version: pkg.Version, 82 }) 83 m := makeNodeModuleDeps(pkg, true) // NB: including the dev dependencies 84 m.NodeID = nID 85 workspaceModules[path[0]] = m 86 87 continue 88 } 89 90 if pkg.Link { 91 // This is the symlink to the workspace directory in node_modules 92 if len(path) != 2 || path[0] != "" { 93 // Not sure if this situation is actually possible. 94 return nil, nil, errors.New("found symlink in package-lock.json that's not in root node_modules directory") 95 } 96 m := workspaceModules[pkg.Resolved] 97 if m == nil { 98 // Not sure if this situation is actually possible. 99 return nil, nil, errors.New("symlink in package-lock.json processed before real directory") 100 } 101 102 // attach the workspace to the tree 103 pkgName := path[1] 104 nodeModuleTree.Children[pkgName] = m 105 if pkg.Resolved == "" { 106 // weird case: the root directory is symlinked into its own node_modules 107 continue 108 } 109 m.Parent = nodeModuleTree 110 111 // rename the node to the name it would be referred to as in package.json 112 g.Nodes[m.NodeID].Version.Name = pkgName 113 // add it as a dependency of the root node, so it's not orphaned 114 if _, ok := nodeModuleTree.Deps[pkgName]; !ok { 115 nodeModuleTree.Deps[pkgName] = dependencyVersionSpec{Version: "*"} 116 } 117 118 continue 119 } 120 121 // find the direct parent package by traversing the path 122 parent := nodeModuleTree 123 if path[0] != "" { 124 // jump to the corresponding workspace if package is in one 125 if parent, ok = workspaceModules[strings.TrimSuffix(path[0], "/")]; !ok { 126 // The package exists in a node_modules of a folder that doesn't belong to this project. 127 // npm seems to silently ignore these, so we will too. 128 continue 129 } 130 } 131 132 parentFound := true 133 for _, p := range path[1 : len(path)-1] { // skip root directory 134 p = strings.TrimSuffix(p, "/") 135 if parent, parentFound = parent.Children[p]; !parentFound { 136 break 137 } 138 } 139 140 if !parentFound { 141 // The package this supposed to be installed under is not installed. 142 // npm seems to silently ignore these, so we will too. 143 continue 144 } 145 146 name := path[len(path)-1] 147 nID := g.AddNode(resolve.VersionKey{ 148 PackageKey: resolve.PackageKey{ 149 System: resolve.NPM, 150 Name: name, 151 }, 152 VersionType: resolve.Concrete, 153 Version: pkg.Version, 154 }) 155 parent.Children[name] = makeNodeModuleDeps(pkg, false) 156 parent.Children[name].NodeID = nID 157 parent.Children[name].Parent = parent 158 parent.Children[name].ActualName = pkg.Name 159 } 160 161 return g, nodeModuleTree, nil 162 } 163 164 func makeNodeModuleDeps(pkg packagelockjson.Package, includeDev bool) *nodeModule { 165 nm := nodeModule{ 166 Children: make(map[string]*nodeModule), 167 Deps: make(map[string]dependencyVersionSpec), 168 } 169 170 // The order we process dependency types here is to match npm's behavior. 171 for name, version := range pkg.PeerDependencies { 172 var typ dep.Type 173 typ.AddAttr(dep.Scope, "peer") 174 if pkg.PeerDependenciesMeta[name].Optional { 175 typ.AddAttr(dep.Opt, "") 176 } 177 nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: typ} 178 } 179 for name, version := range pkg.Dependencies { 180 nm.Deps[name] = dependencyVersionSpec{Version: version} 181 } 182 for name, version := range pkg.OptionalDependencies { 183 nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: dep.NewType(dep.Opt)} 184 } 185 if includeDev { 186 for name, version := range pkg.DevDependencies { 187 nm.Deps[name] = dependencyVersionSpec{Version: version, DepType: dep.NewType(dep.Dev)} 188 } 189 } 190 reVersionAliasedDeps(nm.Deps) 191 192 return &nm 193 } 194 195 func packageNamesByNodeModuleDepth(packages map[string]packagelockjson.Package) []string { 196 keys := slices.Collect(maps.Keys(packages)) 197 slices.SortFunc(keys, func(a, b string) int { 198 aSplit := strings.Split(a, "node_modules/") 199 bSplit := strings.Split(b, "node_modules/") 200 if c := cmp.Compare(len(aSplit), len(bSplit)); c != 0 { 201 return c 202 } 203 // sort alphabetically if they're the same depth 204 return cmp.Compare(a, b) 205 }) 206 207 return keys 208 } 209 210 // writePackages writes the patches to the "packages" section (v2+) of the lockfile (if it exists). 211 func writePackages(lockf []byte, patchMap map[string]map[string]string, api *datasource.NPMRegistryAPIClient) ([]byte, error) { 212 // Check if the lockfile is using CRLF or LF by checking the first newline. 213 i := slices.Index(lockf, byte('\n')) 214 crlf := i > 0 && lockf[i-1] == '\r' 215 packages := gjson.GetBytes(lockf, "packages") 216 if !packages.Exists() { 217 return lockf, nil 218 } 219 220 for key, value := range packages.Map() { 221 parts := strings.Split(key, "node_modules/") 222 if len(parts) == 0 { 223 continue 224 } 225 pkg := parts[len(parts)-1] 226 if n := value.Get("name"); n.Exists() { // if this is an alias, use the real package as the name 227 pkg = n.String() 228 } 229 if upgrades, ok := patchMap[pkg]; ok { 230 if newVer, ok := upgrades[value.Get("version").String()]; ok { 231 fullPath := "packages." + gjson.Escape(key) 232 var err error 233 if lockf, err = updatePackage(lockf, fullPath, pkg, newVer, api, crlf); err != nil { 234 return lockf, err 235 } 236 } 237 } 238 } 239 240 return lockf, nil 241 } 242 243 func updatePackage(lockf []byte, fullPath string, pkg string, newVer string, api *datasource.NPMRegistryAPIClient, crlf bool) ([]byte, error) { 244 npmData, err := api.FullJSON(context.Background(), pkg, newVer) 245 if err != nil { 246 return lockf, err 247 } 248 249 // The "dependencies" returned from the registry may include both optional and regular dependencies, 250 // but the "optionalDependencies" are removed from "dependencies" in package-lock.json. 251 for _, opt := range npmData.Get("optionalDependencies|@keys").Array() { 252 depName := gjson.Escape(opt.String()) 253 s, _ := sjson.Delete(npmData.Raw, "dependencies."+depName) 254 npmData = gjson.Parse(s) 255 } 256 257 if len(npmData.Get("dependencies").Map()) == 0 { 258 s, _ := sjson.Delete(npmData.Raw, "dependencies") 259 npmData = gjson.Parse(s) 260 } 261 262 pkgData := gjson.GetBytes(lockf, fullPath) 263 pkgText := pkgData.Raw 264 265 // There doesn't appear to be a consistent list of what fields should be included in package-lock.json packages. 266 // https://docs.npmjs.com/cli/v9/configuring-npm/package-lock-json#packages seems list some, 267 // but it's not exhaustive and some listed fields may be missing in package-lock files in the wild. 268 // It may depend on the npm version. 269 // Just modify the fields that are already present to avoid too much churn. 270 keyArray := pkgData.Get("@keys").Array() 271 // If dependency types were not previously present, we want to add them. 272 necessaryKeys := []string{"dependencies", "optionalDependencies", "peerDependencies"} 273 keys := make([]string, len(keyArray), len(keyArray)+len(necessaryKeys)) 274 for i, key := range keyArray { 275 keys[i] = gjson.Escape(key.String()) 276 } 277 for _, key := range necessaryKeys { 278 if npmData.Get(key).Exists() && !pkgData.Get(key).Exists() { 279 keys = append(keys, key) 280 } 281 } 282 283 // Write all the updated fields 284 for _, key := range keys { 285 // some keys require special handling. 286 switch key { 287 case "resolved": 288 pkgText, _ = sjson.Set(pkgText, "resolved", npmData.Get("dist.tarball").String()) 289 case "integrity": 290 pkgText, _ = sjson.Set(pkgText, "integrity", npmData.Get("dist.integrity").String()) 291 case "bin": 292 // the api formats the paths as "./path/to", while package-lock.json seem to use "path/to" 293 newVal := npmData.Get("bin") 294 if newVal.Exists() { 295 text := newVal.Raw 296 for k, v := range newVal.Map() { 297 text, _ = sjson.Set(text, k, filepath.Clean(v.String())) 298 } 299 pkgText, _ = sjson.SetRaw(pkgText, "bin", text) 300 } else { 301 // explicitly remove it if it's no longer present. 302 pkgText, _ = sjson.Delete(pkgText, "bin") 303 } 304 case "dependencies", "devDependencies", "peerDependencies", "optionalDependencies": 305 // If all dependencies of a type have been removed, explicitly remove the field. 306 // NB: devDependencies shouldn't be in the lockfile anyway. 307 if !npmData.Get(key).Exists() { 308 pkgText, _ = sjson.Delete(pkgText, key) 309 continue 310 } 311 fallthrough 312 default: 313 newVal := npmData.Get(key) 314 if newVal.Exists() { 315 pkgText, _ = sjson.SetRaw(pkgText, key, newVal.Raw) 316 } 317 } 318 } 319 320 // Pretty-print the JSON because setting nested fields break the formatting. 321 // setting prefix to match indentation at the level. 322 pkgText = gjson.Get(pkgText, "@this|@pretty:{\"prefix\": \" \", \"intent\": \" \"}").Raw 323 // Trim trailing newline that @pretty creates. 324 pkgText = strings.TrimSuffix(pkgText, "\n") 325 if crlf { 326 pkgText = strings.ReplaceAll(pkgText, "\n", "\r\n") 327 } 328 329 return sjson.SetRawBytes(lockf, fullPath, []byte(pkgText)) 330 }