go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/providers/os/resources/python.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package resources 5 6 import ( 7 "bufio" 8 "errors" 9 "fmt" 10 "io" 11 "net/textproto" 12 "os" 13 "path/filepath" 14 "regexp" 15 "runtime" 16 "strings" 17 18 "github.com/rs/zerolog/log" 19 "github.com/spf13/afero" 20 "go.mondoo.com/cnquery/llx" 21 "go.mondoo.com/cnquery/providers-sdk/v1/plugin" 22 "go.mondoo.com/cnquery/providers/os/connection/shared" 23 "go.mondoo.com/cnquery/types" 24 ) 25 26 type pythonDirectory struct { 27 path string 28 addLib bool 29 } 30 31 var pythonDirectories = []pythonDirectory{ 32 { 33 path: "/usr/local/lib/python*", 34 }, 35 { 36 path: "/usr/local/lib64/python*", 37 }, 38 { 39 path: "/usr/lib/python*", 40 }, 41 { 42 path: "/usr/lib64/python*", 43 }, 44 { 45 path: "/opt/homebrew/lib/python*", 46 }, 47 { 48 // surprisingly, this is handled in a case-sensitive way in go (the filepath.Match() glob/pattern matching) 49 path: "C:/Python*", 50 // true because in Windows the 'site-packages' dir lives in a path like: 51 // C:\Python3.11\Lib\site-packages 52 addLib: true, 53 }, 54 } 55 56 var pythonDirectoriesDarwin = []string{ 57 "/System/Library/Frameworks/Python.framework/Versions", 58 "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions", 59 } 60 61 func initPython(runtime *plugin.Runtime, args map[string]*llx.RawData) (map[string]*llx.RawData, plugin.Resource, error) { 62 if x, ok := args["path"]; ok { 63 _, ok := x.Value.(string) 64 if !ok { 65 return nil, nil, errors.New("Wrong type for 'path' in python initialization, it must be a string") 66 } 67 } else { 68 // empty path means search through default locations 69 args["path"] = llx.StringData("") 70 } 71 72 return args, nil, nil 73 } 74 75 func (k *mqlPython) id() (string, error) { 76 return "python", nil 77 } 78 79 func (k *mqlPython) getAllPackages() ([]pythonPackageDetails, error) { 80 allResults := []pythonPackageDetails{} 81 82 conn, ok := k.MqlRuntime.Connection.(shared.Connection) 83 if !ok { 84 return nil, fmt.Errorf("provider is not an operating system provider") 85 } 86 afs := &afero.Afero{Fs: conn.FileSystem()} 87 88 if k.Path.Error != nil { 89 return nil, k.Path.Error 90 } 91 pyPath := k.Path.Data 92 if pyPath != "" { 93 // only search the specific path provided (if it was provided) 94 allResults = gatherPackages(afs, pyPath) 95 } else { 96 // search through default locations 97 searchFunctions := []func(*afero.Afero) ([]pythonPackageDetails, error){ 98 genericSearch, 99 darwinSearch, 100 } 101 102 for _, sFunc := range searchFunctions { 103 results, err := sFunc(afs) 104 if err != nil { 105 log.Error().Err(err).Msg("error while searching for python packages") 106 return nil, err 107 } 108 allResults = append(allResults, results...) 109 } 110 } 111 112 return allResults, nil 113 } 114 115 func (k *mqlPython) packages() ([]interface{}, error) { 116 allPyPkgDetails, err := k.getAllPackages() 117 if err != nil { 118 return nil, err 119 } 120 121 // this is the "global" map so that the recursive function calls can keep track of 122 // resources already created 123 pythonPackageResourceMap := map[string]plugin.Resource{} 124 125 resp := []interface{}{} 126 127 for _, pyPkgDetails := range allPyPkgDetails { 128 res, err := pythonPackageDetailsWithDependenciesToResource(k.MqlRuntime, pyPkgDetails, allPyPkgDetails, pythonPackageResourceMap) 129 if err != nil { 130 log.Error().Err(err).Msg("error while creating resource(s) for python package") 131 // we will keep trying to make resources even if a single one failed 132 continue 133 } 134 resp = append(resp, res) 135 } 136 137 return resp, nil 138 } 139 140 func pythonPackageDetailsWithDependenciesToResource(runtime *plugin.Runtime, newPyPkgDetails pythonPackageDetails, 141 pythonPgkDetailsList []pythonPackageDetails, pythonPackageResourceMap map[string]plugin.Resource, 142 ) (interface{}, error) { 143 res := pythonPackageResourceMap[newPyPkgDetails.name] 144 if res != nil { 145 // already created the pythonPackage resource 146 return res, nil 147 } 148 149 dependencies := []interface{}{} 150 for _, dep := range newPyPkgDetails.dependencies { 151 found := false 152 var depPyPkgDetails pythonPackageDetails 153 for i, pyPkgDetails := range pythonPgkDetailsList { 154 if pyPkgDetails.name == dep { 155 depPyPkgDetails = pythonPgkDetailsList[i] 156 found = true 157 break 158 } 159 } 160 if !found { 161 // can't create a resource for something we didn't discover ¯\_(ツ)_/¯ 162 continue 163 } 164 res, err := pythonPackageDetailsWithDependenciesToResource(runtime, depPyPkgDetails, pythonPgkDetailsList, pythonPackageResourceMap) 165 if err != nil { 166 log.Warn().Err(err).Msg("failed to create python packag resource") 167 continue 168 } 169 dependencies = append(dependencies, res) 170 } 171 172 // finally create the resource 173 r, err := pythonPackageDetailsToResource(runtime, newPyPkgDetails, dependencies) 174 if err != nil { 175 log.Error().Err(err).Str("resource", newPyPkgDetails.file).Msg("error while creating MQL resource") 176 return nil, err 177 } 178 179 pythonPackageResourceMap[newPyPkgDetails.name] = r 180 181 return r, nil 182 } 183 184 func pythonPackageDetailsToResource(runtime *plugin.Runtime, ppd pythonPackageDetails, dependencies []interface{}) (plugin.Resource, error) { 185 f, err := CreateResource(runtime, "file", map[string]*llx.RawData{ 186 "path": llx.StringData(ppd.file), 187 }) 188 if err != nil { 189 log.Error().Err(err).Msg("error while creating file resource for python package resource") 190 return nil, err 191 } 192 193 r, err := CreateResource(runtime, "python.package", map[string]*llx.RawData{ 194 "id": llx.StringData(ppd.file), 195 "name": llx.StringData(ppd.name), 196 "version": llx.StringData(ppd.version), 197 "author": llx.StringData(ppd.author), 198 "summary": llx.StringData(ppd.summary), 199 "license": llx.StringData(ppd.license), 200 "file": llx.ResourceData(f, f.MqlName()), 201 "dependencies": llx.ArrayData(dependencies, types.Any), 202 }) 203 if err != nil { 204 log.Error().AnErr("err", err).Msg("error while creating MQL resource") 205 return nil, err 206 } 207 return r, nil 208 } 209 210 func (k *mqlPython) toplevel() ([]interface{}, error) { 211 allPyPkgDetails, err := k.getAllPackages() 212 if err != nil { 213 return nil, err 214 } 215 216 // this is the "global" map so that the recursive function calls can keep track of 217 // resources already created 218 pythonPackageResourceMap := map[string]plugin.Resource{} 219 220 resp := []interface{}{} 221 222 for _, pyPkgDetails := range allPyPkgDetails { 223 if !pyPkgDetails.isLeaf { 224 continue 225 } 226 227 res, err := pythonPackageDetailsWithDependenciesToResource(k.MqlRuntime, pyPkgDetails, allPyPkgDetails, pythonPackageResourceMap) 228 if err != nil { 229 log.Error().Err(err).Msg("error while creating resource(s) for python package") 230 // we will keep trying to make resources even if a single one failed 231 continue 232 } 233 resp = append(resp, res) 234 } 235 236 return resp, nil 237 } 238 239 type pythonPackageDetails struct { 240 name string 241 file string 242 license string 243 author string 244 summary string 245 version string 246 dependencies []string 247 isLeaf bool 248 } 249 250 func gatherPackages(afs *afero.Afero, pythonPackagePath string) (allResults []pythonPackageDetails) { 251 fileList, err := afs.ReadDir(pythonPackagePath) 252 if err != nil { 253 if !os.IsNotExist(err) { 254 log.Warn().Err(err).Str("dir", pythonPackagePath).Msg("unable to open directory") 255 } 256 return 257 } 258 for _, dEntry := range fileList { 259 // only process files/directories that might acctually contain 260 // the data we're looking for 261 if !strings.HasSuffix(dEntry.Name(), ".dist-info") && 262 !strings.HasSuffix(dEntry.Name(), ".egg-info") { 263 continue 264 } 265 266 // There is the possibility that the .egg-info entry is a file 267 // (not a directory) that we can directly process. 268 packagePayload := dEntry.Name() 269 270 // requestedPackage just marks whether we found the empty REQUESTED file 271 // to indicate a child/leaf package 272 requestedPackage := false 273 274 requiresTxtPath := "" 275 276 // in the event the directory entry is itself another directory 277 // go into each directory looking for our parsable payload 278 // (ie. METADATA and PKG-INFO files) 279 if dEntry.IsDir() { 280 pythonPackageDir := filepath.Join(pythonPackagePath, packagePayload) 281 packageDirFiles, err := afs.ReadDir(pythonPackageDir) 282 if err != nil { 283 log.Warn().Err(err).Str("dir", pythonPackageDir).Msg("error while walking through files in directory") 284 return 285 } 286 287 foundMeta := false 288 for _, packageFile := range packageDirFiles { 289 if packageFile.Name() == "METADATA" || packageFile.Name() == "PKG-INFO" { 290 // use the METADATA / PKG-INFO file as our source of python package info 291 packagePayload = filepath.Join(dEntry.Name(), packageFile.Name()) 292 foundMeta = true 293 } 294 if packageFile.Name() == "REQUESTED" { 295 requestedPackage = true 296 } 297 if packageFile.Name() == "requires.txt" { 298 requiresTxtPath = filepath.Join(dEntry.Name(), packageFile.Name()) 299 } 300 } 301 if !foundMeta { 302 // nothing to process (happens when we've traversed a directory 303 // containing the actual python source files) 304 continue 305 } 306 307 } 308 309 pythonPackageFilepath := filepath.Join(pythonPackagePath, packagePayload) 310 ppd, err := parseMIME(afs, pythonPackageFilepath) 311 if err != nil { 312 continue 313 } 314 ppd.isLeaf = requestedPackage 315 316 // if the MIME data didn't include dependency information, but there was a requires.txt file available, 317 // then use that for dependency info (as pip appears to do) 318 if len(ppd.dependencies) == 0 && requiresTxtPath != "" { 319 requiresTxtDeps, err := parseRequiresTxtDependencies(afs, filepath.Join(pythonPackagePath, requiresTxtPath)) 320 if err != nil { 321 log.Warn().Err(err).Str("dir", pythonPackageFilepath).Msg("failed to parse requires.txt") 322 } else { 323 ppd.dependencies = requiresTxtDeps 324 } 325 } 326 327 allResults = append(allResults, *ppd) 328 } 329 330 return 331 } 332 333 func searchForPythonPackages(afs *afero.Afero, path string) []pythonPackageDetails { 334 allResults := []pythonPackageDetails{} 335 336 packageDirs := []string{"site-packages", "dist-packages"} 337 for _, packageDir := range packageDirs { 338 pythonPackageDir := filepath.Join(path, packageDir) 339 allResults = append(allResults, gatherPackages(afs, pythonPackageDir)...) 340 } 341 342 return allResults 343 } 344 345 // firstWordRegexp is just trying to catch everything leading up the >, >=, = in a requires.txt 346 // Example: 347 // 348 // nose>=1.2 349 // Mock>=1.0 350 // pycryptodome 351 // 352 // [crypto] 353 // pycryptopp>=0.5.12 354 // 355 // [cryptography] 356 // cryptography 357 // 358 // would match nose / Mock / pycrptodome / etc 359 360 var firstWordRegexp = regexp.MustCompile(`^[a-zA-Z0-9\._-]*`) 361 362 func parseRequiresTxtDependencies(afs *afero.Afero, requiresTxtPath string) ([]string, error) { 363 f, err := afs.Open(requiresTxtPath) 364 if err != nil { 365 return nil, err 366 } 367 defer f.Close() 368 369 fileScanner := bufio.NewScanner(f) 370 fileScanner.Split(bufio.ScanLines) 371 372 depdendencies := []string{} 373 for fileScanner.Scan() { 374 line := fileScanner.Text() 375 if strings.HasPrefix(line, "[") { 376 // this means a new optional section of dependencies 377 // so stop processing 378 break 379 } 380 matched := firstWordRegexp.FindString(line) 381 if matched == "" { 382 continue 383 } 384 depdendencies = append(depdendencies, matched) 385 } 386 387 return depdendencies, nil 388 } 389 390 func parseMIME(afs *afero.Afero, pythonMIMEFilepath string) (*pythonPackageDetails, error) { 391 f, err := afs.Open(pythonMIMEFilepath) 392 if err != nil { 393 log.Warn().Err(err).Msg("error opening python metadata file") 394 return nil, err 395 } 396 defer f.Close() 397 398 textReader := textproto.NewReader(bufio.NewReader(f)) 399 mimeData, err := textReader.ReadMIMEHeader() 400 if err != nil && err != io.EOF { 401 return nil, fmt.Errorf("error reading MIME data: %s", err) 402 } 403 404 deps := extractMimeDeps(mimeData.Values("Requires-Dist")) 405 406 return &pythonPackageDetails{ 407 name: mimeData.Get("Name"), 408 summary: mimeData.Get("Summary"), 409 author: mimeData.Get("Author"), 410 license: mimeData.Get("License"), 411 version: mimeData.Get("Version"), 412 dependencies: deps, 413 file: pythonMIMEFilepath, 414 }, nil 415 } 416 417 // extractMimeDeps will go through each of the listed dependencies 418 // from the "Requires-Dist" values, and strip off everything but 419 // the name of the package/dependency itself 420 func extractMimeDeps(deps []string) []string { 421 parsedDeps := []string{} 422 for _, dep := range deps { 423 // the semicolon indicates an optional dependency 424 if strings.Contains(dep, ";") { 425 continue 426 } 427 parsedDep := strings.Split(dep, " ") 428 if len(parsedDep) > 0 { 429 parsedDeps = append(parsedDeps, parsedDep[0]) 430 } 431 } 432 return parsedDeps 433 } 434 435 func genericSearch(afs *afero.Afero) ([]pythonPackageDetails, error) { 436 allResults := []pythonPackageDetails{} 437 438 // Look through each potential location for the existence of a matching python* directory 439 for _, pyDir := range pythonDirectories { 440 parentDir := filepath.Dir(pyDir.path) 441 442 fileList, err := afs.ReadDir(parentDir) 443 if err != nil { 444 if !os.IsNotExist(err) { 445 log.Warn().Err(err).Str("dir", parentDir).Msg("unable to read directory") 446 } 447 continue 448 } 449 450 for _, dEntry := range fileList { 451 base := filepath.Base(pyDir.path) 452 matched, err := filepath.Match(base, dEntry.Name()) 453 if err != nil { 454 return nil, err 455 } 456 if matched { 457 matchedPath := filepath.Join(parentDir, dEntry.Name()) 458 log.Debug().Str("filepath", matchedPath).Msg("found matching python path") 459 460 if pyDir.addLib { 461 matchedPath = filepath.Join(matchedPath, "lib") 462 } 463 464 results := searchForPythonPackages(afs, matchedPath) 465 allResults = append(allResults, results...) 466 } 467 } 468 } 469 return allResults, nil 470 } 471 472 // darwinSearch has custom handling for the specific way that darwin 473 // can structure the paths holding python packages 474 func darwinSearch(afs *afero.Afero) ([]pythonPackageDetails, error) { 475 allResults := []pythonPackageDetails{} 476 477 if runtime.GOOS != "darwin" { 478 return allResults, nil 479 } 480 481 for _, pyPath := range pythonDirectoriesDarwin { 482 483 fileList, err := afs.ReadDir(pyPath) 484 if err != nil { 485 if !os.IsNotExist(err) { 486 log.Warn().Err(err).Str("dir", pyPath).Msg("unable to read directory") 487 } 488 continue 489 } 490 491 for _, aFile := range fileList { 492 // want to not double-search the case where the files look like: 493 // 3.9 494 // Current -> 3.9 495 // FIXME: doesn't work with AFS (we actually want an Lstat() call here) 496 // fStat, err := afs.Stat(filepath.Join(pyPath, aFile.Name())) 497 // if err != nil { 498 // log.Warn().Err(err).Str("file", aFile.Name()).Msg("error trying to stat file") 499 // continue 500 // } 501 // if fStat.Mode()&os.ModeSymlink != 0 { 502 // // ignore symlinks (basically the Current -> 3.9 symlink) so that 503 // // we don't process the same set of packages twice 504 // continue 505 // } 506 if aFile.Name() == "Current" { 507 continue 508 } 509 510 pythonPackagePath := filepath.Join(pyPath, aFile.Name(), "lib") 511 fileList, err := afs.ReadDir(pythonPackagePath) 512 if err != nil { 513 log.Warn().Err(err).Str("path", pythonPackagePath).Msg("failed to read directory") 514 continue 515 } 516 for _, oneFile := range fileList { 517 // if we run into a directory name that starts with "python" 518 // then we have a candidate to search through 519 match, err := filepath.Match("python*", oneFile.Name()) 520 if err != nil { 521 log.Error().Err(err).Msg("unexpected error while checking for python file pattern") 522 continue 523 } 524 if match { 525 matchedPath := filepath.Join(pythonPackagePath, oneFile.Name()) 526 log.Debug().Str("filepath", matchedPath).Msg("found matching python path") 527 results := searchForPythonPackages(afs, matchedPath) 528 allResults = append(allResults, results...) 529 } 530 } 531 } 532 } 533 return allResults, nil 534 }