go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/providers/os/resources/python.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package resources
     5  
     6  import (
     7  	"bufio"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"net/textproto"
    12  	"os"
    13  	"path/filepath"
    14  	"regexp"
    15  	"runtime"
    16  	"strings"
    17  
    18  	"github.com/rs/zerolog/log"
    19  	"github.com/spf13/afero"
    20  	"go.mondoo.com/cnquery/llx"
    21  	"go.mondoo.com/cnquery/providers-sdk/v1/plugin"
    22  	"go.mondoo.com/cnquery/providers/os/connection/shared"
    23  	"go.mondoo.com/cnquery/types"
    24  )
    25  
    26  type pythonDirectory struct {
    27  	path   string
    28  	addLib bool
    29  }
    30  
    31  var pythonDirectories = []pythonDirectory{
    32  	{
    33  		path: "/usr/local/lib/python*",
    34  	},
    35  	{
    36  		path: "/usr/local/lib64/python*",
    37  	},
    38  	{
    39  		path: "/usr/lib/python*",
    40  	},
    41  	{
    42  		path: "/usr/lib64/python*",
    43  	},
    44  	{
    45  		path: "/opt/homebrew/lib/python*",
    46  	},
    47  	{
    48  		// surprisingly, this is handled in a case-sensitive way in go (the filepath.Match() glob/pattern matching)
    49  		path: "C:/Python*",
    50  		// true because in Windows the 'site-packages' dir lives in a path like:
    51  		// C:\Python3.11\Lib\site-packages
    52  		addLib: true,
    53  	},
    54  }
    55  
    56  var pythonDirectoriesDarwin = []string{
    57  	"/System/Library/Frameworks/Python.framework/Versions",
    58  	"/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions",
    59  }
    60  
    61  func initPython(runtime *plugin.Runtime, args map[string]*llx.RawData) (map[string]*llx.RawData, plugin.Resource, error) {
    62  	if x, ok := args["path"]; ok {
    63  		_, ok := x.Value.(string)
    64  		if !ok {
    65  			return nil, nil, errors.New("Wrong type for 'path' in python initialization, it must be a string")
    66  		}
    67  	} else {
    68  		// empty path means search through default locations
    69  		args["path"] = llx.StringData("")
    70  	}
    71  
    72  	return args, nil, nil
    73  }
    74  
    75  func (k *mqlPython) id() (string, error) {
    76  	return "python", nil
    77  }
    78  
    79  func (k *mqlPython) getAllPackages() ([]pythonPackageDetails, error) {
    80  	allResults := []pythonPackageDetails{}
    81  
    82  	conn, ok := k.MqlRuntime.Connection.(shared.Connection)
    83  	if !ok {
    84  		return nil, fmt.Errorf("provider is not an operating system provider")
    85  	}
    86  	afs := &afero.Afero{Fs: conn.FileSystem()}
    87  
    88  	if k.Path.Error != nil {
    89  		return nil, k.Path.Error
    90  	}
    91  	pyPath := k.Path.Data
    92  	if pyPath != "" {
    93  		// only search the specific path provided (if it was provided)
    94  		allResults = gatherPackages(afs, pyPath)
    95  	} else {
    96  		// search through default locations
    97  		searchFunctions := []func(*afero.Afero) ([]pythonPackageDetails, error){
    98  			genericSearch,
    99  			darwinSearch,
   100  		}
   101  
   102  		for _, sFunc := range searchFunctions {
   103  			results, err := sFunc(afs)
   104  			if err != nil {
   105  				log.Error().Err(err).Msg("error while searching for python packages")
   106  				return nil, err
   107  			}
   108  			allResults = append(allResults, results...)
   109  		}
   110  	}
   111  
   112  	return allResults, nil
   113  }
   114  
   115  func (k *mqlPython) packages() ([]interface{}, error) {
   116  	allPyPkgDetails, err := k.getAllPackages()
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	// this is the "global" map so that the recursive function calls can keep track of
   122  	// resources already created
   123  	pythonPackageResourceMap := map[string]plugin.Resource{}
   124  
   125  	resp := []interface{}{}
   126  
   127  	for _, pyPkgDetails := range allPyPkgDetails {
   128  		res, err := pythonPackageDetailsWithDependenciesToResource(k.MqlRuntime, pyPkgDetails, allPyPkgDetails, pythonPackageResourceMap)
   129  		if err != nil {
   130  			log.Error().Err(err).Msg("error while creating resource(s) for python package")
   131  			// we will keep trying to make resources even if a single one failed
   132  			continue
   133  		}
   134  		resp = append(resp, res)
   135  	}
   136  
   137  	return resp, nil
   138  }
   139  
   140  func pythonPackageDetailsWithDependenciesToResource(runtime *plugin.Runtime, newPyPkgDetails pythonPackageDetails,
   141  	pythonPgkDetailsList []pythonPackageDetails, pythonPackageResourceMap map[string]plugin.Resource,
   142  ) (interface{}, error) {
   143  	res := pythonPackageResourceMap[newPyPkgDetails.name]
   144  	if res != nil {
   145  		// already created the pythonPackage resource
   146  		return res, nil
   147  	}
   148  
   149  	dependencies := []interface{}{}
   150  	for _, dep := range newPyPkgDetails.dependencies {
   151  		found := false
   152  		var depPyPkgDetails pythonPackageDetails
   153  		for i, pyPkgDetails := range pythonPgkDetailsList {
   154  			if pyPkgDetails.name == dep {
   155  				depPyPkgDetails = pythonPgkDetailsList[i]
   156  				found = true
   157  				break
   158  			}
   159  		}
   160  		if !found {
   161  			// can't create a resource for something we didn't discover ¯\_(ツ)_/¯
   162  			continue
   163  		}
   164  		res, err := pythonPackageDetailsWithDependenciesToResource(runtime, depPyPkgDetails, pythonPgkDetailsList, pythonPackageResourceMap)
   165  		if err != nil {
   166  			log.Warn().Err(err).Msg("failed to create python packag resource")
   167  			continue
   168  		}
   169  		dependencies = append(dependencies, res)
   170  	}
   171  
   172  	// finally create the resource
   173  	r, err := pythonPackageDetailsToResource(runtime, newPyPkgDetails, dependencies)
   174  	if err != nil {
   175  		log.Error().Err(err).Str("resource", newPyPkgDetails.file).Msg("error while creating MQL resource")
   176  		return nil, err
   177  	}
   178  
   179  	pythonPackageResourceMap[newPyPkgDetails.name] = r
   180  
   181  	return r, nil
   182  }
   183  
   184  func pythonPackageDetailsToResource(runtime *plugin.Runtime, ppd pythonPackageDetails, dependencies []interface{}) (plugin.Resource, error) {
   185  	f, err := CreateResource(runtime, "file", map[string]*llx.RawData{
   186  		"path": llx.StringData(ppd.file),
   187  	})
   188  	if err != nil {
   189  		log.Error().Err(err).Msg("error while creating file resource for python package resource")
   190  		return nil, err
   191  	}
   192  
   193  	r, err := CreateResource(runtime, "python.package", map[string]*llx.RawData{
   194  		"id":           llx.StringData(ppd.file),
   195  		"name":         llx.StringData(ppd.name),
   196  		"version":      llx.StringData(ppd.version),
   197  		"author":       llx.StringData(ppd.author),
   198  		"summary":      llx.StringData(ppd.summary),
   199  		"license":      llx.StringData(ppd.license),
   200  		"file":         llx.ResourceData(f, f.MqlName()),
   201  		"dependencies": llx.ArrayData(dependencies, types.Any),
   202  	})
   203  	if err != nil {
   204  		log.Error().AnErr("err", err).Msg("error while creating MQL resource")
   205  		return nil, err
   206  	}
   207  	return r, nil
   208  }
   209  
   210  func (k *mqlPython) toplevel() ([]interface{}, error) {
   211  	allPyPkgDetails, err := k.getAllPackages()
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  
   216  	// this is the "global" map so that the recursive function calls can keep track of
   217  	// resources already created
   218  	pythonPackageResourceMap := map[string]plugin.Resource{}
   219  
   220  	resp := []interface{}{}
   221  
   222  	for _, pyPkgDetails := range allPyPkgDetails {
   223  		if !pyPkgDetails.isLeaf {
   224  			continue
   225  		}
   226  
   227  		res, err := pythonPackageDetailsWithDependenciesToResource(k.MqlRuntime, pyPkgDetails, allPyPkgDetails, pythonPackageResourceMap)
   228  		if err != nil {
   229  			log.Error().Err(err).Msg("error while creating resource(s) for python package")
   230  			// we will keep trying to make resources even if a single one failed
   231  			continue
   232  		}
   233  		resp = append(resp, res)
   234  	}
   235  
   236  	return resp, nil
   237  }
   238  
   239  type pythonPackageDetails struct {
   240  	name         string
   241  	file         string
   242  	license      string
   243  	author       string
   244  	summary      string
   245  	version      string
   246  	dependencies []string
   247  	isLeaf       bool
   248  }
   249  
   250  func gatherPackages(afs *afero.Afero, pythonPackagePath string) (allResults []pythonPackageDetails) {
   251  	fileList, err := afs.ReadDir(pythonPackagePath)
   252  	if err != nil {
   253  		if !os.IsNotExist(err) {
   254  			log.Warn().Err(err).Str("dir", pythonPackagePath).Msg("unable to open directory")
   255  		}
   256  		return
   257  	}
   258  	for _, dEntry := range fileList {
   259  		// only process files/directories that might acctually contain
   260  		// the data we're looking for
   261  		if !strings.HasSuffix(dEntry.Name(), ".dist-info") &&
   262  			!strings.HasSuffix(dEntry.Name(), ".egg-info") {
   263  			continue
   264  		}
   265  
   266  		// There is the possibility that the .egg-info entry is a file
   267  		// (not a directory) that we can directly process.
   268  		packagePayload := dEntry.Name()
   269  
   270  		// requestedPackage just marks whether we found the empty REQUESTED file
   271  		// to indicate a child/leaf package
   272  		requestedPackage := false
   273  
   274  		requiresTxtPath := ""
   275  
   276  		// in the event the directory entry is itself another directory
   277  		// go into each directory looking for our parsable payload
   278  		// (ie. METADATA and PKG-INFO files)
   279  		if dEntry.IsDir() {
   280  			pythonPackageDir := filepath.Join(pythonPackagePath, packagePayload)
   281  			packageDirFiles, err := afs.ReadDir(pythonPackageDir)
   282  			if err != nil {
   283  				log.Warn().Err(err).Str("dir", pythonPackageDir).Msg("error while walking through files in directory")
   284  				return
   285  			}
   286  
   287  			foundMeta := false
   288  			for _, packageFile := range packageDirFiles {
   289  				if packageFile.Name() == "METADATA" || packageFile.Name() == "PKG-INFO" {
   290  					// use the METADATA / PKG-INFO file as our source of python package info
   291  					packagePayload = filepath.Join(dEntry.Name(), packageFile.Name())
   292  					foundMeta = true
   293  				}
   294  				if packageFile.Name() == "REQUESTED" {
   295  					requestedPackage = true
   296  				}
   297  				if packageFile.Name() == "requires.txt" {
   298  					requiresTxtPath = filepath.Join(dEntry.Name(), packageFile.Name())
   299  				}
   300  			}
   301  			if !foundMeta {
   302  				// nothing to process (happens when we've traversed a directory
   303  				// containing the actual python source files)
   304  				continue
   305  			}
   306  
   307  		}
   308  
   309  		pythonPackageFilepath := filepath.Join(pythonPackagePath, packagePayload)
   310  		ppd, err := parseMIME(afs, pythonPackageFilepath)
   311  		if err != nil {
   312  			continue
   313  		}
   314  		ppd.isLeaf = requestedPackage
   315  
   316  		// if the MIME data didn't include dependency information, but there was a requires.txt file available,
   317  		// then use that for dependency info (as pip appears to do)
   318  		if len(ppd.dependencies) == 0 && requiresTxtPath != "" {
   319  			requiresTxtDeps, err := parseRequiresTxtDependencies(afs, filepath.Join(pythonPackagePath, requiresTxtPath))
   320  			if err != nil {
   321  				log.Warn().Err(err).Str("dir", pythonPackageFilepath).Msg("failed to parse requires.txt")
   322  			} else {
   323  				ppd.dependencies = requiresTxtDeps
   324  			}
   325  		}
   326  
   327  		allResults = append(allResults, *ppd)
   328  	}
   329  
   330  	return
   331  }
   332  
   333  func searchForPythonPackages(afs *afero.Afero, path string) []pythonPackageDetails {
   334  	allResults := []pythonPackageDetails{}
   335  
   336  	packageDirs := []string{"site-packages", "dist-packages"}
   337  	for _, packageDir := range packageDirs {
   338  		pythonPackageDir := filepath.Join(path, packageDir)
   339  		allResults = append(allResults, gatherPackages(afs, pythonPackageDir)...)
   340  	}
   341  
   342  	return allResults
   343  }
   344  
   345  // firstWordRegexp is just trying to catch everything leading up the >, >=, = in a requires.txt
   346  // Example:
   347  //
   348  // nose>=1.2
   349  // Mock>=1.0
   350  // pycryptodome
   351  //
   352  // [crypto]
   353  // pycryptopp>=0.5.12
   354  //
   355  // [cryptography]
   356  // cryptography
   357  //
   358  // would match nose / Mock / pycrptodome / etc
   359  
   360  var firstWordRegexp = regexp.MustCompile(`^[a-zA-Z0-9\._-]*`)
   361  
   362  func parseRequiresTxtDependencies(afs *afero.Afero, requiresTxtPath string) ([]string, error) {
   363  	f, err := afs.Open(requiresTxtPath)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  	defer f.Close()
   368  
   369  	fileScanner := bufio.NewScanner(f)
   370  	fileScanner.Split(bufio.ScanLines)
   371  
   372  	depdendencies := []string{}
   373  	for fileScanner.Scan() {
   374  		line := fileScanner.Text()
   375  		if strings.HasPrefix(line, "[") {
   376  			// this means a new optional section of dependencies
   377  			// so stop processing
   378  			break
   379  		}
   380  		matched := firstWordRegexp.FindString(line)
   381  		if matched == "" {
   382  			continue
   383  		}
   384  		depdendencies = append(depdendencies, matched)
   385  	}
   386  
   387  	return depdendencies, nil
   388  }
   389  
   390  func parseMIME(afs *afero.Afero, pythonMIMEFilepath string) (*pythonPackageDetails, error) {
   391  	f, err := afs.Open(pythonMIMEFilepath)
   392  	if err != nil {
   393  		log.Warn().Err(err).Msg("error opening python metadata file")
   394  		return nil, err
   395  	}
   396  	defer f.Close()
   397  
   398  	textReader := textproto.NewReader(bufio.NewReader(f))
   399  	mimeData, err := textReader.ReadMIMEHeader()
   400  	if err != nil && err != io.EOF {
   401  		return nil, fmt.Errorf("error reading MIME data: %s", err)
   402  	}
   403  
   404  	deps := extractMimeDeps(mimeData.Values("Requires-Dist"))
   405  
   406  	return &pythonPackageDetails{
   407  		name:         mimeData.Get("Name"),
   408  		summary:      mimeData.Get("Summary"),
   409  		author:       mimeData.Get("Author"),
   410  		license:      mimeData.Get("License"),
   411  		version:      mimeData.Get("Version"),
   412  		dependencies: deps,
   413  		file:         pythonMIMEFilepath,
   414  	}, nil
   415  }
   416  
   417  // extractMimeDeps will go through each of the listed dependencies
   418  // from the "Requires-Dist" values, and strip off everything but
   419  // the name of the package/dependency itself
   420  func extractMimeDeps(deps []string) []string {
   421  	parsedDeps := []string{}
   422  	for _, dep := range deps {
   423  		// the semicolon indicates an optional dependency
   424  		if strings.Contains(dep, ";") {
   425  			continue
   426  		}
   427  		parsedDep := strings.Split(dep, " ")
   428  		if len(parsedDep) > 0 {
   429  			parsedDeps = append(parsedDeps, parsedDep[0])
   430  		}
   431  	}
   432  	return parsedDeps
   433  }
   434  
   435  func genericSearch(afs *afero.Afero) ([]pythonPackageDetails, error) {
   436  	allResults := []pythonPackageDetails{}
   437  
   438  	// Look through each potential location for the existence of a matching python* directory
   439  	for _, pyDir := range pythonDirectories {
   440  		parentDir := filepath.Dir(pyDir.path)
   441  
   442  		fileList, err := afs.ReadDir(parentDir)
   443  		if err != nil {
   444  			if !os.IsNotExist(err) {
   445  				log.Warn().Err(err).Str("dir", parentDir).Msg("unable to read directory")
   446  			}
   447  			continue
   448  		}
   449  
   450  		for _, dEntry := range fileList {
   451  			base := filepath.Base(pyDir.path)
   452  			matched, err := filepath.Match(base, dEntry.Name())
   453  			if err != nil {
   454  				return nil, err
   455  			}
   456  			if matched {
   457  				matchedPath := filepath.Join(parentDir, dEntry.Name())
   458  				log.Debug().Str("filepath", matchedPath).Msg("found matching python path")
   459  
   460  				if pyDir.addLib {
   461  					matchedPath = filepath.Join(matchedPath, "lib")
   462  				}
   463  
   464  				results := searchForPythonPackages(afs, matchedPath)
   465  				allResults = append(allResults, results...)
   466  			}
   467  		}
   468  	}
   469  	return allResults, nil
   470  }
   471  
   472  // darwinSearch has custom handling for the specific way that darwin
   473  // can structure the paths holding python packages
   474  func darwinSearch(afs *afero.Afero) ([]pythonPackageDetails, error) {
   475  	allResults := []pythonPackageDetails{}
   476  
   477  	if runtime.GOOS != "darwin" {
   478  		return allResults, nil
   479  	}
   480  
   481  	for _, pyPath := range pythonDirectoriesDarwin {
   482  
   483  		fileList, err := afs.ReadDir(pyPath)
   484  		if err != nil {
   485  			if !os.IsNotExist(err) {
   486  				log.Warn().Err(err).Str("dir", pyPath).Msg("unable to read directory")
   487  			}
   488  			continue
   489  		}
   490  
   491  		for _, aFile := range fileList {
   492  			// want to not double-search the case where the files look like:
   493  			// 3.9
   494  			// Current -> 3.9
   495  			// FIXME: doesn't work with AFS (we actually want an Lstat() call here)
   496  			// fStat, err := afs.Stat(filepath.Join(pyPath, aFile.Name()))
   497  			// if err != nil {
   498  			// 	log.Warn().Err(err).Str("file", aFile.Name()).Msg("error trying to stat file")
   499  			// 	continue
   500  			// }
   501  			// if fStat.Mode()&os.ModeSymlink != 0 {
   502  			// 	// ignore symlinks (basically the Current -> 3.9 symlink) so that
   503  			// 	// we don't process the same set of packages twice
   504  			// 	continue
   505  			// }
   506  			if aFile.Name() == "Current" {
   507  				continue
   508  			}
   509  
   510  			pythonPackagePath := filepath.Join(pyPath, aFile.Name(), "lib")
   511  			fileList, err := afs.ReadDir(pythonPackagePath)
   512  			if err != nil {
   513  				log.Warn().Err(err).Str("path", pythonPackagePath).Msg("failed to read directory")
   514  				continue
   515  			}
   516  			for _, oneFile := range fileList {
   517  				// if we run into a directory name that starts with "python"
   518  				// then we have a candidate to search through
   519  				match, err := filepath.Match("python*", oneFile.Name())
   520  				if err != nil {
   521  					log.Error().Err(err).Msg("unexpected error while checking for python file pattern")
   522  					continue
   523  				}
   524  				if match {
   525  					matchedPath := filepath.Join(pythonPackagePath, oneFile.Name())
   526  					log.Debug().Str("filepath", matchedPath).Msg("found matching python path")
   527  					results := searchForPythonPackages(afs, matchedPath)
   528  					allResults = append(allResults, results...)
   529  				}
   530  			}
   531  		}
   532  	}
   533  	return allResults, nil
   534  }