github.com/jfrog/build-info-go@v1.9.26/utils/pythonutils/utils.go (about)

     1  package pythonutils
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net/url"
     7  	"path/filepath"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/jfrog/build-info-go/entities"
    12  	"github.com/jfrog/build-info-go/utils"
    13  	"github.com/jfrog/gofrog/io"
    14  	gofrogcmd "github.com/jfrog/gofrog/io"
    15  )
    16  
const (
	// Python package managers supported by this package.
	Pip    PythonTool = "pip"
	Pipenv PythonTool = "pipenv"
	Poetry PythonTool = "poetry"

	// Regular expression fragments used by InstallWithLogParsing to locate the path of a
	// downloaded or cached package file in the install log.

	// Matches the beginning of an (optionally indented) "Downloading <path>" log line.
	startDownloadingPattern = `^\s*Downloading\s`
	// Captures the downloaded path: a possibly empty run of non-whitespace characters.
	downloadingCaptureGroup = `[^\s]*`
	// Matches the beginning of an (optionally indented) "Using cached <path>" log line.
	startUsingCachedPattern = `^\s*Using\scached\s`
	// Captures the cached path: a non-empty run of non-whitespace characters.
	usingCacheCaptureGroup  = `[\S]+`
	// Matches the whitespace and opening parenthesis that follow the captured path.
	endPattern              = `\s\(`
)

// PythonTool is the name of a Python package manager. Its string value is also used as the
// executable name when running the install command.
type PythonTool string
    30  
    31  // Parse pythonDependencyPackage list to dependencies map. (mapping dependency to his child deps)
    32  // Also returns a list of project's root dependencies
    33  func parseDependenciesToGraph(packages []pythonDependencyPackage) (map[string][]string, []string, error) {
    34  	// Create packages map.
    35  	packagesMap := map[string][]string{}
    36  	allSubPackages := map[string]bool{}
    37  	for _, pkg := range packages {
    38  		var subPackages []string
    39  		for _, subPkg := range pkg.Dependencies {
    40  			subPkgFullName := subPkg.Key + ":" + subPkg.InstalledVersion
    41  			subPackages = append(subPackages, subPkgFullName)
    42  			allSubPackages[subPkgFullName] = true
    43  		}
    44  		packagesMap[pkg.Package.Key+":"+pkg.Package.InstalledVersion] = subPackages
    45  	}
    46  
    47  	var topLevelPackagesList []string
    48  	for pkgName := range packagesMap {
    49  		if !allSubPackages[pkgName] {
    50  			topLevelPackagesList = append(topLevelPackagesList, pkgName)
    51  		}
    52  	}
    53  	return packagesMap, topLevelPackagesList, nil
    54  }
    55  
    56  // Structs for parsing the pip-dependency-map result.
    57  type pythonDependencyPackage struct {
    58  	Package      packageType   `json:"package,omitempty"`
    59  	Dependencies []packageType `json:"dependencies,omitempty"`
    60  }
    61  
    62  type packageType struct {
    63  	Key              string `json:"key,omitempty"`
    64  	PackageName      string `json:"package_name,omitempty"`
    65  	InstalledVersion string `json:"installed_version,omitempty"`
    66  }
    67  
    68  func GetPythonDependenciesFiles(tool PythonTool, args []string, buildName, buildNumber string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) {
    69  	switch tool {
    70  	case Pip, Pipenv:
    71  		return InstallWithLogParsing(tool, args, log, srcPath)
    72  	case Poetry:
    73  		if buildName != "" && buildNumber != "" {
    74  			log.Warn("Poetry commands are not supporting collecting dependencies files")
    75  		}
    76  		return make(map[string]entities.Dependency), nil
    77  	default:
    78  		return nil, errors.New(string(tool) + " commands are not supported.")
    79  	}
    80  }
    81  
    82  func GetPythonDependencies(tool PythonTool, srcPath, localDependenciesPath string) (dependenciesGraph map[string][]string, topLevelDependencies []string, err error) {
    83  	switch tool {
    84  	case Pip:
    85  		return getPipDependencies(srcPath, localDependenciesPath)
    86  	case Pipenv:
    87  		return getPipenvDependencies(srcPath)
    88  	case Poetry:
    89  		return getPoetryDependencies(srcPath)
    90  	default:
    91  		return nil, nil, errors.New(string(tool) + " commands are not supported.")
    92  	}
    93  }
    94  
    95  func GetPackageName(tool PythonTool, srcPath string) (packageName string, err error) {
    96  	switch tool {
    97  	case Pip, Pipenv:
    98  		return getPackageNameFromSetuppy(srcPath)
    99  	case Poetry:
   100  		packageName, _, err = getPackageNameFromPyproject(srcPath)
   101  		return
   102  	default:
   103  		return "", errors.New(string(tool) + " commands are not supported.")
   104  	}
   105  }
   106  
   107  // Before running this function, dependency IDs may be the file names of the resolved python packages.
   108  // Update build info dependency IDs and the requestedBy field.
   109  // allDependencies      - Dependency name to Dependency map
   110  // dependenciesGraph    - Dependency graph as built by 'pipdeptree' or 'pipenv graph'
   111  // topLevelPackagesList - The direct dependencies
   112  // packageName          - The resolved package name of the Python project, may be empty if we couldn't resolve it
   113  // moduleName           - The input module name from the user, or the packageName
   114  func UpdateDepsIdsAndRequestedBy(dependenciesMap map[string]entities.Dependency, dependenciesGraph map[string][]string, topLevelPackagesList []string, packageName, moduleName string) {
   115  	if packageName == "" {
   116  		// Projects without setup.py
   117  		dependenciesGraph[moduleName] = topLevelPackagesList
   118  	} else if packageName != moduleName {
   119  		// Projects with setup.py
   120  		dependenciesGraph[moduleName] = dependenciesGraph[packageName]
   121  	}
   122  	rootModule := entities.Dependency{Id: moduleName, RequestedBy: [][]string{{}}}
   123  	updateDepsIdsAndRequestedBy(rootModule, dependenciesMap, dependenciesGraph)
   124  }
   125  
   126  func updateDepsIdsAndRequestedBy(parentDependency entities.Dependency, dependenciesMap map[string]entities.Dependency, dependenciesGraph map[string][]string) {
   127  	for _, childId := range dependenciesGraph[parentDependency.Id] {
   128  		childName := childId[0:strings.Index(childId, ":")]
   129  		if childDep, ok := dependenciesMap[childName]; ok {
   130  			if childDep.NodeHasLoop() || len(childDep.RequestedBy) >= entities.RequestedByMaxLength {
   131  				continue
   132  			}
   133  			// Update RequestedBy field from parent's RequestedBy.
   134  			childDep.UpdateRequestedBy(parentDependency.Id, parentDependency.RequestedBy)
   135  
   136  			// Set dependency type
   137  			if childDep.Type == "" {
   138  				fileType := ""
   139  				if i := strings.LastIndex(childDep.Id, ".tar."); i != -1 {
   140  					fileType = childDep.Id[i+1:]
   141  				} else if i := strings.LastIndex(childDep.Id, "."); i != -1 {
   142  					fileType = childDep.Id[i+1:]
   143  				}
   144  				childDep.Type = fileType
   145  			}
   146  			// Convert Id field from filename to dependency id
   147  			childDep.Id = childId
   148  			// Reassign map entry with new entry copy
   149  			dependenciesMap[childName] = childDep
   150  			// Run recursive call on child dependencies
   151  			updateDepsIdsAndRequestedBy(childDep, dependenciesMap, dependenciesGraph)
   152  		}
   153  	}
   154  }
   155  
   156  func getFilePath(srcPath, fileName string) (string, error) {
   157  	filePath := filepath.Join(srcPath, fileName)
   158  	// Check if fileName exists.
   159  	validPath, err := utils.IsFileExists(filePath, false)
   160  	if err != nil || !validPath {
   161  		return "", err
   162  	}
   163  	return filePath, nil
   164  }
   165  
   166  // Create the CmdOutputPattern objects that can capture group content that may span multiple lines for logs that have line size limitations.
   167  // Since the log parser parse line by line, we need to create a parser that can capture group content that may span multiple lines.
   168  func getMultilineSplitCaptureOutputPattern(startCollectingPattern, captureGroup, endCollectingPattern string, handler func(pattern *gofrogcmd.CmdOutputPattern) (string, error)) (parsers []*gofrogcmd.CmdOutputPattern) {
   169  	// Prepare regex patterns.
   170  	oneLineRegex := regexp.MustCompile(startCollectingPattern + `(` + captureGroup + `)` + endCollectingPattern)
   171  	startCollectionRegexp := regexp.MustCompile(startCollectingPattern)
   172  	endCollectionRegexp := regexp.MustCompile(endCollectingPattern)
   173  
   174  	// Create a parser for single line pattern matches.
   175  	parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: oneLineRegex, ExecFunc: handler})
   176  
   177  	// Create a parser for multi line pattern matches.
   178  	lineBuffer := ""
   179  	collectingMultiLineValue := false
   180  	parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: regexp.MustCompile(".*"), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) {
   181  		// Check if the line matches the startCollectingPattern.
   182  		if !collectingMultiLineValue && startCollectionRegexp.MatchString(pattern.Line) {
   183  			// Start collecting lines.
   184  			collectingMultiLineValue = true
   185  			lineBuffer = pattern.Line
   186  			// We assume that the content is multiline so no need to check end at this point.
   187  			// Single line will be handled and matched by the other parser.
   188  			return pattern.Line, nil
   189  		}
   190  		if !collectingMultiLineValue {
   191  			return pattern.Line, nil
   192  		}
   193  		// Add the line content to the buffer.
   194  		lineBuffer += pattern.Line
   195  		// Check if the line matches the endCollectingPattern.
   196  		if endCollectionRegexp.MatchString(pattern.Line) {
   197  			collectingMultiLineValue = false
   198  			// Simulate a one line content check to make sure we have regex match.
   199  			if oneLineRegex.MatchString(lineBuffer) {
   200  				return handler(&gofrogcmd.CmdOutputPattern{Line: pattern.Line, MatchedResults: oneLineRegex.FindStringSubmatch(lineBuffer)})
   201  			}
   202  		}
   203  
   204  		return pattern.Line, nil
   205  	}})
   206  
   207  	return
   208  }
   209  
   210  func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) {
   211  	if tool == Pipenv {
   212  		// Add verbosity flag to pipenv commands to collect necessary data
   213  		commandArgs = append(commandArgs, "-v")
   214  	}
   215  	installCmd := io.NewCommand(string(tool), "install", commandArgs)
   216  	installCmd.Dir = srcPath
   217  
   218  	dependenciesMap := map[string]entities.Dependency{}
   219  	parsers := []*gofrogcmd.CmdOutputPattern{}
   220  
   221  	var packageName string
   222  	expectingPackageFilePath := false
   223  
   224  	// Extract downloaded package name.
   225  	parsers = append(parsers, &gofrogcmd.CmdOutputPattern{
   226  		RegExp: regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`),
   227  		ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) {
   228  			// If this pattern matched a second time before downloaded-file-name was found, prompt a message.
   229  			if expectingPackageFilePath {
   230  				// This may occur when a package-installation file is saved in pip-cache-dir, thus not being downloaded during the installation.
   231  				// Re-running pip-install with 'no-cache-dir' fixes this issue.
   232  				log.Debug(fmt.Sprintf("Could not resolve download path for package: %s, continuing...", packageName))
   233  
   234  				// Save package with empty file path.
   235  				dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: ""}
   236  			}
   237  
   238  			// Check for out of bound results.
   239  			if len(pattern.MatchedResults)-1 <= 0 {
   240  				log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line))
   241  				return pattern.Line, nil
   242  			}
   243  
   244  			// Save dependency information.
   245  			expectingPackageFilePath = true
   246  			packageName = pattern.MatchedResults[1]
   247  
   248  			return pattern.Line, nil
   249  		},
   250  	})
   251  
   252  	saveCaptureGroupAsDependencyInfo := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) {
   253  		fileName := extractFileNameFromRegexCaptureGroup(pattern)
   254  		if fileName == "" {
   255  			log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line))
   256  			return pattern.Line, nil
   257  		}
   258  		// If this pattern matched before package-name was found, do not collect this path.
   259  		if !expectingPackageFilePath {
   260  			log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName))
   261  			return pattern.Line, nil
   262  		}
   263  		// Save dependency information.
   264  		dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName}
   265  		expectingPackageFilePath = false
   266  		log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName))
   267  		return pattern.Line, nil
   268  	}
   269  
   270  	// Extract downloaded file, stored in Artifactory. (value at log may be split into multiple lines)
   271  	parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startDownloadingPattern, downloadingCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...)
   272  	// Extract cached file, stored in Artifactory. (value at log may be split into multiple lines)
   273  	parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startUsingCachedPattern, usingCacheCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...)
   274  
   275  	// Extract already installed packages names.
   276  	parsers = append(parsers, &gofrogcmd.CmdOutputPattern{
   277  		RegExp: regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`),
   278  		ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) {
   279  			// Check for out of bound results.
   280  			if len(pattern.MatchedResults)-1 < 0 {
   281  				log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line))
   282  				return pattern.Line, nil
   283  			}
   284  
   285  			// Save dependency with empty file name.
   286  			dependenciesMap[strings.ToLower(pattern.MatchedResults[1])] = entities.Dependency{Id: ""}
   287  			log.Debug(fmt.Sprintf("Found package: %s already installed", pattern.MatchedResults[1]))
   288  			return pattern.Line, nil
   289  		},
   290  	})
   291  
   292  	// Execute command.
   293  	_, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, parsers...)
   294  	if err != nil {
   295  		return nil, fmt.Errorf("failed running %s command with error: '%s - %s'", string(tool), err.Error(), errorOut)
   296  	}
   297  	return dependenciesMap, nil
   298  }
   299  
   300  func extractFileNameFromRegexCaptureGroup(pattern *gofrogcmd.CmdOutputPattern) (fileName string) {
   301  	// Check for out of bound results (no captures).
   302  	if len(pattern.MatchedResults) <= 1 {
   303  		return ""
   304  	}
   305  	// Extract file information from capture group.
   306  	filePath := pattern.MatchedResults[1]
   307  	lastSlashIndex := strings.LastIndex(filePath, "/")
   308  	if lastSlashIndex == -1 {
   309  		return filePath
   310  	}
   311  	lastComponent := filePath[lastSlashIndex+1:]
   312  	// Unescape the last component, for example 'PyYAML-5.1.2%2Bsp1.tar.gz' -> 'PyYAML-5.1.2+sp1.tar.gz'.
   313  	unescapedComponent, _ := url.QueryUnescape(lastComponent)
   314  	if unescapedComponent == "" {
   315  		// Couldn't escape, will use the raw string
   316  		return lastComponent
   317  	}
   318  	return unescapedComponent
   319  }