github.com/jfrog/build-info-go@v1.9.26/utils/pythonutils/utils.go (about) 1 package pythonutils 2 3 import ( 4 "errors" 5 "fmt" 6 "net/url" 7 "path/filepath" 8 "regexp" 9 "strings" 10 11 "github.com/jfrog/build-info-go/entities" 12 "github.com/jfrog/build-info-go/utils" 13 "github.com/jfrog/gofrog/io" 14 gofrogcmd "github.com/jfrog/gofrog/io" 15 ) 16 17 const ( 18 Pip PythonTool = "pip" 19 Pipenv PythonTool = "pipenv" 20 Poetry PythonTool = "poetry" 21 22 startDownloadingPattern = `^\s*Downloading\s` 23 downloadingCaptureGroup = `[^\s]*` 24 startUsingCachedPattern = `^\s*Using\scached\s` 25 usingCacheCaptureGroup = `[\S]+` 26 endPattern = `\s\(` 27 ) 28 29 type PythonTool string 30 31 // Parse pythonDependencyPackage list to dependencies map. (mapping dependency to his child deps) 32 // Also returns a list of project's root dependencies 33 func parseDependenciesToGraph(packages []pythonDependencyPackage) (map[string][]string, []string, error) { 34 // Create packages map. 35 packagesMap := map[string][]string{} 36 allSubPackages := map[string]bool{} 37 for _, pkg := range packages { 38 var subPackages []string 39 for _, subPkg := range pkg.Dependencies { 40 subPkgFullName := subPkg.Key + ":" + subPkg.InstalledVersion 41 subPackages = append(subPackages, subPkgFullName) 42 allSubPackages[subPkgFullName] = true 43 } 44 packagesMap[pkg.Package.Key+":"+pkg.Package.InstalledVersion] = subPackages 45 } 46 47 var topLevelPackagesList []string 48 for pkgName := range packagesMap { 49 if !allSubPackages[pkgName] { 50 topLevelPackagesList = append(topLevelPackagesList, pkgName) 51 } 52 } 53 return packagesMap, topLevelPackagesList, nil 54 } 55 56 // Structs for parsing the pip-dependency-map result. 57 type pythonDependencyPackage struct { 58 Package packageType `json:"package,omitempty"` 59 Dependencies []packageType `json:"dependencies,omitempty"` 60 } 61 62 type packageType struct { 63 Key string `json:"key,omitempty"` 64 PackageName string `json:"package_name,omitempty"` 65 InstalledVersion string `json:"installed_version,omitempty"` 66 } 67 68 func GetPythonDependenciesFiles(tool PythonTool, args []string, buildName, buildNumber string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) { 69 switch tool { 70 case Pip, Pipenv: 71 return InstallWithLogParsing(tool, args, log, srcPath) 72 case Poetry: 73 if buildName != "" && buildNumber != "" { 74 log.Warn("Poetry commands are not supporting collecting dependencies files") 75 } 76 return make(map[string]entities.Dependency), nil 77 default: 78 return nil, errors.New(string(tool) + " commands are not supported.") 79 } 80 } 81 82 func GetPythonDependencies(tool PythonTool, srcPath, localDependenciesPath string) (dependenciesGraph map[string][]string, topLevelDependencies []string, err error) { 83 switch tool { 84 case Pip: 85 return getPipDependencies(srcPath, localDependenciesPath) 86 case Pipenv: 87 return getPipenvDependencies(srcPath) 88 case Poetry: 89 return getPoetryDependencies(srcPath) 90 default: 91 return nil, nil, errors.New(string(tool) + " commands are not supported.") 92 } 93 } 94 95 func GetPackageName(tool PythonTool, srcPath string) (packageName string, err error) { 96 switch tool { 97 case Pip, Pipenv: 98 return getPackageNameFromSetuppy(srcPath) 99 case Poetry: 100 packageName, _, err = getPackageNameFromPyproject(srcPath) 101 return 102 default: 103 return "", errors.New(string(tool) + " commands are not supported.") 104 } 105 } 106 107 // Before running this function, dependency IDs may be the file names of the resolved python packages. 108 // Update build info dependency IDs and the requestedBy field. 109 // allDependencies - Dependency name to Dependency map 110 // dependenciesGraph - Dependency graph as built by 'pipdeptree' or 'pipenv graph' 111 // topLevelPackagesList - The direct dependencies 112 // packageName - The resolved package name of the Python project, may be empty if we couldn't resolve it 113 // moduleName - The input module name from the user, or the packageName 114 func UpdateDepsIdsAndRequestedBy(dependenciesMap map[string]entities.Dependency, dependenciesGraph map[string][]string, topLevelPackagesList []string, packageName, moduleName string) { 115 if packageName == "" { 116 // Projects without setup.py 117 dependenciesGraph[moduleName] = topLevelPackagesList 118 } else if packageName != moduleName { 119 // Projects with setup.py 120 dependenciesGraph[moduleName] = dependenciesGraph[packageName] 121 } 122 rootModule := entities.Dependency{Id: moduleName, RequestedBy: [][]string{{}}} 123 updateDepsIdsAndRequestedBy(rootModule, dependenciesMap, dependenciesGraph) 124 } 125 126 func updateDepsIdsAndRequestedBy(parentDependency entities.Dependency, dependenciesMap map[string]entities.Dependency, dependenciesGraph map[string][]string) { 127 for _, childId := range dependenciesGraph[parentDependency.Id] { 128 childName := childId[0:strings.Index(childId, ":")] 129 if childDep, ok := dependenciesMap[childName]; ok { 130 if childDep.NodeHasLoop() || len(childDep.RequestedBy) >= entities.RequestedByMaxLength { 131 continue 132 } 133 // Update RequestedBy field from parent's RequestedBy. 134 childDep.UpdateRequestedBy(parentDependency.Id, parentDependency.RequestedBy) 135 136 // Set dependency type 137 if childDep.Type == "" { 138 fileType := "" 139 if i := strings.LastIndex(childDep.Id, ".tar."); i != -1 { 140 fileType = childDep.Id[i+1:] 141 } else if i := strings.LastIndex(childDep.Id, "."); i != -1 { 142 fileType = childDep.Id[i+1:] 143 } 144 childDep.Type = fileType 145 } 146 // Convert Id field from filename to dependency id 147 childDep.Id = childId 148 // Reassign map entry with new entry copy 149 dependenciesMap[childName] = childDep 150 // Run recursive call on child dependencies 151 updateDepsIdsAndRequestedBy(childDep, dependenciesMap, dependenciesGraph) 152 } 153 } 154 } 155 156 func getFilePath(srcPath, fileName string) (string, error) { 157 filePath := filepath.Join(srcPath, fileName) 158 // Check if fileName exists. 159 validPath, err := utils.IsFileExists(filePath, false) 160 if err != nil || !validPath { 161 return "", err 162 } 163 return filePath, nil 164 } 165 166 // Create the CmdOutputPattern objects that can capture group content that may span multiple lines for logs that have line size limitations. 167 // Since the log parser parse line by line, we need to create a parser that can capture group content that may span multiple lines. 168 func getMultilineSplitCaptureOutputPattern(startCollectingPattern, captureGroup, endCollectingPattern string, handler func(pattern *gofrogcmd.CmdOutputPattern) (string, error)) (parsers []*gofrogcmd.CmdOutputPattern) { 169 // Prepare regex patterns. 170 oneLineRegex := regexp.MustCompile(startCollectingPattern + `(` + captureGroup + `)` + endCollectingPattern) 171 startCollectionRegexp := regexp.MustCompile(startCollectingPattern) 172 endCollectionRegexp := regexp.MustCompile(endCollectingPattern) 173 174 // Create a parser for single line pattern matches. 175 parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: oneLineRegex, ExecFunc: handler}) 176 177 // Create a parser for multi line pattern matches. 178 lineBuffer := "" 179 collectingMultiLineValue := false 180 parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: regexp.MustCompile(".*"), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { 181 // Check if the line matches the startCollectingPattern. 182 if !collectingMultiLineValue && startCollectionRegexp.MatchString(pattern.Line) { 183 // Start collecting lines. 184 collectingMultiLineValue = true 185 lineBuffer = pattern.Line 186 // We assume that the content is multiline so no need to check end at this point. 187 // Single line will be handled and matched by the other parser. 188 return pattern.Line, nil 189 } 190 if !collectingMultiLineValue { 191 return pattern.Line, nil 192 } 193 // Add the line content to the buffer. 194 lineBuffer += pattern.Line 195 // Check if the line matches the endCollectingPattern. 196 if endCollectionRegexp.MatchString(pattern.Line) { 197 collectingMultiLineValue = false 198 // Simulate a one line content check to make sure we have regex match. 199 if oneLineRegex.MatchString(lineBuffer) { 200 return handler(&gofrogcmd.CmdOutputPattern{Line: pattern.Line, MatchedResults: oneLineRegex.FindStringSubmatch(lineBuffer)}) 201 } 202 } 203 204 return pattern.Line, nil 205 }}) 206 207 return 208 } 209 210 func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) { 211 if tool == Pipenv { 212 // Add verbosity flag to pipenv commands to collect necessary data 213 commandArgs = append(commandArgs, "-v") 214 } 215 installCmd := io.NewCommand(string(tool), "install", commandArgs) 216 installCmd.Dir = srcPath 217 218 dependenciesMap := map[string]entities.Dependency{} 219 parsers := []*gofrogcmd.CmdOutputPattern{} 220 221 var packageName string 222 expectingPackageFilePath := false 223 224 // Extract downloaded package name. 225 parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ 226 RegExp: regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`), 227 ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { 228 // If this pattern matched a second time before downloaded-file-name was found, prompt a message. 229 if expectingPackageFilePath { 230 // This may occur when a package-installation file is saved in pip-cache-dir, thus not being downloaded during the installation. 231 // Re-running pip-install with 'no-cache-dir' fixes this issue. 232 log.Debug(fmt.Sprintf("Could not resolve download path for package: %s, continuing...", packageName)) 233 234 // Save package with empty file path. 235 dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: ""} 236 } 237 238 // Check for out of bound results. 239 if len(pattern.MatchedResults)-1 <= 0 { 240 log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line)) 241 return pattern.Line, nil 242 } 243 244 // Save dependency information. 245 expectingPackageFilePath = true 246 packageName = pattern.MatchedResults[1] 247 248 return pattern.Line, nil 249 }, 250 }) 251 252 saveCaptureGroupAsDependencyInfo := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { 253 fileName := extractFileNameFromRegexCaptureGroup(pattern) 254 if fileName == "" { 255 log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line)) 256 return pattern.Line, nil 257 } 258 // If this pattern matched before package-name was found, do not collect this path. 259 if !expectingPackageFilePath { 260 log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName)) 261 return pattern.Line, nil 262 } 263 // Save dependency information. 264 dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName} 265 expectingPackageFilePath = false 266 log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName)) 267 return pattern.Line, nil 268 } 269 270 // Extract downloaded file, stored in Artifactory. (value at log may be split into multiple lines) 271 parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startDownloadingPattern, downloadingCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) 272 // Extract cached file, stored in Artifactory. (value at log may be split into multiple lines) 273 parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startUsingCachedPattern, usingCacheCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) 274 275 // Extract already installed packages names. 276 parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ 277 RegExp: regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`), 278 ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { 279 // Check for out of bound results. 280 if len(pattern.MatchedResults)-1 < 0 { 281 log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line)) 282 return pattern.Line, nil 283 } 284 285 // Save dependency with empty file name. 286 dependenciesMap[strings.ToLower(pattern.MatchedResults[1])] = entities.Dependency{Id: ""} 287 log.Debug(fmt.Sprintf("Found package: %s already installed", pattern.MatchedResults[1])) 288 return pattern.Line, nil 289 }, 290 }) 291 292 // Execute command. 293 _, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, parsers...) 294 if err != nil { 295 return nil, fmt.Errorf("failed running %s command with error: '%s - %s'", string(tool), err.Error(), errorOut) 296 } 297 return dependenciesMap, nil 298 } 299 300 func extractFileNameFromRegexCaptureGroup(pattern *gofrogcmd.CmdOutputPattern) (fileName string) { 301 // Check for out of bound results (no captures). 302 if len(pattern.MatchedResults) <= 1 { 303 return "" 304 } 305 // Extract file information from capture group. 306 filePath := pattern.MatchedResults[1] 307 lastSlashIndex := strings.LastIndex(filePath, "/") 308 if lastSlashIndex == -1 { 309 return filePath 310 } 311 lastComponent := filePath[lastSlashIndex+1:] 312 // Unescape the last component, for example 'PyYAML-5.1.2%2Bsp1.tar.gz' -> 'PyYAML-5.1.2+sp1.tar.gz'. 313 unescapedComponent, _ := url.QueryUnescape(lastComponent) 314 if unescapedComponent == "" { 315 // Couldn't escape, will use the raw string 316 return lastComponent 317 } 318 return unescapedComponent 319 }