github.com/vchain-us/vcn@v0.9.11-0.20210921212052-a2484d23c0b3/pkg/bom/python/pip.go (about) 1 /* 2 * Copyright (c) 2021 CodeNotary, Inc. All Rights Reserved. 3 * This software is released under GPL3. 4 * The full license information can be found under: 5 * https://www.gnu.org/licenses/gpl-3.0.en.html 6 * 7 */ 8 9 package python 10 11 import ( 12 "bufio" 13 "bytes" 14 "errors" 15 "fmt" 16 "io/ioutil" 17 "os/exec" 18 "path/filepath" 19 "strings" 20 21 "github.com/schollz/progressbar/v3" 22 23 "github.com/vchain-us/vcn/pkg/bom/artifact" 24 ) 25 26 // pythonArtifactFromPip implements Artifact interface 27 type pythonArtifactFromPip struct { 28 pythonArtifact 29 } 30 31 type module struct { 32 needed bool 33 version string 34 } 35 36 type task struct { 37 name string 38 version string 39 } 40 type result struct { 41 name string 42 hash string 43 hashType artifact.HashType 44 deps []string 45 license string 46 version string 47 err error 48 } 49 50 var installArgs = []string{"-m", "pip", "install", "-r"} 51 var moduleListArgs = []string{"-m", "pip", "list", "-v"} 52 var moduleDetailsArgs = []string{"-m", "pip", "show"} 53 54 // Dependencies returns list of Python dependencies for the artifact 55 // collect info about all installed modules, find module relations, populate module graph and then recursively 56 // select only the needed modules, using content of 'requirements.txt' as a starting point 57 func (a *pythonArtifactFromPip) ResolveDependencies(output artifact.OutputOptions) ([]artifact.Dependency, error) { 58 if a.Deps != nil { 59 return a.Deps, nil 60 } 61 62 // install all required dependencies 63 // first try "python", if it fails, try "python3" 64 pythonExe := "python" 65 reqFile := filepath.Join(a.path, pipFileName) 66 cmdArgs := append(installArgs, reqFile) 67 err := exec.Command(pythonExe, cmdArgs...).Run() 68 if err != nil { 69 pythonExe = "python3" 70 _, err = exec.Command(pythonExe, cmdArgs...).Output() 71 if err != nil { 72 exit, ok := err.(*exec.ExitError) 73 if ok && len(exit.Stderr) > 0 { 74 return nil, fmt.Errorf("cannot install python modules:\n%s", string(exit.Stderr)) 75 } 76 return nil, fmt.Errorf("cannot install python modules: %w", err) 77 } 78 } 79 80 // collect info about all cached modules 81 buf, err := exec.Command(pythonExe, moduleListArgs...).Output() 82 if err != nil { 83 exit, ok := err.(*exec.ExitError) 84 if ok && len(exit.Stderr) > 0 { 85 return nil, fmt.Errorf("cannot get python module list:\n%s", string(exit.Stderr)) 86 } 87 return nil, fmt.Errorf("cannot get python module list: %w", err) 88 } 89 90 // output has two header lines, and then entries of the format "<package> <version> <location> <installer>" 91 scanner := bufio.NewScanner(bytes.NewReader(buf)) 92 scanner.Split(bufio.ScanLines) 93 for i := 0; i < 2; { 94 if !scanner.Scan() { 95 return nil, errors.New("got unexpected result to pip module list request") 96 } 97 text := scanner.Text() 98 if text[0] == '#' { 99 // skip possible Python warnings - lines starting with # 100 continue 101 } 102 i++ 103 } 104 105 // store all known modules 106 moduleGraph := make(map[string]*module) 107 for scanner.Scan() { 108 text := scanner.Text() 109 if text[0] == '#' { 110 // skip possible Python warnings - lines starting with # 111 continue 112 } 113 fields := strings.Fields(text) 114 moduleGraph[fields[0]] = &module{version: fields[1]} 115 } 116 117 // first process root dependencies from requirements.txt 118 // then for every dependency process its dependencies 119 buf, err = ioutil.ReadFile(reqFile) 120 if err != nil { 121 return nil, err 122 } 123 124 var bar *progressbar.ProgressBar 125 if output == artifact.Progress { 126 bar = progressbar.Default(int64(len(moduleGraph))) 127 } 128 129 // init goroutine throttling - channels, start goroutines. 130 // We can be sure that there will be no more in-flight messages in channels than known modules 131 tasks := make(chan task, len(moduleGraph)) 132 results := make(chan result, len(moduleGraph)) 133 for i := 0; i < artifact.MaxGoroutines; i++ { 134 go pipWorker(tasks, results, pythonExe, output, bar) 135 } 136 defer close(results) 137 defer close(tasks) // signal workers to stop 138 139 taskCount := 0 140 141 messages := make([]string, 0) 142 // initial tasks - content of requirements.txt 143 scanner = bufio.NewScanner(bytes.NewReader(buf)) 144 scanner.Split(bufio.ScanLines) 145 for scanner.Scan() { 146 line := scanner.Text() 147 fields := strings.SplitN(line, "#", 1) 148 line = strings.TrimSpace(fields[0]) 149 if line == "" { 150 continue 151 } 152 endPos := strings.IndexAny(line, "=><!") 153 if endPos > 0 { 154 line = line[:endPos] 155 } 156 157 mod, ok := moduleGraph[line] 158 if !ok { 159 name, err := resolveAlias(pythonExe, line) 160 if err != nil { 161 return nil, err 162 } 163 if name == "" { 164 messages = append(messages, "Unknown module "+line+" - ignoring") 165 continue 166 } 167 mod, ok = moduleGraph[name] 168 if !ok { 169 messages = append(messages, "Unknown module "+line+" - ignoring") 170 continue 171 } 172 moduleGraph[line] = mod // add alias entry 173 } 174 175 if mod.needed { 176 continue // already being processed by other name 177 } 178 mod.needed = true 179 180 tasks <- task{name: line, version: mod.version} 181 taskCount++ 182 } 183 184 // get dependencies, run tasks for dependencies, collect info about all used modules 185 res := make([]artifact.Dependency, 0) 186 for done := 0; taskCount == 0 || done < taskCount; done++ { 187 result := <-results 188 if result.err != nil { 189 close(tasks) // signal workers to stop 190 return nil, err 191 } 192 res = append(res, artifact.Dependency{ 193 Name: result.name, 194 Version: result.version, 195 Hash: result.hash, 196 HashType: result.hashType, 197 License: result.license}) 198 for _, v := range result.deps { 199 if v == "" { 200 continue 201 } 202 mod, ok := moduleGraph[v] 203 if !ok { 204 name, err := resolveAlias(pythonExe, v) 205 if err != nil { 206 return nil, err 207 } 208 if name == "" { 209 messages = append(messages, "Unknown module "+v+" - ignoring") 210 continue 211 } 212 mod, ok = moduleGraph[name] 213 if !ok { 214 messages = append(messages, "Unknown module "+v+" - ignoring") 215 continue 216 } 217 moduleGraph[v] = mod // add alias entry 218 } 219 220 if mod.needed { 221 continue // already being processed 222 } 223 mod.needed = true 224 tasks <- task{name: v, version: mod.version} 225 taskCount++ 226 } 227 } 228 if bar != nil { 229 bar.ChangeMax(taskCount) // to make 100% progress bar 230 } 231 for _, m := range messages { 232 fmt.Println(m) 233 } 234 235 a.Deps = res 236 return res, nil 237 } 238 239 func pipWorker(tasks <-chan task, results chan<- result, pythonExe string, output artifact.OutputOptions, bar *progressbar.ProgressBar) { 240 for task := range tasks { 241 lic, hashType, hash, err := QueryPkgDetails(task.name, task.version) 242 if err != nil { 243 results <- result{err: err} 244 continue 245 } 246 deps, err := preRequisites(pythonExe, task.name) 247 if err != nil { 248 results <- result{err: err} 249 continue 250 } 251 252 results <- result{name: task.name, version: task.version, hash: hash, deps: deps, hashType: hashType, license: lic, err: nil} 253 switch output { 254 case artifact.Progress: 255 bar.Add(1) 256 case artifact.Debug: 257 fmt.Printf("%s@%s (%s)\n", task.name, task.version, hash) 258 } 259 } 260 } 261 262 func preRequisites(pythonExe string, module string) ([]string, error) { 263 output, err := exec.Command(pythonExe, append(moduleDetailsArgs, module)...).Output() 264 if err != nil { 265 exit, ok := err.(*exec.ExitError) 266 if ok && len(exit.Stderr) > 0 { 267 return nil, fmt.Errorf("cannot get python module details:\n%s", string(exit.Stderr)) 268 } 269 return nil, fmt.Errorf("cannot get python module details: %w", err) 270 } 271 272 scanner := bufio.NewScanner(bytes.NewReader(output)) 273 scanner.Split(bufio.ScanLines) 274 var deps []string 275 for scanner.Scan() { 276 fields := strings.SplitN(scanner.Text(), ": ", 2) 277 if fields[0] == "Requires" { 278 deps = strings.Split(fields[1], ", ") 279 break 280 } 281 } 282 return deps, nil 283 } 284 285 // sometimes module is known by several names, in this case query module 286 // by its alias and add en entry for hashmap, pointing to the module 287 func resolveAlias(pythonExe, module string) (string, error) { 288 output, err := exec.Command(pythonExe, append(moduleDetailsArgs, module)...).Output() 289 if err != nil { 290 exit, ok := err.(*exec.ExitError) 291 if ok && len(exit.Stderr) > 0 { 292 return "", fmt.Errorf("cannot get python module by alias:\n%s", string(exit.Stderr)) 293 } 294 return "", fmt.Errorf("cannot get python module by alias: %w", err) 295 } 296 297 scanner := bufio.NewScanner(bytes.NewReader(output)) 298 scanner.Split(bufio.ScanLines) 299 for scanner.Scan() { 300 fields := strings.SplitN(scanner.Text(), ": ", 2) 301 if fields[0] == "Name" { 302 return fields[1], nil 303 } 304 } 305 return "", nil 306 }