github.com/vchain-us/vcn@v0.9.11-0.20210921212052-a2484d23c0b3/pkg/bom/python/pip.go (about)

     1  /*
     2   * Copyright (c) 2021 CodeNotary, Inc. All Rights Reserved.
     3   * This software is released under GPL3.
     4   * The full license information can be found under:
     5   * https://www.gnu.org/licenses/gpl-3.0.en.html
     6   *
     7   */
     8  
     9  package python
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"errors"
    15  	"fmt"
    16  	"io/ioutil"
    17  	"os/exec"
    18  	"path/filepath"
    19  	"strings"
    20  
    21  	"github.com/schollz/progressbar/v3"
    22  
    23  	"github.com/vchain-us/vcn/pkg/bom/artifact"
    24  )
    25  
// pythonArtifactFromPip implements the Artifact interface for Python projects
// whose dependencies are managed with pip. It embeds pythonArtifact and adds
// pip-based dependency resolution (see ResolveDependencies).
type pythonArtifactFromPip struct {
	pythonArtifact
}
    30  
// module is one entry of the graph of modules reported by "pip list".
type module struct {
	needed  bool   // set once the module has been scheduled for processing
	version string // version of the module as reported by pip
}
    35  
// task is a unit of work sent to pipWorker: a single module to be inspected.
type task struct {
	name    string // module name, as it should be passed to pip
	version string // module version
}
// result is what pipWorker reports back for a single task. When err is set,
// only err is populated.
type result struct {
	name     string            // module name, copied from the task
	hash     string            // module hash returned by QueryPkgDetails
	hashType artifact.HashType // kind of hash stored in the hash field
	deps     []string          // names of the modules this module requires
	license  string            // module license returned by QueryPkgDetails
	version  string            // module version, copied from the task
	err      error             // non-nil if the task could not be processed
}
    49  
    50  var installArgs = []string{"-m", "pip", "install", "-r"}
    51  var moduleListArgs = []string{"-m", "pip", "list", "-v"}
    52  var moduleDetailsArgs = []string{"-m", "pip", "show"}
    53  
    54  // Dependencies returns list of Python dependencies for the artifact
    55  // collect info about all installed modules, find module relations, populate module graph and then recursively
    56  // select only the needed modules, using content of 'requirements.txt' as a starting point
    57  func (a *pythonArtifactFromPip) ResolveDependencies(output artifact.OutputOptions) ([]artifact.Dependency, error) {
    58  	if a.Deps != nil {
    59  		return a.Deps, nil
    60  	}
    61  
    62  	// install all required dependencies
    63  	// first try "python", if it fails, try "python3"
    64  	pythonExe := "python"
    65  	reqFile := filepath.Join(a.path, pipFileName)
    66  	cmdArgs := append(installArgs, reqFile)
    67  	err := exec.Command(pythonExe, cmdArgs...).Run()
    68  	if err != nil {
    69  		pythonExe = "python3"
    70  		_, err = exec.Command(pythonExe, cmdArgs...).Output()
    71  		if err != nil {
    72  			exit, ok := err.(*exec.ExitError)
    73  			if ok && len(exit.Stderr) > 0 {
    74  				return nil, fmt.Errorf("cannot install python modules:\n%s", string(exit.Stderr))
    75  			}
    76  			return nil, fmt.Errorf("cannot install python modules: %w", err)
    77  		}
    78  	}
    79  
    80  	// collect info about all cached modules
    81  	buf, err := exec.Command(pythonExe, moduleListArgs...).Output()
    82  	if err != nil {
    83  		exit, ok := err.(*exec.ExitError)
    84  		if ok && len(exit.Stderr) > 0 {
    85  			return nil, fmt.Errorf("cannot get python module list:\n%s", string(exit.Stderr))
    86  		}
    87  		return nil, fmt.Errorf("cannot get python module list: %w", err)
    88  	}
    89  
    90  	// output has two header lines, and then entries of the format "<package> <version> <location> <installer>"
    91  	scanner := bufio.NewScanner(bytes.NewReader(buf))
    92  	scanner.Split(bufio.ScanLines)
    93  	for i := 0; i < 2; {
    94  		if !scanner.Scan() {
    95  			return nil, errors.New("got unexpected result to pip module list request")
    96  		}
    97  		text := scanner.Text()
    98  		if text[0] == '#' {
    99  			// skip possible Python warnings - lines starting with #
   100  			continue
   101  		}
   102  		i++
   103  	}
   104  
   105  	// store all known modules
   106  	moduleGraph := make(map[string]*module)
   107  	for scanner.Scan() {
   108  		text := scanner.Text()
   109  		if text[0] == '#' {
   110  			// skip possible Python warnings - lines starting with #
   111  			continue
   112  		}
   113  		fields := strings.Fields(text)
   114  		moduleGraph[fields[0]] = &module{version: fields[1]}
   115  	}
   116  
   117  	// first process root dependencies from requirements.txt
   118  	// then for every dependency process its dependencies
   119  	buf, err = ioutil.ReadFile(reqFile)
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  
   124  	var bar *progressbar.ProgressBar
   125  	if output == artifact.Progress {
   126  		bar = progressbar.Default(int64(len(moduleGraph)))
   127  	}
   128  
   129  	// init goroutine throttling - channels, start goroutines.
   130  	// We can be sure that there will be no more in-flight messages in channels than known modules
   131  	tasks := make(chan task, len(moduleGraph))
   132  	results := make(chan result, len(moduleGraph))
   133  	for i := 0; i < artifact.MaxGoroutines; i++ {
   134  		go pipWorker(tasks, results, pythonExe, output, bar)
   135  	}
   136  	defer close(results)
   137  	defer close(tasks) // signal workers to stop
   138  
   139  	taskCount := 0
   140  
   141  	messages := make([]string, 0)
   142  	// initial tasks - content of requirements.txt
   143  	scanner = bufio.NewScanner(bytes.NewReader(buf))
   144  	scanner.Split(bufio.ScanLines)
   145  	for scanner.Scan() {
   146  		line := scanner.Text()
   147  		fields := strings.SplitN(line, "#", 1)
   148  		line = strings.TrimSpace(fields[0])
   149  		if line == "" {
   150  			continue
   151  		}
   152  		endPos := strings.IndexAny(line, "=><!")
   153  		if endPos > 0 {
   154  			line = line[:endPos]
   155  		}
   156  
   157  		mod, ok := moduleGraph[line]
   158  		if !ok {
   159  			name, err := resolveAlias(pythonExe, line)
   160  			if err != nil {
   161  				return nil, err
   162  			}
   163  			if name == "" {
   164  				messages = append(messages, "Unknown module "+line+" - ignoring")
   165  				continue
   166  			}
   167  			mod, ok = moduleGraph[name]
   168  			if !ok {
   169  				messages = append(messages, "Unknown module "+line+" - ignoring")
   170  				continue
   171  			}
   172  			moduleGraph[line] = mod // add alias entry
   173  		}
   174  
   175  		if mod.needed {
   176  			continue // already being processed by other name
   177  		}
   178  		mod.needed = true
   179  
   180  		tasks <- task{name: line, version: mod.version}
   181  		taskCount++
   182  	}
   183  
   184  	// get dependencies, run tasks for dependencies, collect info about all used modules
   185  	res := make([]artifact.Dependency, 0)
   186  	for done := 0; taskCount == 0 || done < taskCount; done++ {
   187  		result := <-results
   188  		if result.err != nil {
   189  			close(tasks) // signal workers to stop
   190  			return nil, err
   191  		}
   192  		res = append(res, artifact.Dependency{
   193  			Name:     result.name,
   194  			Version:  result.version,
   195  			Hash:     result.hash,
   196  			HashType: result.hashType,
   197  			License:  result.license})
   198  		for _, v := range result.deps {
   199  			if v == "" {
   200  				continue
   201  			}
   202  			mod, ok := moduleGraph[v]
   203  			if !ok {
   204  				name, err := resolveAlias(pythonExe, v)
   205  				if err != nil {
   206  					return nil, err
   207  				}
   208  				if name == "" {
   209  					messages = append(messages, "Unknown module "+v+" - ignoring")
   210  					continue
   211  				}
   212  				mod, ok = moduleGraph[name]
   213  				if !ok {
   214  					messages = append(messages, "Unknown module "+v+" - ignoring")
   215  					continue
   216  				}
   217  				moduleGraph[v] = mod // add alias entry
   218  			}
   219  
   220  			if mod.needed {
   221  				continue // already being processed
   222  			}
   223  			mod.needed = true
   224  			tasks <- task{name: v, version: mod.version}
   225  			taskCount++
   226  		}
   227  	}
   228  	if bar != nil {
   229  		bar.ChangeMax(taskCount) // to make 100% progress bar
   230  	}
   231  	for _, m := range messages {
   232  		fmt.Println(m)
   233  	}
   234  
   235  	a.Deps = res
   236  	return res, nil
   237  }
   238  
   239  func pipWorker(tasks <-chan task, results chan<- result, pythonExe string, output artifact.OutputOptions, bar *progressbar.ProgressBar) {
   240  	for task := range tasks {
   241  		lic, hashType, hash, err := QueryPkgDetails(task.name, task.version)
   242  		if err != nil {
   243  			results <- result{err: err}
   244  			continue
   245  		}
   246  		deps, err := preRequisites(pythonExe, task.name)
   247  		if err != nil {
   248  			results <- result{err: err}
   249  			continue
   250  		}
   251  
   252  		results <- result{name: task.name, version: task.version, hash: hash, deps: deps, hashType: hashType, license: lic, err: nil}
   253  		switch output {
   254  		case artifact.Progress:
   255  			bar.Add(1)
   256  		case artifact.Debug:
   257  			fmt.Printf("%s@%s (%s)\n", task.name, task.version, hash)
   258  		}
   259  	}
   260  }
   261  
   262  func preRequisites(pythonExe string, module string) ([]string, error) {
   263  	output, err := exec.Command(pythonExe, append(moduleDetailsArgs, module)...).Output()
   264  	if err != nil {
   265  		exit, ok := err.(*exec.ExitError)
   266  		if ok && len(exit.Stderr) > 0 {
   267  			return nil, fmt.Errorf("cannot get python module details:\n%s", string(exit.Stderr))
   268  		}
   269  		return nil, fmt.Errorf("cannot get python module details: %w", err)
   270  	}
   271  
   272  	scanner := bufio.NewScanner(bytes.NewReader(output))
   273  	scanner.Split(bufio.ScanLines)
   274  	var deps []string
   275  	for scanner.Scan() {
   276  		fields := strings.SplitN(scanner.Text(), ": ", 2)
   277  		if fields[0] == "Requires" {
   278  			deps = strings.Split(fields[1], ", ")
   279  			break
   280  		}
   281  	}
   282  	return deps, nil
   283  }
   284  
   285  // sometimes module is known by several names, in this case query module
   286  // by its alias and add en entry for hashmap, pointing to the module
   287  func resolveAlias(pythonExe, module string) (string, error) {
   288  	output, err := exec.Command(pythonExe, append(moduleDetailsArgs, module)...).Output()
   289  	if err != nil {
   290  		exit, ok := err.(*exec.ExitError)
   291  		if ok && len(exit.Stderr) > 0 {
   292  			return "", fmt.Errorf("cannot get python module by alias:\n%s", string(exit.Stderr))
   293  		}
   294  		return "", fmt.Errorf("cannot get python module by alias: %w", err)
   295  	}
   296  
   297  	scanner := bufio.NewScanner(bytes.NewReader(output))
   298  	scanner.Split(bufio.ScanLines)
   299  	for scanner.Scan() {
   300  		fields := strings.SplitN(scanner.Text(), ": ", 2)
   301  		if fields[0] == "Name" {
   302  			return fields[1], nil
   303  		}
   304  	}
   305  	return "", nil
   306  }