github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/test_modules.go (about)

     1  package main
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"os"
     7  	"regexp"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/replit/upm/internal/api"
    12  )
    13  
    14  func formatSeconds(totalSeconds int) string {
    15  	days := totalSeconds / 60 / 60 / 24
    16  	totalSeconds = totalSeconds % (60 * 60 * 24)
    17  	hours := totalSeconds / 60 / 60
    18  	totalSeconds = totalSeconds % (60 * 60)
    19  	minutes := totalSeconds / 60
    20  	totalSeconds = totalSeconds % 60
    21  	seconds := totalSeconds
    22  
    23  	return fmt.Sprintf("%02dd%02dh%02dm%02ds", days, hours, minutes, seconds)
    24  }
    25  
    26  func TestModules(packages PackageIndex, cacheDir string, pkgsFile string, distMods bool, workers int, force bool, timeout time.Duration) {
    27  
    28  	cache := LoadAllPackageInfo(cacheDir, pkgsFile)
    29  
    30  	discoveredPackages := 0
    31  	fmt.Printf("Using %d workers.\n", workers)
    32  	fmt.Printf("Scanning package index...\n")
    33  
    34  	// Each package is handled in a seperate goroutine, the total number
    35  	// concurrent is limited by the buffer size of this channel
    36  	resultQueue := make(chan PackageInfo, workers)
    37  	concurrencyLimiter := make(chan struct{}, workers)
    38  	var wg sync.WaitGroup
    39  
    40  	for packages.Next() {
    41  		discoveredPackages++
    42  		packageName := packages.Package()
    43  
    44  		// Register every goroutine with the wait group before we start it
    45  		wg.Add(1)
    46  		go func() {
    47  			// Notify the wait group when finished
    48  			defer wg.Done()
    49  
    50  			// Block until there is room for more goroutines. This way we don't
    51  			// overload the opennumber of open connections. Release our spot when finished
    52  			concurrencyLimiter <- struct{}{}
    53  			defer func() { <-concurrencyLimiter }()
    54  
    55  			packageInfo, err := ProcessPackage(packageName, cache, cacheDir, distMods, force, timeout)
    56  			packageInfo.Name = packageName
    57  			if err != nil {
    58  				fmt.Fprintf(os.Stderr, "Failed to process package [%v]: %v\n", packageName, err)
    59  				packageInfo.Error = err.Error()
    60  			}
    61  			resultQueue <- packageInfo
    62  		}()
    63  	}
    64  
    65  	fmt.Printf("Discovered %v packages\n", discoveredPackages)
    66  
    67  	errors := 0
    68  	packageCount := 0
    69  	modules := 0
    70  	startTime := time.Now()
    71  	lastStatus := startTime
    72  	fmt.Printf("Scanning package modules...\n")
    73  	for processedPackages := 0; processedPackages < discoveredPackages; processedPackages++ {
    74  		result := <-resultQueue
    75  
    76  		if result.Error != "" {
    77  			errors++
    78  			// fmt.Fprintf(os.Stderr, "{\"package\": \"%v\", \"error\": %v\n", result.Name, result.Error)
    79  		} else {
    80  			packageCount++
    81  			modules += len(result.Modules)
    82  		}
    83  
    84  		// Print progress updates to stdout
    85  		if time.Since(lastStatus).Seconds() > 1 {
    86  			lastStatus = time.Now()
    87  			percentage := float64(processedPackages) / float64(discoveredPackages)
    88  
    89  			elapsed := time.Since(startTime)
    90  			rate := float64(processedPackages) / elapsed.Seconds()
    91  			remaining := int(float64(discoveredPackages-processedPackages) / rate)
    92  
    93  			fmt.Printf("%v/%v %.2f%% [%s]\n", processedPackages, discoveredPackages, 100*percentage, formatSeconds(remaining))
    94  		}
    95  	}
    96  
    97  	// After all packages have been processed, close channels
    98  	close(resultQueue)
    99  
   100  	fmt.Printf("Found %v modules in %v packages in %s. %v packages failed\n", modules, packageCount, formatSeconds(int(time.Since(startTime).Seconds())), errors)
   101  }
   102  
   103  func GetPackageMetadata(packageName string) (PackageData, error) {
   104  	resp, err := api.HttpClient.Get("https://pypi.org/pypi/" + packageName + "/json")
   105  	if err != nil {
   106  		return PackageData{}, err
   107  	}
   108  
   109  	if resp.StatusCode != 200 {
   110  		return PackageData{}, fmt.Errorf("failed to get package info: %d", resp.StatusCode)
   111  	}
   112  
   113  	defer resp.Body.Close()
   114  
   115  	decoder := json.NewDecoder(resp.Body)
   116  
   117  	data := PackageData{}
   118  	err = decoder.Decode(&data)
   119  	if err != nil {
   120  		return PackageData{}, err
   121  	}
   122  
   123  	idRegex := regexp.MustCompile("^([a-zA-Z-_0-9.]+)")
   124  	var stripedRequiresDist []string
   125  
   126  	for _, dep := range data.Info.RequiresDist {
   127  		match := idRegex.FindStringSubmatch(dep)
   128  		if len(match) > 0 {
   129  			stripedRequiresDist = append(stripedRequiresDist, match[0])
   130  		}
   131  	}
   132  	data.Info.RequiresDist = stripedRequiresDist
   133  
   134  	return data, nil
   135  }
   136  
   137  // NOTE: cache is read only
   138  func ProcessPackage(packageName string, cache map[string]PackageInfo, cacheDir string, distMods bool, force bool, timeout time.Duration) (PackageInfo, error) {
   139  	// Get the package metadata from pypi
   140  	metadata, err := GetPackageMetadata(packageName)
   141  	if err != nil {
   142  		return PackageInfo{}, PypiError{DownloadFailure, "", err}
   143  	}
   144  
   145  	var cached PackageInfo = cache[packageName]
   146  
   147  	// Check if cached module is out of date
   148  	if !force && metadata.Info.Version == cached.Version {
   149  		// If we hit in the cache, no need to download the distribution
   150  		return cached, nil
   151  	}
   152  
   153  	// Accumulate everything (including error info!) into retval
   154  	var modules []string
   155  	if distMods {
   156  		// Determine modules by examining a distribution
   157  		modules, err = GetModules(metadata)
   158  	} else {
   159  		// Determine the modules by installing the package
   160  		modules, err = InstallDiff(metadata, timeout)
   161  	}
   162  
   163  	var retval PackageInfo
   164  	retval.Version = metadata.Info.Version
   165  	retval.Modules = modules
   166  	retval.Name = metadata.Info.Name
   167  	retval.RequiresDist = metadata.Info.RequiresDist
   168  
   169  	if err != nil {
   170  		retval.Error = err.Error()
   171  	}
   172  
   173  	err = SavePackageInfo(packageName, cacheDir, &retval)
   174  	if err != nil {
   175  		return PackageInfo{}, err
   176  	}
   177  
   178  	return retval, nil
   179  }