github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/test_modules.go (about) 1 package main 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "os" 7 "regexp" 8 "sync" 9 "time" 10 11 "github.com/replit/upm/internal/api" 12 ) 13 14 func formatSeconds(totalSeconds int) string { 15 days := totalSeconds / 60 / 60 / 24 16 totalSeconds = totalSeconds % (60 * 60 * 24) 17 hours := totalSeconds / 60 / 60 18 totalSeconds = totalSeconds % (60 * 60) 19 minutes := totalSeconds / 60 20 totalSeconds = totalSeconds % 60 21 seconds := totalSeconds 22 23 return fmt.Sprintf("%02dd%02dh%02dm%02ds", days, hours, minutes, seconds) 24 } 25 26 func TestModules(packages PackageIndex, cacheDir string, pkgsFile string, distMods bool, workers int, force bool, timeout time.Duration) { 27 28 cache := LoadAllPackageInfo(cacheDir, pkgsFile) 29 30 discoveredPackages := 0 31 fmt.Printf("Using %d workers.\n", workers) 32 fmt.Printf("Scanning package index...\n") 33 34 // Each package is handled in a seperate goroutine, the total number 35 // concurrent is limited by the buffer size of this channel 36 resultQueue := make(chan PackageInfo, workers) 37 concurrencyLimiter := make(chan struct{}, workers) 38 var wg sync.WaitGroup 39 40 for packages.Next() { 41 discoveredPackages++ 42 packageName := packages.Package() 43 44 // Register every goroutine with the wait group before we start it 45 wg.Add(1) 46 go func() { 47 // Notify the wait group when finished 48 defer wg.Done() 49 50 // Block until there is room for more goroutines. This way we don't 51 // overload the opennumber of open connections. Release our spot when finished 52 concurrencyLimiter <- struct{}{} 53 defer func() { <-concurrencyLimiter }() 54 55 packageInfo, err := ProcessPackage(packageName, cache, cacheDir, distMods, force, timeout) 56 packageInfo.Name = packageName 57 if err != nil { 58 fmt.Fprintf(os.Stderr, "Failed to process package [%v]: %v\n", packageName, err) 59 packageInfo.Error = err.Error() 60 } 61 resultQueue <- packageInfo 62 }() 63 } 64 65 fmt.Printf("Discovered %v packages\n", discoveredPackages) 66 67 errors := 0 68 packageCount := 0 69 modules := 0 70 startTime := time.Now() 71 lastStatus := startTime 72 fmt.Printf("Scanning package modules...\n") 73 for processedPackages := 0; processedPackages < discoveredPackages; processedPackages++ { 74 result := <-resultQueue 75 76 if result.Error != "" { 77 errors++ 78 // fmt.Fprintf(os.Stderr, "{\"package\": \"%v\", \"error\": %v\n", result.Name, result.Error) 79 } else { 80 packageCount++ 81 modules += len(result.Modules) 82 } 83 84 // Print progress updates to stdout 85 if time.Since(lastStatus).Seconds() > 1 { 86 lastStatus = time.Now() 87 percentage := float64(processedPackages) / float64(discoveredPackages) 88 89 elapsed := time.Since(startTime) 90 rate := float64(processedPackages) / elapsed.Seconds() 91 remaining := int(float64(discoveredPackages-processedPackages) / rate) 92 93 fmt.Printf("%v/%v %.2f%% [%s]\n", processedPackages, discoveredPackages, 100*percentage, formatSeconds(remaining)) 94 } 95 } 96 97 // After all packages have been processed, close channels 98 close(resultQueue) 99 100 fmt.Printf("Found %v modules in %v packages in %s. %v packages failed\n", modules, packageCount, formatSeconds(int(time.Since(startTime).Seconds())), errors) 101 } 102 103 func GetPackageMetadata(packageName string) (PackageData, error) { 104 resp, err := api.HttpClient.Get("https://pypi.org/pypi/" + packageName + "/json") 105 if err != nil { 106 return PackageData{}, err 107 } 108 109 if resp.StatusCode != 200 { 110 return PackageData{}, fmt.Errorf("failed to get package info: %d", resp.StatusCode) 111 } 112 113 defer resp.Body.Close() 114 115 decoder := json.NewDecoder(resp.Body) 116 117 data := PackageData{} 118 err = decoder.Decode(&data) 119 if err != nil { 120 return PackageData{}, err 121 } 122 123 idRegex := regexp.MustCompile("^([a-zA-Z-_0-9.]+)") 124 var stripedRequiresDist []string 125 126 for _, dep := range data.Info.RequiresDist { 127 match := idRegex.FindStringSubmatch(dep) 128 if len(match) > 0 { 129 stripedRequiresDist = append(stripedRequiresDist, match[0]) 130 } 131 } 132 data.Info.RequiresDist = stripedRequiresDist 133 134 return data, nil 135 } 136 137 // NOTE: cache is read only 138 func ProcessPackage(packageName string, cache map[string]PackageInfo, cacheDir string, distMods bool, force bool, timeout time.Duration) (PackageInfo, error) { 139 // Get the package metadata from pypi 140 metadata, err := GetPackageMetadata(packageName) 141 if err != nil { 142 return PackageInfo{}, PypiError{DownloadFailure, "", err} 143 } 144 145 var cached PackageInfo = cache[packageName] 146 147 // Check if cached module is out of date 148 if !force && metadata.Info.Version == cached.Version { 149 // If we hit in the cache, no need to download the distribution 150 return cached, nil 151 } 152 153 // Accumulate everything (including error info!) into retval 154 var modules []string 155 if distMods { 156 // Determine modules by examining a distribution 157 modules, err = GetModules(metadata) 158 } else { 159 // Determine the modules by installing the package 160 modules, err = InstallDiff(metadata, timeout) 161 } 162 163 var retval PackageInfo 164 retval.Version = metadata.Info.Version 165 retval.Modules = modules 166 retval.Name = metadata.Info.Name 167 retval.RequiresDist = metadata.Info.RequiresDist 168 169 if err != nil { 170 retval.Error = err.Error() 171 } 172 173 err = SavePackageInfo(packageName, cacheDir, &retval) 174 if err != nil { 175 return PackageInfo{}, err 176 } 177 178 return retval, nil 179 }