github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/db_gen.go (about)

     1  package main
     2  
     3  import (
     4  	"database/sql"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  
    11  	_ "github.com/mattn/go-sqlite3"
    12  )
    13  
    14  func GenerateDB(pkg string, outputFilePath string, cache map[string]PackageInfo, bqFilePath string, pkgsLegacyFile string) error {
    15  	downloadStats, err := LoadDownloadStats(bqFilePath)
    16  	if err != nil {
    17  		return err
    18  	}
    19  
    20  	legacyPypiPackages := loadLegacyPypyPackages(pkgsLegacyFile)
    21  
    22  	packagesProcessed := make(map[string]bool)
    23  	var moduleToPackageList = map[string][]PackageInfo{}
    24  
    25  	for _, info := range cache {
    26  		pkgName := strings.ToLower(info.Name)
    27  		if info.Error != "" {
    28  			// fallback to legacy package module info
    29  			legacyInfo, ok := legacyPypiPackages[pkgName]
    30  			if ok {
    31  				info.Modules = legacyInfo.Mods
    32  			}
    33  		}
    34  		packagesProcessed[pkgName] = true
    35  		for _, module := range info.Modules {
    36  			moduleToPackageList[module] = append(moduleToPackageList[module], info)
    37  		}
    38  	}
    39  
    40  	// Backfill legacy package info that is missing from our cache
    41  	for pkg, legacyInfo := range legacyPypiPackages {
    42  		_, ok := packagesProcessed[pkg]
    43  		if ok {
    44  			continue
    45  		}
    46  		var info PackageInfo
    47  		info.Name = legacyInfo.Pkg
    48  		info.Modules = legacyInfo.Mods
    49  
    50  		for _, module := range info.Modules {
    51  			moduleToPackageList[module] = append(moduleToPackageList[module], info)
    52  		}
    53  	}
    54  
    55  	fmt.Printf("Loaded %d modules\n", len(moduleToPackageList))
    56  
    57  	err = os.Remove(outputFilePath)
    58  	if err != nil && !errors.Is(err, os.ErrNotExist) {
    59  		return err
    60  	}
    61  	db, err := sql.Open("sqlite3", outputFilePath)
    62  	if err != nil {
    63  		return err
    64  	}
    65  	_, err = db.Exec(`
    66  	create table module_to_pypi_package (module_name text primary key, guess text, reason text);
    67  	create table pypi_packages (package_name text primary key, module_list text);
    68  	`)
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	// Write all data within one transaction for speed
    74  	// https://stackoverflow.com/questions/1711631/improve-insert-per-second-performance-of-sqlite
    75  	_, err = db.Exec(`begin transaction;`)
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	// Guess at every module, add the guess and the package that was guessed to
    81  	// the map
    82  	for moduleName, candidates := range moduleToPackageList {
    83  		if guess, reason, guessable := GuessPackage(moduleName, candidates, downloadStats); guessable {
    84  			stmt, err := db.Prepare("insert into module_to_pypi_package values (?, ?, ?);")
    85  			if err != nil {
    86  				return err
    87  			}
    88  			_, err = stmt.Exec(moduleName, guess.Name, reason)
    89  			if err != nil {
    90  				return err
    91  			}
    92  			stmt.Close()
    93  
    94  			stmt, err = db.Prepare(`
    95  			insert into pypi_packages values (?, ?)
    96  			on conflict (package_name)
    97  			do update set
    98  				module_list = excluded.module_list;
    99  			`)
   100  			if err != nil {
   101  				return err
   102  			}
   103  			_, err = stmt.Exec(guess.Name, strings.Join(guess.Modules, ","))
   104  			if err != nil {
   105  				return fmt.Errorf("%s on %s", err.Error(), guess.Name)
   106  			}
   107  			stmt.Close()
   108  		}
   109  	}
   110  
   111  	_, err = db.Exec(`end transaction;`)
   112  	if err != nil {
   113  		return err
   114  	}
   115  
   116  	err = db.Close()
   117  	if err != nil {
   118  		return err
   119  	}
   120  
   121  	// Make it read only
   122  	err = os.Chmod(outputFilePath, 0444)
   123  	if err != nil {
   124  		return err
   125  	}
   126  
   127  	fmt.Printf("Wrote %s\n", outputFilePath)
   128  	return nil
   129  
   130  }
   131  
   132  func loadLegacyPypyPackages(filePath string) map[string]LegacyPackageInfo {
   133  	injson, err := os.Open(filePath)
   134  	if err != nil {
   135  		return make(map[string]LegacyPackageInfo)
   136  	}
   137  	infoMap := make(map[string]LegacyPackageInfo)
   138  
   139  	dec := json.NewDecoder(injson)
   140  	for dec.More() {
   141  		var info LegacyPackageInfo
   142  
   143  		err = dec.Decode(&info)
   144  		if err != nil {
   145  			continue
   146  		}
   147  		info.Pkg = strings.ToLower(info.Pkg)
   148  		infoMap[info.Pkg] = info
   149  	}
   150  
   151  	return infoMap
   152  }