github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/package_guesser.go (about)

     1  package main
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  )
     7  
     8  func GuessPackage(module string, packages []PackageInfo, downloadStats map[string]int) (PackageInfo, string, bool) {
     9  	// Never try and guess packages in the python stdlib
    10  	if stdlibMods[module] {
    11  		return PackageInfo{}, "", false
    12  	}
    13  
    14  	// If no packages provide this module, give up
    15  	if len(packages) == 0 {
    16  		return PackageInfo{}, "", false
    17  	}
    18  
    19  	// If there is only one package that provides this module, use that
    20  	if len(packages) == 1 {
    21  		return packages[0], "only one", true
    22  	}
    23  
    24  	// There are at least two packages that provide this module
    25  	///////////////////////////////////////////////////////////
    26  
    27  	// Got through all the matches, if any package name is almost an exact match to the
    28  	// module name, use that
    29  	var nameMatches []PackageInfo = nil
    30  	for _, candidate := range packages {
    31  
    32  		if strings.Replace(strings.ToLower(candidate.Name), "-", "_", -1) == strings.ToLower(module) ||
    33  			strings.Replace(strings.ToLower(candidate.Name), "-", "", -1) == strings.ToLower(module) ||
    34  			strings.Replace(strings.ToLower(candidate.Name), "_", "", -1) == strings.ToLower(module) {
    35  			nameMatches = append(nameMatches, candidate)
    36  		}
    37  	}
    38  
    39  	if len(nameMatches) > 0 {
    40  		// Pick the most popular match
    41  		sort.Slice(nameMatches, func(a, b int) bool {
    42  			return downloadStats[nameMatches[a].Name] > downloadStats[nameMatches[b].Name]
    43  		})
    44  
    45  		return nameMatches[0], "name match", true
    46  	}
    47  
    48  	candidates := removeParentPackages(packages)
    49  
    50  	if len(candidates) > 0 {
    51  		packages = candidates
    52  	}
    53  
    54  	if len(packages) == 1 {
    55  		return packages[0], "only one after trimming", true
    56  	}
    57  
    58  	// Sort the packages by downloads
    59  	sort.Slice(packages, func(a, b int) bool {
    60  		return downloadStats[strings.ToLower(packages[a].Name)] >
    61  			downloadStats[strings.ToLower(packages[b].Name)]
    62  	})
    63  
    64  	// If the most downloaded package that provides this module has been
    65  	// downloaded fewer then 100 times, skip the module
    66  	if downloadStats[strings.ToLower(packages[0].Name)] < 100 {
    67  		return PackageInfo{}, "", false
    68  	}
    69  
    70  	return packages[0], "most popular of remaining", true
    71  }
    72  
    73  /*
    74  Go through all input packages and look through their dependencies (RequiresDist).
    75  If they depend on one or more of the other packages in this list, they are removed
    76  from the candidate list (returned).
    77  */
    78  func removeParentPackages(packages []PackageInfo) []PackageInfo {
    79  	candidateSet := make(map[string]PackageInfo)
    80  	for _, pkg := range packages {
    81  		candidateSet[pkg.Name] = pkg
    82  	}
    83  
    84  	for _, pkg := range packages {
    85  		found := false
    86  		for _, dep := range pkg.RequiresDist {
    87  			for _, otherPkg := range packages {
    88  				if strings.EqualFold(dep, otherPkg.Name) {
    89  					// we found otherPkg is depended on by pkg
    90  					// but does otherPkg also depend on pkg?
    91  					// if so, it's a mutual dependency, and we don't count it
    92  					innerFound := false
    93  					for _, otherPkgDep := range otherPkg.RequiresDist {
    94  						if strings.EqualFold(otherPkgDep, pkg.Name) {
    95  							innerFound = true
    96  							break
    97  						}
    98  					}
    99  					if !innerFound {
   100  						found = true
   101  						break
   102  					}
   103  				}
   104  			}
   105  			if found {
   106  				break
   107  			}
   108  		}
   109  		if found {
   110  			delete(candidateSet, pkg.Name)
   111  		}
   112  	}
   113  
   114  	var candidates []PackageInfo
   115  	for _, pkg := range candidateSet {
   116  		candidates = append(candidates, pkg)
   117  	}
   118  
   119  	return candidates
   120  }
   121  
   122  // pythonStdlibModules this build is built from
   123  // https://docs.python.org/3/py-modindex.htm as we never want to guess a
   124  // standard library module is provided by a remote package.
   125  var stdlibMods = map[string]bool{
   126  	"__future__":      true,
   127  	"__main__":        true,
   128  	"_dummy_thread":   true,
   129  	"_thread":         true,
   130  	"abc":             true,
   131  	"aifc":            true,
   132  	"argparse":        true,
   133  	"array":           true,
   134  	"ast":             true,
   135  	"asynchat":        true,
   136  	"asyncio":         true,
   137  	"asyncore":        true,
   138  	"atexit":          true,
   139  	"audioop":         true,
   140  	"base64":          true,
   141  	"bdb":             true,
   142  	"binascii":        true,
   143  	"binhex":          true,
   144  	"bisect":          true,
   145  	"builtins":        true,
   146  	"bz2":             true,
   147  	"calendar":        true,
   148  	"cgi":             true,
   149  	"cgitb":           true,
   150  	"chunk":           true,
   151  	"cmath":           true,
   152  	"cmd":             true,
   153  	"code":            true,
   154  	"codecs":          true,
   155  	"codeop":          true,
   156  	"collections":     true,
   157  	"colorsys":        true,
   158  	"compileall":      true,
   159  	"concurrent":      true,
   160  	"configparser":    true,
   161  	"contextlib":      true,
   162  	"contextvars":     true,
   163  	"copy":            true,
   164  	"copyreg":         true,
   165  	"cProfile":        true,
   166  	"crypt":           true,
   167  	"csv":             true,
   168  	"ctypes":          true,
   169  	"curses":          true,
   170  	"dataclasses":     true,
   171  	"datetime":        true,
   172  	"dbm":             true,
   173  	"decimal":         true,
   174  	"difflib":         true,
   175  	"dis":             true,
   176  	"distutils":       true,
   177  	"doctest":         true,
   178  	"dummy_threading": true,
   179  	"email":           true,
   180  	"encodings":       true,
   181  	"ensurepip":       true,
   182  	"enum":            true,
   183  	"errno":           true,
   184  	"faulthandler":    true,
   185  	"fcntl":           true,
   186  	"filecmp":         true,
   187  	"fileinput":       true,
   188  	"fnmatch":         true,
   189  	"formatter":       true,
   190  	"fractions":       true,
   191  	"ftplib":          true,
   192  	"functools":       true,
   193  	"gc":              true,
   194  	"getopt":          true,
   195  	"getpass":         true,
   196  	"gettext":         true,
   197  	"glob":            true,
   198  	"grp":             true,
   199  	"gzip":            true,
   200  	"hashlib":         true,
   201  	"heapq":           true,
   202  	"hmac":            true,
   203  	"html":            true,
   204  	"http":            true,
   205  	"imaplib":         true,
   206  	"imghdr":          true,
   207  	"imp":             true,
   208  	"importlib":       true,
   209  	"inspect":         true,
   210  	"io":              true,
   211  	"ipaddress":       true,
   212  	"itertools":       true,
   213  	"json":            true,
   214  	"keyword":         true,
   215  	"lib2to3":         true,
   216  	"linecache":       true,
   217  	"locale":          true,
   218  	"logging":         true,
   219  	"lzma":            true,
   220  	"mailbox":         true,
   221  	"mailcap":         true,
   222  	"marshal":         true,
   223  	"math":            true,
   224  	"mimetypes":       true,
   225  	"mmap":            true,
   226  	"modulefinder":    true,
   227  	"msilib":          true,
   228  	"msvcrt":          true,
   229  	"multiprocessing": true,
   230  	"netrc":           true,
   231  	"nis":             true,
   232  	"nntplib":         true,
   233  	"numbers":         true,
   234  	"operator":        true,
   235  	"optparse":        true,
   236  	"os":              true,
   237  	"ossaudiodev":     true,
   238  	"parser":          true,
   239  	"pathlib":         true,
   240  	"pdb":             true,
   241  	"pickle":          true,
   242  	"pickletools":     true,
   243  	"pipes":           true,
   244  	"pkgutil":         true,
   245  	"platform":        true,
   246  	"plistlib":        true,
   247  	"poplib":          true,
   248  	"posix":           true,
   249  	"pprint":          true,
   250  	"profile":         true,
   251  	"pstats":          true,
   252  	"pty":             true,
   253  	"pwd":             true,
   254  	"py_compile":      true,
   255  	"pyclbr":          true,
   256  	"pydoc":           true,
   257  	"queue":           true,
   258  	"quopri":          true,
   259  	"random":          true,
   260  	"re":              true,
   261  	"readline":        true,
   262  	"reprlib":         true,
   263  	"resource":        true,
   264  	"rlcompleter":     true,
   265  	"runpy":           true,
   266  	"sched":           true,
   267  	"secrets":         true,
   268  	"select":          true,
   269  	"selectors":       true,
   270  	"shelve":          true,
   271  	"shlex":           true,
   272  	"shutil":          true,
   273  	"signal":          true,
   274  	"site":            true,
   275  	"smtpd":           true,
   276  	"smtplib":         true,
   277  	"sndhdr":          true,
   278  	"socket":          true,
   279  	"socketserver":    true,
   280  	"spwd":            true,
   281  	"sqlite3":         true,
   282  	"ssl":             true,
   283  	"stat":            true,
   284  	"statistics":      true,
   285  	"string":          true,
   286  	"stringprep":      true,
   287  	"struct":          true,
   288  	"subprocess":      true,
   289  	"sunau":           true,
   290  	"symbol":          true,
   291  	"symtable":        true,
   292  	"sys":             true,
   293  	"sysconfig":       true,
   294  	"syslog":          true,
   295  	"tabnanny":        true,
   296  	"tarfile":         true,
   297  	"telnetlib":       true,
   298  	"tempfile":        true,
   299  	"termios":         true,
   300  	"test":            true,
   301  	"textwrap":        true,
   302  	"threading":       true,
   303  	"time":            true,
   304  	"timeit":          true,
   305  	"tkinter":         true,
   306  	"token":           true,
   307  	"tokenize":        true,
   308  	"trace":           true,
   309  	"traceback":       true,
   310  	"tracemalloc":     true,
   311  	"tty":             true,
   312  	"turtle":          true,
   313  	"turtledemo":      true,
   314  	"types":           true,
   315  	"typing":          true,
   316  	"unicodedata":     true,
   317  	"unittest":        true,
   318  	"urllib":          true,
   319  	"uu":              true,
   320  	"uuid":            true,
   321  	"venv":            true,
   322  	"warnings":        true,
   323  	"wave":            true,
   324  	"weakref":         true,
   325  	"webbrowser":      true,
   326  	"winreg":          true,
   327  	"winsound":        true,
   328  	"wsgiref":         true,
   329  	"xdrlib":          true,
   330  	"xml":             true,
   331  	"xmlrpc":          true,
   332  	"zipapp":          true,
   333  	"zipfile":         true,
   334  	"zipimport":       true,
   335  	"zlib":            true,
   336  }