github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/package_guesser.go (about) 1 package main 2 3 import ( 4 "sort" 5 "strings" 6 ) 7 8 func GuessPackage(module string, packages []PackageInfo, downloadStats map[string]int) (PackageInfo, string, bool) { 9 // Never try and guess packages in the python stdlib 10 if stdlibMods[module] { 11 return PackageInfo{}, "", false 12 } 13 14 // If no packages provide this module, give up 15 if len(packages) == 0 { 16 return PackageInfo{}, "", false 17 } 18 19 // If there is only one package that provides this module, use that 20 if len(packages) == 1 { 21 return packages[0], "only one", true 22 } 23 24 // There are at least two packages that provide this module 25 /////////////////////////////////////////////////////////// 26 27 // Got through all the matches, if any package name is almost an exact match to the 28 // module name, use that 29 var nameMatches []PackageInfo = nil 30 for _, candidate := range packages { 31 32 if strings.Replace(strings.ToLower(candidate.Name), "-", "_", -1) == strings.ToLower(module) || 33 strings.Replace(strings.ToLower(candidate.Name), "-", "", -1) == strings.ToLower(module) || 34 strings.Replace(strings.ToLower(candidate.Name), "_", "", -1) == strings.ToLower(module) { 35 nameMatches = append(nameMatches, candidate) 36 } 37 } 38 39 if len(nameMatches) > 0 { 40 // Pick the most popular match 41 sort.Slice(nameMatches, func(a, b int) bool { 42 return downloadStats[nameMatches[a].Name] > downloadStats[nameMatches[b].Name] 43 }) 44 45 return nameMatches[0], "name match", true 46 } 47 48 candidates := removeParentPackages(packages) 49 50 if len(candidates) > 0 { 51 packages = candidates 52 } 53 54 if len(packages) == 1 { 55 return packages[0], "only one after trimming", true 56 } 57 58 // Sort the packages by downloads 59 sort.Slice(packages, func(a, b int) bool { 60 return downloadStats[strings.ToLower(packages[a].Name)] > 61 
downloadStats[strings.ToLower(packages[b].Name)] 62 }) 63 64 // If the most downloaded package that provides this module has been 65 // downloaded fewer then 100 times, skip the module 66 if downloadStats[strings.ToLower(packages[0].Name)] < 100 { 67 return PackageInfo{}, "", false 68 } 69 70 return packages[0], "most popular of remaining", true 71 } 72 73 /* 74 Go through all input packages and look through their dependencies (RequiresDist). 75 If they depend on one or more of the other packages in this list, they are removed 76 from the candidate list (returned). 77 */ 78 func removeParentPackages(packages []PackageInfo) []PackageInfo { 79 candidateSet := make(map[string]PackageInfo) 80 for _, pkg := range packages { 81 candidateSet[pkg.Name] = pkg 82 } 83 84 for _, pkg := range packages { 85 found := false 86 for _, dep := range pkg.RequiresDist { 87 for _, otherPkg := range packages { 88 if strings.EqualFold(dep, otherPkg.Name) { 89 // we found otherPkg is depended on by pkg 90 // but does otherPkg also depend on pkg? 91 // if so, it's a mutual dependency, and we don't count it 92 innerFound := false 93 for _, otherPkgDep := range otherPkg.RequiresDist { 94 if strings.EqualFold(otherPkgDep, pkg.Name) { 95 innerFound = true 96 break 97 } 98 } 99 if !innerFound { 100 found = true 101 break 102 } 103 } 104 } 105 if found { 106 break 107 } 108 } 109 if found { 110 delete(candidateSet, pkg.Name) 111 } 112 } 113 114 var candidates []PackageInfo 115 for _, pkg := range candidateSet { 116 candidates = append(candidates, pkg) 117 } 118 119 return candidates 120 } 121 122 // pythonStdlibModules this build is built from 123 // https://docs.python.org/3/py-modindex.htm as we never want to guess a 124 // standard library module is provided by a remote package. 
var stdlibMods = map[string]bool{
	// graphlib and zoneinfo (added in Python 3.9) and tomllib (added in
	// Python 3.11) are listed alongside the older modules so that imports of
	// these newer standard library modules are never guessed either.
	"__future__":      true,
	"__main__":        true,
	"_dummy_thread":   true,
	"_thread":         true,
	"abc":             true,
	"aifc":            true,
	"argparse":        true,
	"array":           true,
	"ast":             true,
	"asynchat":        true,
	"asyncio":         true,
	"asyncore":        true,
	"atexit":          true,
	"audioop":         true,
	"base64":          true,
	"bdb":             true,
	"binascii":        true,
	"binhex":          true,
	"bisect":          true,
	"builtins":        true,
	"bz2":             true,
	"calendar":        true,
	"cgi":             true,
	"cgitb":           true,
	"chunk":           true,
	"cmath":           true,
	"cmd":             true,
	"code":            true,
	"codecs":          true,
	"codeop":          true,
	"collections":     true,
	"colorsys":        true,
	"compileall":      true,
	"concurrent":      true,
	"configparser":    true,
	"contextlib":      true,
	"contextvars":     true,
	"copy":            true,
	"copyreg":         true,
	"cProfile":        true,
	"crypt":           true,
	"csv":             true,
	"ctypes":          true,
	"curses":          true,
	"dataclasses":     true,
	"datetime":        true,
	"dbm":             true,
	"decimal":         true,
	"difflib":         true,
	"dis":             true,
	"distutils":       true,
	"doctest":         true,
	"dummy_threading": true,
	"email":           true,
	"encodings":       true,
	"ensurepip":       true,
	"enum":            true,
	"errno":           true,
	"faulthandler":    true,
	"fcntl":           true,
	"filecmp":         true,
	"fileinput":       true,
	"fnmatch":         true,
	"formatter":       true,
	"fractions":       true,
	"ftplib":          true,
	"functools":       true,
	"gc":              true,
	"getopt":          true,
	"getpass":         true,
	"gettext":         true,
	"glob":            true,
	"graphlib":        true,
	"grp":             true,
	"gzip":            true,
	"hashlib":         true,
	"heapq":           true,
	"hmac":            true,
	"html":            true,
	"http":            true,
	"imaplib":         true,
	"imghdr":          true,
	"imp":             true,
	"importlib":       true,
	"inspect":         true,
	"io":              true,
	"ipaddress":       true,
	"itertools":       true,
	"json":            true,
	"keyword":         true,
	"lib2to3":         true,
	"linecache":       true,
	"locale":          true,
	"logging":         true,
	"lzma":            true,
	"mailbox":         true,
	"mailcap":         true,
	"marshal":         true,
	"math":            true,
	"mimetypes":       true,
	"mmap":            true,
	"modulefinder":    true,
	"msilib":          true,
	"msvcrt":          true,
	"multiprocessing": true,
	"netrc":           true,
	"nis":             true,
	"nntplib":         true,
	"numbers":         true,
	"operator":        true,
	"optparse":        true,
	"os":              true,
	"ossaudiodev":     true,
	"parser":          true,
	"pathlib":         true,
	"pdb":             true,
	"pickle":          true,
	"pickletools":     true,
	"pipes":           true,
	"pkgutil":         true,
	"platform":        true,
	"plistlib":        true,
	"poplib":          true,
	"posix":           true,
	"pprint":          true,
	"profile":         true,
	"pstats":          true,
	"pty":             true,
	"pwd":             true,
	"py_compile":      true,
	"pyclbr":          true,
	"pydoc":           true,
	"queue":           true,
	"quopri":          true,
	"random":          true,
	"re":              true,
	"readline":        true,
	"reprlib":         true,
	"resource":        true,
	"rlcompleter":     true,
	"runpy":           true,
	"sched":           true,
	"secrets":         true,
	"select":          true,
	"selectors":       true,
	"shelve":          true,
	"shlex":           true,
	"shutil":          true,
	"signal":          true,
	"site":            true,
	"smtpd":           true,
	"smtplib":         true,
	"sndhdr":          true,
	"socket":          true,
	"socketserver":    true,
	"spwd":            true,
	"sqlite3":         true,
	"ssl":             true,
	"stat":            true,
	"statistics":      true,
	"string":          true,
	"stringprep":      true,
	"struct":          true,
	"subprocess":      true,
	"sunau":           true,
	"symbol":          true,
	"symtable":        true,
	"sys":             true,
	"sysconfig":       true,
	"syslog":          true,
	"tabnanny":        true,
	"tarfile":         true,
	"telnetlib":       true,
	"tempfile":        true,
	"termios":         true,
	"test":            true,
	"textwrap":        true,
	"threading":       true,
	"time":            true,
	"timeit":          true,
	"tkinter":         true,
	"token":           true,
	"tokenize":        true,
	"tomllib":         true,
	"trace":           true,
	"traceback":       true,
	"tracemalloc":     true,
	"tty":             true,
	"turtle":          true,
	"turtledemo":      true,
	"types":           true,
	"typing":          true,
	"unicodedata":     true,
	"unittest":        true,
	"urllib":          true,
	"uu":              true,
	"uuid":            true,
	"venv":            true,
	"warnings":        true,
	"wave":            true,
	"weakref":         true,
	"webbrowser":      true,
	"winreg":          true,
	"winsound":        true,
	"wsgiref":         true,
	"xdrlib":          true,
	"xml":             true,
	"xmlrpc":          true,
	"zipapp":          true,
	"zipfile":         true,
	"zipimport":       true,
	"zlib":            true,
	"zoneinfo":        true,
}