github.com/replit/upm@v0.0.0-20240423230255-9ce4fc3ea24c/internal/backends/python/gen_pypi_map/package_index.go (about) 1 package main 2 3 import ( 4 "bufio" 5 "regexp" 6 7 "github.com/replit/upm/internal/api" 8 ) 9 10 type PackageIndex struct { 11 Next func() bool 12 Package func() string 13 } 14 15 func FakePackageIndex(packages ...string) PackageIndex { 16 i := -1 17 next := func() bool { 18 i++ 19 return i < len(packages) 20 } 21 22 pkg := func() string { 23 return packages[i] 24 } 25 26 return PackageIndex{Next: next, Package: pkg} 27 } 28 29 func NewPackageIndex(index string, limit int) (PackageIndex, error) { 30 resp, err := api.HttpClient.Get(index) 31 if err != nil { 32 return PackageIndex{}, err 33 } 34 35 // Lets read the response through a scanner so we don't have to keep it all 36 // in memory 37 scanner := bufio.NewScanner(resp.Body) 38 scanner.Split(bufio.ScanLines) 39 40 // Build a regex to extract the package name 41 exp := regexp.MustCompile(`<a href="(.*)">(.*)</a>`) 42 43 parsePackage := func() string { 44 token := scanner.Text() 45 46 anchor := exp.FindStringSubmatch(token) 47 if anchor != nil { 48 return anchor[2] 49 } else { 50 return "" 51 } 52 } 53 54 i := 0 55 advanceScanner := func() bool { 56 // Scan until end of scanner or valid package 57 for { 58 // If we are past the limit or nothing is left terminate 59 if limit > -1 && i >= limit || !scanner.Scan() { 60 resp.Body.Close() 61 return false 62 } 63 64 packageName := parsePackage() 65 if packageName != "" { 66 i++ 67 return true 68 } 69 } 70 } 71 72 return PackageIndex{Next: advanceScanner, Package: parsePackage}, nil 73 }