github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/resources.go (about) 1 //go:build !wasip1 && !js 2 3 package resources 4 5 import ( 6 "embed" 7 "fmt" 8 "io" 9 "net/http" 10 "strconv" 11 ) 12 13 //go:embed data/gpt2-tokenizer/encoder.json 14 //go:embed data/gpt2-tokenizer/vocab.bpe 15 //go:embed data/gpt2-tokenizer/unitrim.json 16 //go:embed data/gpt2-tokenizer/specials.txt 17 //go:embed data/pile-tokenizer/encoder.json 18 //go:embed data/pile-tokenizer/vocab.bpe 19 //go:embed data/pile-tokenizer/unitrim.json 20 //go:embed data/pile-tokenizer/specials.txt 21 //go:embed data/clip-tokenizer/encoder.json 22 //go:embed data/clip-tokenizer/vocab.bpe 23 //go:embed data/clip-tokenizer/unitrim.json 24 //go:embed data/clip-tokenizer/specials.txt 25 //go:embed data/clip-tokenizer/special_config.json 26 //go:embed data/clip-tokenizer/special_tokens_map.json 27 //go:embed data/nerdstash_v1-tokenizer/encoder.json 28 //go:embed data/nerdstash_v1-tokenizer/merges.json 29 //go:embed data/nerdstash_v1-tokenizer/specials.txt 30 //go:embed data/nerdstash_v1-tokenizer/special_config.json 31 //go:embed data/nerdstash_v2-tokenizer/encoder.json 32 //go:embed data/nerdstash_v2-tokenizer/merges.json 33 //go:embed data/nerdstash_v2-tokenizer/specials.txt 34 //go:embed data/nerdstash_v2-tokenizer/special_config.json 35 //go:embed data/llama-tokenizer/merges.json 36 //go:embed data/llama-tokenizer/specials.txt 37 //go:embed data/llama-tokenizer/tokenizer_config.json 38 //go:embed data/llama-tokenizer/special_tokens_map.json 39 //go:embed data/llama-tokenizer/encoder.json 40 //go:embed data/llama3-tokenizer/config.json 41 //go:embed data/llama3-tokenizer/tokenizer.json 42 //go:embed data/llama3-tokenizer/tokenizer_config.json 43 //go:embed data/llama3-tokenizer/special_tokens_map.json 44 //go:embed data/mistral-tokenizer/merges.json 45 //go:embed data/mistral-tokenizer/specials.txt 46 //go:embed data/mistral-tokenizer/tokenizer_config.json 47 //go:embed 
data/mistral-tokenizer/tokenizer.json 48 //go:embed data/mistral-tokenizer/special_tokens_map.json 49 //go:embed data/mistral-tokenizer/encoder.json 50 var f embed.FS 51 52 // GetEmbeddedResource 53 // Returns a ResourceEntry for the given resource name that is embedded in 54 // the binary. 55 func GetEmbeddedResource(path string) *ResourceEntry { 56 resourceFile, err := f.Open("data/" + path) 57 if err != nil { 58 return nil 59 } 60 resourceBytes, err := f.ReadFile("data/" + path) 61 if err != nil { 62 return nil 63 } 64 return &ResourceEntry{&resourceFile, &resourceBytes} 65 } 66 67 // EmbeddedDirExists 68 // Returns true if the given directory is embedded in the binary, otherwise 69 // false and an error. 70 func EmbeddedDirExists(path string) (bool, error) { 71 if _, err := f.ReadDir("data/" + path); err != nil { 72 return false, err 73 } else { 74 return true, nil 75 } 76 } 77 78 // FetchHTTP 79 // Fetch a resource from a remote HTTP server with bearer token auth. 80 func FetchHTTP(uri string, rsrc string, auth string) (io.ReadCloser, error) { 81 req, reqErr := http.NewRequest("GET", uri+"/"+rsrc, nil) 82 if reqErr != nil { 83 return nil, reqErr 84 } 85 if auth != "" { 86 req.Header.Add("Authorization", "Bearer "+auth) 87 } 88 resp, remoteErr := http.DefaultClient.Do(req) 89 if remoteErr != nil { 90 return nil, remoteErr 91 } 92 if resp.StatusCode != 200 { 93 return nil, fmt.Errorf("HTTP status code %d", 94 resp.StatusCode) 95 } 96 return resp.Body, nil 97 } 98 99 // SizeHTTP 100 // Get the size of a resource from a remote HTTP server with bearer token auth. 
// The size is taken from the Content-Length header of the response to a HEAD
// request for uri+"/"+rsrc. When auth is non-empty it is sent as an
// "Authorization: Bearer" header. A status other than 200 or a malformed
// Content-Length value yields an error; a response that carries no
// Content-Length header reports a size of 0 with a nil error.
func SizeHTTP(uri string, rsrc string, auth string) (uint, error) {
	req, reqErr := http.NewRequest(http.MethodHead, uri+"/"+rsrc, nil)
	if reqErr != nil {
		return 0, reqErr
	}
	if auth != "" {
		req.Header.Add("Authorization", "Bearer "+auth)
	}
	resp, remoteErr := http.DefaultClient.Do(req)
	if remoteErr != nil {
		return 0, remoteErr
	}
	// A HEAD response has no payload, but the body still has to be closed so
	// the transport can release the connection.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("HTTP status code %d",
			resp.StatusCode)
	}
	rawLen := resp.Header.Get("Content-Length")
	if rawLen == "" {
		// No length advertised: keep reporting 0 rather than failing.
		return 0, nil
	}
	// ParseUint rejects negative and non-numeric values instead of letting
	// them wrap around or silently collapse to 0 in the uint conversion.
	size, parseErr := strconv.ParseUint(rawLen, 10, strconv.IntSize)
	if parseErr != nil {
		return 0, fmt.Errorf("parsing Content-Length %q: %w", rawLen, parseErr)
	}
	return uint(size), nil
}