github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/resources.go (about)

     1  //go:build !wasip1 && !js
     2  
     3  package resources
     4  
     5  import (
     6  	"embed"
     7  	"fmt"
     8  	"io"
     9  	"net/http"
    10  	"strconv"
    11  )
    12  
// f is the embedded filesystem holding the tokenizer resource files named by
// the go:embed directives below. Every embedded path is rooted at "data/", and
// only the files listed here are compiled into the binary.
//
//go:embed data/gpt2-tokenizer/encoder.json
//go:embed data/gpt2-tokenizer/vocab.bpe
//go:embed data/gpt2-tokenizer/unitrim.json
//go:embed data/gpt2-tokenizer/specials.txt
//go:embed data/pile-tokenizer/encoder.json
//go:embed data/pile-tokenizer/vocab.bpe
//go:embed data/pile-tokenizer/unitrim.json
//go:embed data/pile-tokenizer/specials.txt
//go:embed data/clip-tokenizer/encoder.json
//go:embed data/clip-tokenizer/vocab.bpe
//go:embed data/clip-tokenizer/unitrim.json
//go:embed data/clip-tokenizer/specials.txt
//go:embed data/clip-tokenizer/special_config.json
//go:embed data/clip-tokenizer/special_tokens_map.json
//go:embed data/nerdstash_v1-tokenizer/encoder.json
//go:embed data/nerdstash_v1-tokenizer/merges.json
//go:embed data/nerdstash_v1-tokenizer/specials.txt
//go:embed data/nerdstash_v1-tokenizer/special_config.json
//go:embed data/nerdstash_v2-tokenizer/encoder.json
//go:embed data/nerdstash_v2-tokenizer/merges.json
//go:embed data/nerdstash_v2-tokenizer/specials.txt
//go:embed data/nerdstash_v2-tokenizer/special_config.json
//go:embed data/llama-tokenizer/merges.json
//go:embed data/llama-tokenizer/specials.txt
//go:embed data/llama-tokenizer/tokenizer_config.json
//go:embed data/llama-tokenizer/special_tokens_map.json
//go:embed data/llama-tokenizer/encoder.json
//go:embed data/llama3-tokenizer/config.json
//go:embed data/llama3-tokenizer/tokenizer.json
//go:embed data/llama3-tokenizer/tokenizer_config.json
//go:embed data/llama3-tokenizer/special_tokens_map.json
//go:embed data/mistral-tokenizer/merges.json
//go:embed data/mistral-tokenizer/specials.txt
//go:embed data/mistral-tokenizer/tokenizer_config.json
//go:embed data/mistral-tokenizer/tokenizer.json
//go:embed data/mistral-tokenizer/special_tokens_map.json
//go:embed data/mistral-tokenizer/encoder.json
var f embed.FS
    51  
    52  // GetEmbeddedResource
    53  // Returns a ResourceEntry for the given resource name that is embedded in
    54  // the binary.
    55  func GetEmbeddedResource(path string) *ResourceEntry {
    56  	resourceFile, err := f.Open("data/" + path)
    57  	if err != nil {
    58  		return nil
    59  	}
    60  	resourceBytes, err := f.ReadFile("data/" + path)
    61  	if err != nil {
    62  		return nil
    63  	}
    64  	return &ResourceEntry{&resourceFile, &resourceBytes}
    65  }
    66  
    67  // EmbeddedDirExists
    68  // Returns true if the given directory is embedded in the binary, otherwise
    69  // false and an error.
    70  func EmbeddedDirExists(path string) (bool, error) {
    71  	if _, err := f.ReadDir("data/" + path); err != nil {
    72  		return false, err
    73  	} else {
    74  		return true, nil
    75  	}
    76  }
    77  
    78  // FetchHTTP
    79  // Fetch a resource from a remote HTTP server with bearer token auth.
    80  func FetchHTTP(uri string, rsrc string, auth string) (io.ReadCloser, error) {
    81  	req, reqErr := http.NewRequest("GET", uri+"/"+rsrc, nil)
    82  	if reqErr != nil {
    83  		return nil, reqErr
    84  	}
    85  	if auth != "" {
    86  		req.Header.Add("Authorization", "Bearer "+auth)
    87  	}
    88  	resp, remoteErr := http.DefaultClient.Do(req)
    89  	if remoteErr != nil {
    90  		return nil, remoteErr
    91  	}
    92  	if resp.StatusCode != 200 {
    93  		return nil, fmt.Errorf("HTTP status code %d",
    94  			resp.StatusCode)
    95  	}
    96  	return resp.Body, nil
    97  }
    98  
    99  // SizeHTTP
   100  // Get the size of a resource from a remote HTTP server with bearer token auth.
   101  func SizeHTTP(uri string, rsrc string, auth string) (uint, error) {
   102  	req, reqErr := http.NewRequest("HEAD", uri+"/"+rsrc, nil)
   103  	if reqErr != nil {
   104  		return 0, reqErr
   105  	}
   106  	if auth != "" {
   107  		req.Header.Add("Authorization", "Bearer "+auth)
   108  	}
   109  	resp, remoteErr := http.DefaultClient.Do(req)
   110  	if remoteErr != nil {
   111  		return 0, remoteErr
   112  	} else if resp.StatusCode != 200 {
   113  		return 0, fmt.Errorf("HTTP status code %d",
   114  			resp.StatusCode)
   115  	} else {
   116  		size, _ := strconv.Atoi(resp.Header.Get("Content-Length"))
   117  		return uint(size), nil
   118  	}
   119  }