github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/lib/library_test.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "os" 6 "testing" 7 ) 8 9 func ReadTestFile(path string) ([]byte, error) { 10 if testFile, err := os.Open(path); err != nil { 11 return nil, err 12 } else { 13 fileinfo, statErr := testFile.Stat() 14 if statErr != nil { 15 fmt.Println(err) 16 return nil, statErr 17 } 18 filesize := fileinfo.Size() 19 buffer := make([]byte, filesize) 20 21 _, readErr := testFile.Read(buffer) 22 if readErr != nil { 23 fmt.Println(err) 24 return nil, readErr 25 } 26 27 return buffer, nil 28 } 29 } 30 31 func BenchmarkTokenize(b *testing.B) { 32 b.StopTimer() 33 b.ResetTimer() 34 vocab := "gpt2-tokenizer" 35 wrapInitTokenizer(vocab) 36 path := "../resources/frankenstein.txt" 37 if corpus, err := ReadTestFile(path); err != nil { 38 fmt.Printf("%v\n", err) 39 b.Fail() 40 } else { 41 b.StartTimer() 42 duration, numTokens := testBuffer(vocab, corpus) 43 b.StopTimer() 44 tokensPerSecond := float64(numTokens) / duration.Seconds() 45 b.Logf("%d tokens generated at %0.2f per second over %vms", 46 numTokens, tokensPerSecond, duration.Milliseconds()) 47 } 48 }