github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/lib/library_test.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"testing"
     7  )
     8  
     9  func ReadTestFile(path string) ([]byte, error) {
    10  	if testFile, err := os.Open(path); err != nil {
    11  		return nil, err
    12  	} else {
    13  		fileinfo, statErr := testFile.Stat()
    14  		if statErr != nil {
    15  			fmt.Println(err)
    16  			return nil, statErr
    17  		}
    18  		filesize := fileinfo.Size()
    19  		buffer := make([]byte, filesize)
    20  
    21  		_, readErr := testFile.Read(buffer)
    22  		if readErr != nil {
    23  			fmt.Println(err)
    24  			return nil, readErr
    25  		}
    26  
    27  		return buffer, nil
    28  	}
    29  }
    30  
    31  func BenchmarkTokenize(b *testing.B) {
    32  	b.StopTimer()
    33  	b.ResetTimer()
    34  	vocab := "gpt2-tokenizer"
    35  	wrapInitTokenizer(vocab)
    36  	path := "../resources/frankenstein.txt"
    37  	if corpus, err := ReadTestFile(path); err != nil {
    38  		fmt.Printf("%v\n", err)
    39  		b.Fail()
    40  	} else {
    41  		b.StartTimer()
    42  		duration, numTokens := testBuffer(vocab, corpus)
    43  		b.StopTimer()
    44  		tokensPerSecond := float64(numTokens) / duration.Seconds()
    45  		b.Logf("%d tokens generated at %0.2f per second over %vms",
    46  			numTokens, tokensPerSecond, duration.Milliseconds())
    47  	}
    48  }