github.com/lazin/go-ngram@v0.0.0-20160527144230-80eaf16ac4eb/spool.go (about) 1 package ngram 2 3 import ( 4 "bytes" 5 "errors" 6 "sync" 7 8 "github.com/cespare/go-smaz" 9 ) 10 11 type region struct { 12 begin int 13 end int 14 } 15 16 // string pool data structure 17 type stringPool struct { 18 items []region 19 buffer bytes.Buffer 20 21 sync.RWMutex 22 } 23 24 // Append adds new string to string pool. Function returns token ID and error. 25 // Strings doesn't need to be unique 26 func (pool *stringPool) Append(s string) (TokenID, error) { 27 begin := pool.buffer.Len() 28 bstr := []byte(s) 29 bstr = smaz.Compress(bstr) 30 n, error := pool.buffer.Write(bstr) 31 if error != nil { 32 return 0, error 33 } 34 end := begin + n 35 pool.Lock() 36 ixitem := TokenID(len(pool.items)) 37 pool.items = append(pool.items, region{begin: begin, end: end}) 38 pool.Unlock() 39 return ixitem, nil 40 } 41 42 // ReadAt converts token ID back to string. 43 func (pool *stringPool) ReadAt(index TokenID) (string, error) { 44 if index < TokenID(0) || index >= TokenID(len(pool.items)) { 45 return "", errors.New("index out of range") 46 } 47 pool.RLock() 48 item := pool.items[int(index)] 49 pool.RUnlock() 50 compressed := pool.buffer.Bytes()[item.begin:item.end] 51 decompressed, error := smaz.Decompress(compressed) 52 if error != nil { 53 return "", error 54 } 55 return string(decompressed), nil 56 }