github.com/scottcagno/storage@v1.8.0/pkg/lsmt/sstable/ss-table-index.go (about) 1 package sstable 2 3 import ( 4 "fmt" 5 "github.com/scottcagno/storage/pkg/lsmt/binary" 6 "github.com/scottcagno/storage/pkg/lsmt/trees/rbtree" 7 "io" 8 "math" 9 "os" 10 "path/filepath" 11 "strconv" 12 ) 13 14 func IndexFileNameFromIndex(index int64) string { 15 hexa := strconv.FormatInt(index, 16) 16 return fmt.Sprintf("%s%010s%s", filePrefix, hexa, indexFileSuffix) 17 } 18 19 func IndexFromIndexFileName(name string) (int64, error) { 20 hexa := name[len(filePrefix) : len(name)-len(indexFileSuffix)] 21 return strconv.ParseInt(hexa, 16, 32) 22 } 23 24 type SSTIndex struct { 25 path string 26 file *os.File 27 open bool 28 first string 29 last string 30 data []*binary.Index 31 } 32 33 func OpenSSTIndex(base string, index int64) (*SSTIndex, error) { 34 // make sure we are working with absolute paths 35 base, err := filepath.Abs(base) 36 if err != nil { 37 return nil, err 38 } 39 // sanitize any path separators 40 base = filepath.ToSlash(base) 41 // create new gindex file path 42 path := filepath.Join(base, IndexFileNameFromIndex(index)) 43 // open (or create) gindex file 44 file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0666) 45 if err != nil { 46 return nil, err 47 } 48 // init and return SSTIndex 49 ssi := &SSTIndex{ 50 path: path, 51 file: file, 52 open: true, 53 } 54 // load sst data gindex info 55 err = ssi.LoadSSIndexData() 56 if err != nil { 57 return nil, err 58 } 59 return ssi, nil 60 } 61 62 func (ssi *SSTIndex) LoadSSIndexData() error { 63 // check to make sure file exists 64 _, err := os.Stat(ssi.path) 65 if os.IsNotExist(err) { 66 return err 67 } 68 // open file to read header 69 fd, err := os.OpenFile(ssi.path, os.O_RDONLY, 0666) 70 if err != nil { 71 return err 72 } 73 // read and decode gindex entries 74 for { 75 // decode next gindex entry 76 i, err := binary.DecodeIndex(fd) 77 if err != nil { 78 if err == io.EOF || err == io.ErrUnexpectedEOF { 79 break 80 } 81 // make sure we close! 82 err = fd.Close() 83 if err != nil { 84 return err 85 } 86 return err 87 } 88 // add gindex entry to sst gindex 89 ssi.data = append(ssi.data, i) 90 } 91 // make sure we close! 92 err = fd.Close() 93 if err != nil { 94 return err 95 } 96 // update sst first and last and then return 97 if len(ssi.data) > 0 { 98 ssi.first = string(ssi.data[0].Key) 99 ssi.last = string(ssi.data[len(ssi.data)-1].Key) 100 } 101 return nil 102 } 103 104 func (ssi *SSTIndex) errorCheckFileAndIndex() error { 105 // make sure file is not closed 106 if !ssi.open { 107 return binary.ErrFileClosed 108 } 109 // make sure gindex is loaded 110 if ssi.data == nil { 111 err := ssi.LoadSSIndexData() 112 if err != nil { 113 return err 114 } 115 } 116 return nil 117 } 118 119 func (ssi *SSTIndex) Write(key []byte, offset int64) error { 120 // error check 121 err := ssi.errorCheckFileAndIndex() 122 if err != nil { 123 return err 124 } 125 // create new gindex 126 i := &binary.Index{Key: key, Offset: offset} 127 // write entry info to gindex file 128 _, err = binary.EncodeIndex(ssi.file, i) 129 if err != nil { 130 return err 131 } 132 // add to gindex 133 ssi.data = append(ssi.data, i) 134 // check last 135 last := len(ssi.data) - 1 136 if ssi.last != string(ssi.data[last].Key) { 137 ssi.last = string(ssi.data[last].Key) 138 } 139 return nil 140 } 141 142 func (ssi *SSTIndex) searchDataIndex(key string) int { 143 // declare for later 144 i, j := 0, len(ssi.data) 145 // otherwise, perform binary search 146 for i < j { 147 h := i + (j-i)/2 148 if key >= string(ssi.data[h].Key) { 149 i = h + 1 150 } else { 151 j = h 152 } 153 } 154 return i - 1 155 } 156 157 func (ssi *SSTIndex) Find(key string) (*binary.Index, error) { 158 // error check 159 err := ssi.errorCheckFileAndIndex() 160 if err != nil { 161 return nil, err 162 } 163 // attempt to find key 164 at := ssi.searchDataIndex(key) 165 if at == -1 { 166 return nil, ErrSSTIndexNotFound 167 } 168 // check gindex for entry offset 169 i := ssi.data[at] 170 if i == nil || i.Offset == -1 { 171 return nil, ErrSSTIndexNotFound 172 } 173 // return data entry 174 return i, nil 175 } 176 177 func (ssi *SSTIndex) Scan(iter func(k string, off int64) bool) { 178 for n := range ssi.data { 179 i := ssi.data[n] 180 if !iter(string(i.Key), i.Offset) { 181 continue 182 } 183 } 184 } 185 186 func calculateSparseRatio(n int64) int64 { 187 if n < 1 { 188 return 0 189 } 190 if n == 1 { 191 n++ 192 } 193 return int64(math.Log2(float64(n))) 194 } 195 196 func (ssi *SSTIndex) GenerateAndGetSparseIndex() ([]*binary.Index, error) { 197 if !ssi.open { 198 return nil, binary.ErrFileClosed 199 } 200 var sparseSet []*binary.Index 201 count := int64(len(ssi.data)) 202 ratio := calculateSparseRatio(count) 203 for i := int64(0); i < count; i++ { 204 if i%(count/ratio) == 0 { 205 sparseSet = append(sparseSet, ssi.data[i]) 206 } 207 } 208 return sparseSet, nil 209 } 210 211 func (ssi *SSTIndex) GenerateAndPutSparseIndex(sparseIndex *rbtree.RBTree) error { 212 if !ssi.open { 213 return binary.ErrFileClosed 214 } 215 index, err := ssi.GetIndexNumber() 216 if err != nil { 217 return err 218 } 219 count := int64(len(ssi.data)) 220 ratio := calculateSparseRatio(count) 221 for i := int64(0); i < count; i++ { 222 if i%(count/ratio) == 0 { 223 sparseIndex.Put(spiEntry{ 224 Key: string(ssi.data[i].Key), 225 SSTIndex: index, 226 IndexEntry: ssi.data[i], 227 }) 228 } 229 } 230 return nil 231 } 232 233 func (ssi *SSTIndex) GetIndexNumber() (int64, error) { 234 index, err := IndexFromIndexFileName(filepath.Base(ssi.file.Name())) 235 if err != nil { 236 return -1, err 237 } 238 return index, nil 239 } 240 241 func (ssi *SSTIndex) Len() int { 242 return len(ssi.data) 243 } 244 245 func (ssi *SSTIndex) Close() error { 246 if !ssi.open { 247 return nil 248 } 249 err := ssi.file.Sync() 250 if err != nil { 251 return err 252 } 253 err = ssi.file.Close() 254 if err != nil { 255 return err 256 } 257 ssi.open = false 258 return nil 259 }