github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/mem_table.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "errors" 27 "fmt" 28 "sort" 29 30 "golang.org/x/sync/errgroup" 31 32 "github.com/dolthub/dolt/go/store/chunks" 33 "github.com/dolthub/dolt/go/store/hash" 34 ) 35 36 func WriteChunks(chunks []chunks.Chunk) (string, []byte, error) { 37 var size uint64 38 for _, chunk := range chunks { 39 size += uint64(len(chunk.Data())) 40 } 41 42 mt := newMemTable(size) 43 44 return writeChunksToMT(mt, chunks) 45 } 46 47 func writeChunksToMT(mt *memTable, chunks []chunks.Chunk) (string, []byte, error) { 48 for _, chunk := range chunks { 49 if !mt.addChunk(addr(chunk.Hash()), chunk.Data()) { 50 return "", nil, errors.New("didn't create this memory table with enough space to add all the chunks") 51 } 52 } 53 54 var stats Stats 55 name, data, count, err := mt.write(nil, &stats) 56 57 if err != nil { 58 return "", nil, err 59 } 60 61 if count != uint32(len(chunks)) { 62 return "", nil, errors.New("didn't write everything") 63 } 64 65 return name.String(), data, nil 66 } 67 68 type memTable struct { 69 chunks map[addr][]byte 70 order []hasRecord // Must maintain the invariant that these are sorted by rec.order 71 maxData, totalData uint64 72 73 snapper snappyEncoder 74 } 75 76 func newMemTable(memTableSize uint64) *memTable { 77 return &memTable{chunks: map[addr][]byte{}, maxData: memTableSize} 78 } 79 80 func (mt *memTable) addChunk(h addr, data []byte) bool { 81 if len(data) == 0 { 82 panic("NBS blocks cannot be zero length") 83 } 84 if _, ok := mt.chunks[h]; ok { 85 return true 86 } 87 dataLen := uint64(len(data)) 88 if mt.totalData+dataLen > mt.maxData { 89 return false 90 } 91 mt.totalData += dataLen 92 mt.chunks[h] = data 93 mt.order = append(mt.order, hasRecord{ 94 &h, 95 h.Prefix(), 96 len(mt.order), 97 false, 98 }) 99 return true 100 } 101 102 func (mt *memTable) count() (uint32, error) { 103 return uint32(len(mt.order)), nil 104 } 105 106 func (mt *memTable) uncompressedLen() (uint64, error) { 107 return mt.totalData, nil 108 } 109 110 func (mt *memTable) has(h addr) (bool, error) { 111 _, has := mt.chunks[h] 112 return has, nil 113 } 114 115 func (mt *memTable) hasMany(addrs []hasRecord) (bool, error) { 116 var remaining bool 117 for i, addr := range addrs { 118 if addr.has { 119 continue 120 } 121 122 ok, err := mt.has(*addr.a) 123 124 if err != nil { 125 return false, err 126 } 127 128 if ok { 129 addrs[i].has = true 130 } else { 131 remaining = true 132 } 133 } 134 return remaining, nil 135 } 136 137 func (mt *memTable) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) { 138 return mt.chunks[h], nil 139 } 140 141 func (mt *memTable) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(*chunks.Chunk), stats *Stats) (bool, error) { 142 var remaining bool 143 for _, r := range reqs { 144 data := mt.chunks[*r.a] 145 if data != nil { 146 c := chunks.NewChunkWithHash(hash.Hash(*r.a), data) 147 found(&c) 148 } else { 149 remaining = true 150 } 151 } 152 return remaining, nil 153 } 154 155 func (mt *memTable) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(CompressedChunk), stats *Stats) (bool, error) { 156 var remaining bool 157 for _, r := range reqs { 158 data := mt.chunks[*r.a] 159 if data != nil { 160 c := chunks.NewChunkWithHash(hash.Hash(*r.a), data) 161 found(ChunkToCompressedChunk(c)) 162 } else { 163 remaining = true 164 } 165 } 166 167 return remaining, nil 168 } 169 170 func (mt *memTable) extract(ctx context.Context, chunks chan<- extractRecord) error { 171 for _, hrec := range mt.order { 172 chunks <- extractRecord{a: *hrec.a, data: mt.chunks[*hrec.a], err: nil} 173 } 174 175 return nil 176 } 177 178 func (mt *memTable) write(haver chunkReader, stats *Stats) (name addr, data []byte, count uint32, err error) { 179 numChunks := uint64(len(mt.order)) 180 if numChunks == 0 { 181 return addr{}, nil, 0, fmt.Errorf("mem table cannot write with zero chunks") 182 } 183 maxSize := maxTableSize(uint64(len(mt.order)), mt.totalData) 184 buff := make([]byte, maxSize) 185 tw := newTableWriter(buff, mt.snapper) 186 187 if haver != nil { 188 sort.Sort(hasRecordByPrefix(mt.order)) // hasMany() requires addresses to be sorted. 189 _, err := haver.hasMany(mt.order) 190 191 if err != nil { 192 return addr{}, nil, 0, err 193 } 194 195 sort.Sort(hasRecordByOrder(mt.order)) // restore "insertion" order for write 196 } 197 198 for _, addr := range mt.order { 199 if !addr.has { 200 h := addr.a 201 tw.addChunk(*h, mt.chunks[*h]) 202 count++ 203 } 204 } 205 tableSize, name, err := tw.finish() 206 207 if err != nil { 208 return addr{}, nil, 0, err 209 } 210 211 if count > 0 { 212 stats.BytesPerPersist.Sample(uint64(tableSize)) 213 stats.CompressedChunkBytesPerPersist.Sample(uint64(tw.totalCompressedData)) 214 stats.UncompressedChunkBytesPerPersist.Sample(uint64(tw.totalUncompressedData)) 215 stats.ChunksPerPersist.Sample(uint64(count)) 216 } 217 218 return name, buff[:tableSize], count, nil 219 } 220 221 func (mt *memTable) Close() error { 222 return nil 223 }