github.com/ethersphere/bee/v2@v2.2.0/pkg/file/pipeline/hashtrie/hashtrie.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package hashtrie

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"

	"github.com/ethersphere/bee/v2/pkg/file/pipeline"
	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
	"github.com/ethersphere/bee/v2/pkg/replicas"
	"github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

var (
	errInconsistentRefs = errors.New("inconsistent references")
	errTrieFull         = errors.New("trie full")
)

const maxLevel = 8

type hashTrieWriter struct {
	ctx                    context.Context // context for the put function of dispersed replica chunks
	refSize                int
	cursors                []int  // level cursors, indexed by level. Level 0 is the data level and holds how many chunks were processed. Intermediate higher levels will always have LOWER cursor values.
	buffer                 []byte // keeps intermediate level data
	full                   bool   // indicates whether the trie is full. currently we support (128^7)*4096 = 2305843009213693952 bytes
	pipelineFn             pipeline.PipelineFunc
	rParams                redundancy.RedundancyParams
	parityChunkFn          redundancy.ParityChunkCallback
	chunkCounters          []uint8        // counts the chunk references in intermediate chunks, indexed by chunk level.
	effectiveChunkCounters []uint8        // counts the effective (non-parity) chunk references in intermediate chunks, indexed by chunk level.
	maxChildrenChunks      uint8          // maximum number of chunk references in intermediate chunks.
	replicaPutter          storage.Putter // putter to save dispersed replicas of the root chunk
}

func NewHashTrieWriter(
	ctx context.Context,
	refLen int,
	rParams redundancy.RedundancyParams,
	pipelineFn pipeline.PipelineFunc,
	replicaPutter storage.Putter,
) pipeline.ChainWriter {
	h := &hashTrieWriter{
		ctx:                    ctx,
		refSize:                refLen,
		cursors:                make([]int, 9),
		buffer:                 make([]byte, swarm.ChunkWithSpanSize*9*2), // double size as temp workaround for weak calculation of needed buffer space
		rParams:                rParams,
		pipelineFn:             pipelineFn,
		chunkCounters:          make([]uint8, 9),
		effectiveChunkCounters: make([]uint8, 9),
		maxChildrenChunks:      uint8(rParams.MaxShards() + rParams.Parities(rParams.MaxShards())),
		replicaPutter:          replicas.NewPutter(replicaPutter),
	}
	h.parityChunkFn = func(level int, span, address []byte) error {
		return h.writeToIntermediateLevel(level, true, span, address, []byte{})
	}

	return h
}

// ChainWrite accepts writes of hashes from the previous writer in the chain;
// by definition these writes are on level 1.
func (h *hashTrieWriter) ChainWrite(p *pipeline.PipeWriteArgs) error {
	oneRef := h.refSize + swarm.SpanSize
	l := len(p.Span) + len(p.Ref) + len(p.Key)
	if l%oneRef != 0 || l == 0 {
		return errInconsistentRefs
	}
	if h.full {
		return errTrieFull
	}
	if h.rParams.Level() == redundancy.NONE {
		return h.writeToIntermediateLevel(1, false, p.Span, p.Ref, p.Key)
	} else {
		return h.writeToDataLevel(p.Span, p.Ref, p.Key, p.Data)
	}
}
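// refEntrySize is an editor's illustrative sketch, not part of the original file:
// the consistency check in ChainWrite above treats every incoming write as a whole
// number of span|ref|key entries, each refSize+swarm.SpanSize bytes long. With an
// unencrypted 32-byte reference that is 8+32 = 40 bytes per child, so a full level
// of 128 children (the typical maxChildrenChunks with no redundancy) occupies
// 128*40 = 5120 bytes of the buffer before it is wrapped.
func refEntrySize(refSize int) int {
	return refSize + swarm.SpanSize
}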
func (h *hashTrieWriter) writeToIntermediateLevel(level int, parityChunk bool, span, ref, key []byte) error {
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(span)], span)
	h.cursors[level] += len(span)
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(ref)], ref)
	h.cursors[level] += len(ref)
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(key)], key)
	h.cursors[level] += len(key)

	// update counters
	if !parityChunk {
		h.effectiveChunkCounters[level]++
	}
	h.chunkCounters[level]++
	if h.chunkCounters[level] == h.maxChildrenChunks {
		// at this point the erasure coded chunks have been written
		err := h.wrapFullLevel(level)
		return err
	}
	return nil
}

// writeToDataLevel caches data chunks and calls writeToIntermediateLevel.
func (h *hashTrieWriter) writeToDataLevel(span, ref, key, data []byte) error {
	// write dataChunks to the level above
	err := h.writeToIntermediateLevel(1, false, span, ref, key)
	if err != nil {
		return err
	}

	return h.rParams.ChunkWrite(0, data, h.parityChunkFn)
}

// wrapFullLevel wraps an existing level and writes the resulting hash to the following level,
// then truncates the current level data by shifting the cursors.
// Steps are performed in the following order:
//   - take all of the data in the current level
//   - break down span and hash data
//   - sum the span size, concatenate the hash to the buffer
//   - call the short pipeline with the span and the buffer
//   - get the hash that was created, append it one level above, and if necessary, wrap that level too
//   - remove already hashed data from the buffer
//
// It assumes that h.chunkCounters[level] has reached h.maxChildrenChunks (a full level of children),
// or that redundancy.Encode was called in the case of the rightmost chunks.
func (h *hashTrieWriter) wrapFullLevel(level int) error {
	data := h.buffer[h.cursors[level+1]:h.cursors[level]]
	sp := uint64(0)
	var hashes []byte
	offset := 0
	for i := uint8(0); i < h.effectiveChunkCounters[level]; i++ {
		// sum up the spans of the level, then we need to bmt them and store it as a chunk
		// then write the chunk address to the next level up
		sp += binary.LittleEndian.Uint64(data[offset : offset+swarm.SpanSize])
		offset += swarm.SpanSize
		hash := data[offset : offset+h.refSize]
		offset += h.refSize
		hashes = append(hashes, hash...)
	}
	parities := 0
	for offset < len(data) {
		// we do not add the spans of parity chunks to the cumulative span because they are gibberish
		offset += swarm.SpanSize
		hash := data[offset : offset+swarm.HashSize] // a parity reference is always hash-length
		offset += swarm.HashSize
		hashes = append(hashes, hash...)
		parities++
	}
	spb := make([]byte, 8)
	binary.LittleEndian.PutUint64(spb, sp)
	if parities > 0 {
		redundancy.EncodeLevel(spb, h.rParams.Level())
	}
	hashes = append(spb, hashes...)
	writer := h.pipelineFn()
	args := pipeline.PipeWriteArgs{
		Data: hashes,
		Span: spb,
	}
	err := writer.ChainWrite(&args)
	if err != nil {
		return err
	}

	err = h.writeToIntermediateLevel(level+1, false, args.Span, args.Ref, args.Key)
	if err != nil {
		return err
	}

	err = h.rParams.ChunkWrite(level, args.Data, h.parityChunkFn)
	if err != nil {
		return err
	}

	// this "truncates" the current level that was wrapped
	// by setting the cursors to the cursors of one level above
	h.cursors[level] = h.cursors[level+1]
	h.chunkCounters[level], h.effectiveChunkCounters[level] = 0, 0
	if level+1 == 8 {
		h.full = true
	}
	return nil
}
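// sumChildSpans is an editor's illustrative sketch, not part of the original file:
// it mirrors the first loop of wrapFullLevel above, walking the packed span|ref
// entries of one level and summing the little-endian spans of the effective
// (non-parity) children. For example, wrapping a level that holds 128 full data
// chunks of 4096 bytes each yields a parent span of 128*4096 = 524288 bytes.
func sumChildSpans(level []byte, refSize int, effective int) uint64 {
	var sum uint64
	offset := 0
	for i := 0; i < effective; i++ {
		sum += binary.LittleEndian.Uint64(level[offset : offset+swarm.SpanSize])
		offset += swarm.SpanSize + refSize
	}
	return sum
}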
// Sum returns the Swarm merkle-root content-addressed hash
// of arbitrary-length binary data.
// The algorithm it uses is as follows:
//   - From level 1 up to maxLevel (8), iterate:
//     -- If the level data length equals 0, continue to the next level
//     -- If the level data length equals 1 reference, carry the level data over to the next level
//     -- If the level data length is bigger than 1 reference, sum the level and
//     write the result to the next level
//   - Return the hash in level 8
//
// The cases are as follows:
//   - one hash in a given level, in which case we _do not_ perform a hashing operation, but just move
//     the hash to the next level, potentially resulting in a level wrap
//   - more than one hash, in which case we _do_ perform a hashing operation, appending the hash to
//     the next level
func (h *hashTrieWriter) Sum() ([]byte, error) {
	for i := 1; i < maxLevel; i++ {
		l := h.chunkCounters[i]
		switch {
		case l == 0:
			// level empty, continue to the next.
			continue
		case l == h.maxChildrenChunks:
			// this case is possible and necessary due to the carry over
			// in the next switch case statement. normal writes done
			// through writeToIntermediateLevel will automatically wrap a full level.
			// an erasure encoding call is not necessary since ElevateCarrierChunk solves that.
			err := h.wrapFullLevel(i)
			if err != nil {
				return nil, err
			}
		case l == 1:
			// this cursor assignment basically means:
			// take the hash|span|key from this level, and append it to
			// the data of the next level. you may wonder how this works:
			// every time we sum a level, the sum gets written into the next level
			// and the level cursor gets set to the next level's cursor (see the
			// truncating at the end of wrapFullLevel). there might (or might not) be
			// a hash at the next level, and the cursor of the next level is
			// necessarily _smaller_ than the cursor of this level, so in fact what
			// happens is that due to the shifting of the cursors, the data of this
			// level will appear to be concatenated with the data of the next level.
			// we therefore get a "carry-over" behavior between intermediate levels
			// that might or might not have data. the eventual result is that the last
			// hash generated will always be carried over to the last level (8), then returned.
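			//
			// concrete illustration (editor's note, not in the original file): for a
			// file of exactly 128 full data chunks, level 1 wraps into a single hash
			// on level 2 already during ChainWrite; at Sum time levels 2 through 7
			// each hold that one hash and carry it upwards via this case, until
			// level 8 holds exactly one reference, which is returned as the root below.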
			h.cursors[i+1] = h.cursors[i]
			// replace the cached chunk on that level as well
			err := h.rParams.ElevateCarrierChunk(i-1, h.parityChunkFn)
			if err != nil {
				return nil, err
			}
			// update counters; subtracting from the current level is not necessary
			h.effectiveChunkCounters[i+1]++
			h.chunkCounters[i+1]++
		default:
			// call erasure encoding before writing the last chunk on the level
			err := h.rParams.Encode(i-1, h.parityChunkFn)
			if err != nil {
				return nil, err
			}
			// more than one reference but fewer than a full level - wrap the level to the one above it
			err = h.wrapFullLevel(i)
			if err != nil {
				return nil, err
			}
		}
	}
	levelLen := h.chunkCounters[maxLevel]
	if levelLen != 1 {
		return nil, errInconsistentRefs
	}

	// return the hash in the highest level, that's all we need
	data := h.buffer[0:h.cursors[maxLevel]]
	rootHash := data[swarm.SpanSize:]

	// save dispersed replicas of the root chunk
	if h.rParams.Level() != redundancy.NONE {
		rootData, err := h.rParams.GetRootData()
		if err != nil {
			return nil, err
		}
		err = h.replicaPutter.Put(h.ctx, swarm.NewChunk(swarm.NewAddress(rootHash[:swarm.HashSize]), rootData))
		if err != nil {
			return nil, fmt.Errorf("hashtrie: cannot put dispersed replica %s", err.Error())
		}
	}
	return rootHash, nil
}
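// maxTrieCapacity is an editor's illustrative sketch, not part of the original
// file: with maxLevel = 8 there are 7 intermediate levels above the data level,
// each fanning out to at most 128 children, so the trie can address at most
// 128^7 * 4096 = 2305843009213693952 bytes, which is the figure quoted in the
// comment on the `full` field above.
func maxTrieCapacity() uint64 {
	const branches = 128   // matches swarm.Branches
	const chunkSize = 4096 // matches swarm.ChunkSize
	capacity := uint64(chunkSize)
	for i := 1; i < maxLevel; i++ {
		capacity *= branches
	}
	return capacity
}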