github.com/ethersphere/bee/v2@v2.2.0/pkg/file/splitter/internal/job.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package internal

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"

	"github.com/ethersphere/bee/v2/pkg/cac"
	"github.com/ethersphere/bee/v2/pkg/encryption"
	"github.com/ethersphere/bee/v2/pkg/file"
	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

// levelBufferLimit is the maximum number of file tree levels this file hasher
// component can handle:
// (128 ^ (9 - 1)) * 4096 = 295147905179352825856 bytes
const levelBufferLimit = 9

// SimpleSplitterJob encapsulates a single splitter operation, accepting blockwise
// writes of data whose length is defined in advance.
//
// After the job is constructed, Write must be called with byte slices of up to
// ChunkSize bytes until the full data length has been written. Sum should then
// be called, which returns the Swarm hash of the data.
//
// Calling Sum before the last Write, or calling Write after Sum, may result in
// an error and an undefined result.
type SimpleSplitterJob struct {
	ctx        context.Context
	putter     storage.Putter
	spanLength int64  // target length of data
	length     int64  // number of bytes written to the data level of the hasher
	sumCounts  []int  // number of sums performed, indexed per level
	cursors    []int  // section write position, indexed per level
	buffer     []byte // keeps data and hashes, indexed by cursors
	toEncrypt  bool   // whether to encrypt the chunks
	refSize    int64
}

// NewSimpleSplitterJob creates a new SimpleSplitterJob.
//
// The spanLength is the length of the data that will be written.
func NewSimpleSplitterJob(ctx context.Context, putter storage.Putter, spanLength int64, toEncrypt bool) *SimpleSplitterJob {
	hashSize := swarm.HashSize
	refSize := int64(hashSize)
	if toEncrypt {
		refSize += encryption.KeyLength
	}

	return &SimpleSplitterJob{
		ctx:        ctx,
		putter:     putter,
		spanLength: spanLength,
		sumCounts:  make([]int, levelBufferLimit),
		cursors:    make([]int, levelBufferLimit),
		buffer:     make([]byte, swarm.ChunkWithSpanSize*levelBufferLimit*2), // double size as temp workaround for weak calculation of needed buffer space
		toEncrypt:  toEncrypt,
		refSize:    refSize,
	}
}

// Write adds data to the file splitter.
func (j *SimpleSplitterJob) Write(b []byte) (int, error) {
	if len(b) > swarm.ChunkSize {
		return 0, fmt.Errorf("Write must be called with a maximum of %d bytes", swarm.ChunkSize)
	}
	j.length += int64(len(b))
	if j.length > j.spanLength {
		return 0, errors.New("write past span length")
	}

	err := j.writeToLevel(0, b)
	if err != nil {
		return 0, err
	}
	if j.length == j.spanLength {
		err := j.hashUnfinished()
		if err != nil {
			return 0, file.NewHashError(err)
		}
		err = j.moveDanglingChunk()
		if err != nil {
			return 0, file.NewHashError(err)
		}
	}
	return len(b), nil
}

// Sum returns the Swarm hash of the data.
func (j *SimpleSplitterJob) Sum(b []byte) []byte {
	return j.digest()
}
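// exampleSplit is an illustrative sketch, not part of the original file: it
// shows one way a caller might drive a SimpleSplitterJob end to end. The data
// is fed to Write in slices of at most swarm.ChunkSize bytes, and once exactly
// len(data) bytes have been written the root reference is read back via Sum.
// The context and the storage.Putter are assumed to be supplied by the caller.
func exampleSplit(ctx context.Context, putter storage.Putter, data []byte) (swarm.Address, error) {
	j := NewSimpleSplitterJob(ctx, putter, int64(len(data)), false)
	rest := data
	for len(rest) > 0 {
		n := len(rest)
		if n > swarm.ChunkSize {
			n = swarm.ChunkSize // Write rejects slices larger than one chunk
		}
		if _, err := j.Write(rest[:n]); err != nil {
			return swarm.ZeroAddress, err
		}
		rest = rest[n:]
	}
	// Sum ignores its argument; the returned bytes are the root reference
	return swarm.NewAddress(j.Sum(nil)), nil
}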
// writeToLevel writes to the data buffer on the specified level.
// It calls sum if a chunk boundary is reached, and recursively calls this
// function for the next level with the acquired bmt hash.
//
// It adjusts the relevant levels' cursors accordingly.
func (s *SimpleSplitterJob) writeToLevel(lvl int, data []byte) error {
	copy(s.buffer[s.cursors[lvl]:s.cursors[lvl]+len(data)], data)
	s.cursors[lvl] += len(data)
	if s.cursors[lvl]-s.cursors[lvl+1] == swarm.ChunkSize {
		ref, err := s.sumLevel(lvl)
		if err != nil {
			return err
		}
		err = s.writeToLevel(lvl+1, ref)
		if err != nil {
			return err
		}
		s.cursors[lvl] = s.cursors[lvl+1]
	}
	return nil
}

// sumLevel calculates and returns the bmt sum of the last written data on the level.
//
// TODO: error handling on store write fail
func (s *SimpleSplitterJob) sumLevel(lvl int) ([]byte, error) {
	s.sumCounts[lvl]++
	spanSize := file.Spans[lvl] * swarm.ChunkSize
	span := (s.length-1)%spanSize + 1

	var chunkData []byte

	head := make([]byte, swarm.SpanSize)
	binary.LittleEndian.PutUint64(head, uint64(span))
	tail := s.buffer[s.cursors[lvl+1]:s.cursors[lvl]]
	chunkData = append(head, tail...)
	c := chunkData
	var encryptionKey encryption.Key

	if s.toEncrypt {
		var err error
		c, encryptionKey, err = s.encryptChunkData(chunkData)
		if err != nil {
			return nil, err
		}
	}

	ch, err := cac.NewWithDataSpan(c)
	if err != nil {
		return nil, err
	}

	err = s.putter.Put(s.ctx, ch)
	if err != nil {
		return nil, err
	}

	return append(ch.Address().Bytes(), encryptionKey...), nil
}

// digest returns the calculated digest after a Sum call.
//
// The hash returned is the hash in the first section index of the work buffer;
// this will be the root hash when all recursive sums have completed.
//
// The method does not check that the final hash actually has been written, so
// timing is the responsibility of the caller.
func (s *SimpleSplitterJob) digest() []byte {
	if s.toEncrypt {
		return s.buffer[:swarm.SectionSize*2]
	}
	return s.buffer[:swarm.SectionSize]
}

// hashUnfinished hashes the remaining unhashed chunk at the end of the data
// level if the write doesn't end on a chunk boundary.
func (s *SimpleSplitterJob) hashUnfinished() error {
	if s.length%swarm.ChunkSize != 0 {
		ref, err := s.sumLevel(0)
		if err != nil {
			return err
		}
		copy(s.buffer[s.cursors[1]:], ref)
		s.cursors[1] += len(ref)
		s.cursors[0] = s.cursors[1]
	}
	return nil
}
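// spanOfLastChunk is an illustrative helper, not part of the original file,
// restating the span arithmetic used in sumLevel: a reference on level lvl
// covers up to file.Spans[lvl]*swarm.ChunkSize bytes, and the last (possibly
// partial) chunk on that level spans whatever remainder of the written data
// falls into it. For example, after writing 10000 bytes, the last data-level
// chunk spans (10000-1)%4096+1 = 1808 bytes.
func spanOfLastChunk(length int64, lvl int) int64 {
	spanSize := file.Spans[lvl] * swarm.ChunkSize
	return (length-1)%spanSize + 1
}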
// nolint:gofmt
// moveDanglingChunk concatenates a dangling chunk reference with the single
// reference at the highest level of the tree when the tree is not fully
// balanced.
//
// Let F be full chunks (disregarding branching factor) and S be single references
// in the following scenario:
//
//	      S
//	    F   F
//	  F   F   F
//
//	F   F   F   F   S
//
// The result will be:
//
//	      SS
//	    F   F
//	  F   F   F
//
//	F   F   F   F
//
// After which the SS will be hashed to obtain the final root hash.
func (s *SimpleSplitterJob) moveDanglingChunk() error {
	// calculate the total number of levels needed to represent the data (including the data level)
	targetLevel := file.Levels(s.length, swarm.SectionSize, swarm.Branches)

	// sum every intermediate level and write to the level above it
	for i := 1; i < targetLevel; i++ {

		// if there is a single reference outside a balanced tree on this level,
		// don't hash it again but pass it on to the next level
		if s.sumCounts[i] > 0 {
			// TODO: simplify if possible
			if int64(s.sumCounts[i-1])-file.Spans[targetLevel-1-i] <= 1 {
				s.cursors[i+1] = s.cursors[i]
				s.cursors[i] = s.cursors[i-1]
				continue
			}
		}

		ref, err := s.sumLevel(i)
		if err != nil {
			return err
		}
		copy(s.buffer[s.cursors[i+1]:], ref)
		s.cursors[i+1] += len(ref)
		s.cursors[i] = s.cursors[i+1]
	}
	return nil
}

func (s *SimpleSplitterJob) encryptChunkData(chunkData []byte) ([]byte, encryption.Key, error) {
	if len(chunkData) < 8 {
		return nil, nil, fmt.Errorf("invalid data, min length 8 got %v", len(chunkData))
	}

	key, encryptedSpan, encryptedData, err := s.encrypt(chunkData)
	if err != nil {
		return nil, nil, err
	}
	c := make([]byte, len(encryptedSpan)+len(encryptedData))
	copy(c[:8], encryptedSpan)
	copy(c[8:], encryptedData)
	return c, key, nil
}

func (s *SimpleSplitterJob) encrypt(chunkData []byte) (encryption.Key, []byte, []byte, error) {
	key := encryption.GenerateRandomKey(encryption.KeyLength)
	encryptedSpan, err := encryption.NewSpanEncryption(key).Encrypt(chunkData[:8])
	if err != nil {
		return nil, nil, nil, err
	}
	encryptedData, err := encryption.NewDataEncryption(key).Encrypt(chunkData[8:])
	if err != nil {
		return nil, nil, nil, err
	}
	return key, encryptedSpan, encryptedData, nil
}
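// splitEncryptedRef is an illustrative sketch, not part of the original file,
// showing how the references returned by sumLevel decompose when encryption
// is enabled (refSize = swarm.HashSize + encryption.KeyLength): the first
// swarm.HashSize bytes are the content address of the stored chunk, and the
// remaining bytes are the key needed to decrypt its span and data.
func splitEncryptedRef(ref []byte) (swarm.Address, encryption.Key, error) {
	if len(ref) != swarm.HashSize+encryption.KeyLength {
		return swarm.ZeroAddress, nil, fmt.Errorf("invalid encrypted reference length %d", len(ref))
	}
	addr := swarm.NewAddress(ref[:swarm.HashSize])
	key := encryption.Key(ref[swarm.HashSize:])
	return addr, key, nil
}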