// github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/chunkstore/chunkstore.go

// Copyright 2022 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package chunkstore

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"time"

	"github.com/ethersphere/bee/v2/pkg/sharky"
	"github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/storage/storageutil"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"golang.org/x/exp/slices"
)

var (
	// errMarshalInvalidRetrievalIndexAddress is returned if the RetrievalIndexItem address is zero during marshaling.
	errMarshalInvalidRetrievalIndexAddress = errors.New("marshal RetrievalIndexItem: address is zero")
	// errMarshalInvalidRetrievalIndexLocation is returned if the RetrievalIndexItem location is invalid during marshaling.
	errMarshalInvalidRetrievalIndexLocation = errors.New("marshal RetrievalIndexItem: location is invalid")
	// errUnmarshalInvalidRetrievalIndexSize is returned during unmarshaling if the passed buffer is not the expected size.
	errUnmarshalInvalidRetrievalIndexSize = errors.New("unmarshal RetrievalIndexItem: invalid size")
	// errUnmarshalInvalidRetrievalIndexLocationBytes is returned during unmarshaling if the location buffer is invalid.
	errUnmarshalInvalidRetrievalIndexLocationBytes = errors.New("unmarshal RetrievalIndexItem: invalid location bytes")
)

// RetrievalIndexItemSize is the exact length in bytes of a marshaled
// RetrievalIndexItem: the address (swarm.HashSize), the timestamp (8), the
// sharky location (sharky.LocationSize) and the reference count (4).
const RetrievalIndexItemSize = swarm.HashSize + 8 + sharky.LocationSize + 4

// Compile-time check that *RetrievalIndexItem satisfies storage.Item.
var _ storage.Item = (*RetrievalIndexItem)(nil)

// Sharky provides an abstraction for the sharky.Store operations used in the
// chunkstore. This allows us to be more flexible in passing in the sharky instance
// to chunkstore. For eg, check the TxChunkStore implementation in this pkg.
39 type Sharky interface { 40 Read(context.Context, sharky.Location, []byte) error 41 Write(context.Context, []byte) (sharky.Location, error) 42 Release(context.Context, sharky.Location) error 43 } 44 45 func Get(ctx context.Context, r storage.Reader, s storage.Sharky, addr swarm.Address) (swarm.Chunk, error) { 46 rIdx := &RetrievalIndexItem{Address: addr} 47 err := r.Get(rIdx) 48 if err != nil { 49 return nil, fmt.Errorf("chunk store: failed reading retrievalIndex for address %s: %w", addr, err) 50 } 51 return readChunk(ctx, s, rIdx) 52 } 53 54 // helper to read chunk from retrievalIndex. 55 func readChunk(ctx context.Context, s storage.Sharky, rIdx *RetrievalIndexItem) (swarm.Chunk, error) { 56 buf := make([]byte, rIdx.Location.Length) 57 err := s.Read(ctx, rIdx.Location, buf) 58 if err != nil { 59 return nil, fmt.Errorf( 60 "chunk store: failed reading location: %v for chunk %s from sharky: %w", 61 rIdx.Location, rIdx.Address, err, 62 ) 63 } 64 65 return swarm.NewChunk(rIdx.Address, buf), nil 66 } 67 68 func Has(_ context.Context, r storage.Reader, addr swarm.Address) (bool, error) { 69 return r.Has(&RetrievalIndexItem{Address: addr}) 70 } 71 72 func Put(ctx context.Context, s storage.IndexStore, sh storage.Sharky, ch swarm.Chunk) error { 73 var ( 74 rIdx = &RetrievalIndexItem{Address: ch.Address()} 75 loc sharky.Location 76 ) 77 err := s.Get(rIdx) 78 switch { 79 case errors.Is(err, storage.ErrNotFound): 80 // if this is the first instance of this address, we should store the chunk 81 // in sharky and create the new indexes. 
82 loc, err = sh.Write(ctx, ch.Data()) 83 if err != nil { 84 return fmt.Errorf("chunk store: write to sharky failed: %w", err) 85 } 86 rIdx.Location = loc 87 rIdx.Timestamp = uint64(time.Now().Unix()) 88 case err != nil: 89 return fmt.Errorf("chunk store: failed to read: %w", err) 90 } 91 92 rIdx.RefCnt++ 93 94 return s.Put(rIdx) 95 } 96 97 func Replace(ctx context.Context, s storage.IndexStore, sh storage.Sharky, ch swarm.Chunk) error { 98 rIdx := &RetrievalIndexItem{Address: ch.Address()} 99 err := s.Get(rIdx) 100 if err != nil { 101 return fmt.Errorf("chunk store: failed to read retrievalIndex for address %s: %w", ch.Address(), err) 102 } 103 104 err = sh.Release(ctx, rIdx.Location) 105 if err != nil { 106 return fmt.Errorf("chunkstore: failed to release sharky location: %w", err) 107 } 108 109 loc, err := sh.Write(ctx, ch.Data()) 110 if err != nil { 111 return fmt.Errorf("chunk store: write to sharky failed: %w", err) 112 } 113 rIdx.Location = loc 114 rIdx.Timestamp = uint64(time.Now().Unix()) 115 return s.Put(rIdx) 116 } 117 118 func Delete(ctx context.Context, s storage.IndexStore, sh storage.Sharky, addr swarm.Address) error { 119 rIdx := &RetrievalIndexItem{Address: addr} 120 err := s.Get(rIdx) 121 switch { 122 case errors.Is(err, storage.ErrNotFound): 123 return nil 124 case err != nil: 125 return fmt.Errorf("chunk store: failed to read retrievalIndex for address %s: %w", addr, err) 126 default: 127 rIdx.RefCnt-- 128 } 129 130 if rIdx.RefCnt > 0 { // If there are more references for this we don't delete it from sharky. 
131 err = s.Put(rIdx) 132 if err != nil { 133 return fmt.Errorf("chunk store: failed updating retrievalIndex for address %s: %w", addr, err) 134 } 135 return nil 136 } 137 138 return errors.Join( 139 sh.Release(ctx, rIdx.Location), 140 s.Delete(rIdx), 141 ) 142 } 143 144 func Iterate(ctx context.Context, s storage.IndexStore, sh storage.Sharky, fn storage.IterateChunkFn) error { 145 return s.Iterate( 146 storage.Query{ 147 Factory: func() storage.Item { return new(RetrievalIndexItem) }, 148 }, 149 func(r storage.Result) (bool, error) { 150 ch, err := readChunk(ctx, sh, r.Entry.(*RetrievalIndexItem)) 151 if err != nil { 152 return true, err 153 } 154 return fn(ch) 155 }, 156 ) 157 } 158 159 func IterateChunkEntries(st storage.Reader, fn func(swarm.Address, uint32) (bool, error)) error { 160 return st.Iterate( 161 storage.Query{ 162 Factory: func() storage.Item { return new(RetrievalIndexItem) }, 163 }, 164 func(r storage.Result) (bool, error) { 165 item := r.Entry.(*RetrievalIndexItem) 166 addr := item.Address 167 return fn(addr, item.RefCnt) 168 }, 169 ) 170 } 171 172 type LocationResult struct { 173 Err error 174 Location sharky.Location 175 } 176 177 type IterateResult struct { 178 Err error 179 Item *RetrievalIndexItem 180 } 181 182 // IterateLocations iterates over entire retrieval index and plucks only sharky location. 
183 func IterateLocations( 184 ctx context.Context, 185 st storage.Reader, 186 ) <-chan LocationResult { 187 188 locationResultC := make(chan LocationResult) 189 190 go func() { 191 defer close(locationResultC) 192 193 err := st.Iterate(storage.Query{ 194 Factory: func() storage.Item { return new(RetrievalIndexItem) }, 195 }, func(r storage.Result) (bool, error) { 196 entry := r.Entry.(*RetrievalIndexItem) 197 result := LocationResult{Location: entry.Location} 198 199 select { 200 case <-ctx.Done(): 201 return true, ctx.Err() 202 case locationResultC <- result: 203 } 204 205 return false, nil 206 }) 207 if err != nil { 208 result := LocationResult{Err: fmt.Errorf("iterate retrieval index error: %w", err)} 209 210 select { 211 case <-ctx.Done(): 212 case locationResultC <- result: 213 } 214 } 215 }() 216 217 return locationResultC 218 } 219 220 // Iterate iterates over entire retrieval index with a call back. 221 func IterateItems(st storage.Store, callBackFunc func(*RetrievalIndexItem) error) error { 222 return st.Iterate(storage.Query{ 223 Factory: func() storage.Item { return new(RetrievalIndexItem) }, 224 }, func(r storage.Result) (bool, error) { 225 entry := r.Entry.(*RetrievalIndexItem) 226 return false, callBackFunc(entry) 227 }) 228 } 229 230 // RetrievalIndexItem is the index which gives us the sharky location from the swarm.Address. 231 // The RefCnt stores the reference of each time a Put operation is issued on this Address. 
232 type RetrievalIndexItem struct { 233 Address swarm.Address 234 Timestamp uint64 235 Location sharky.Location 236 RefCnt uint32 237 } 238 239 func (r *RetrievalIndexItem) ID() string { return r.Address.ByteString() } 240 241 func (RetrievalIndexItem) Namespace() string { return "retrievalIdx" } 242 243 // Stored in bytes as: 244 // |--Address(32)--|--Timestamp(8)--|--Location(7)--|--RefCnt(4)--| 245 func (r *RetrievalIndexItem) Marshal() ([]byte, error) { 246 if r.Address.IsZero() { 247 return nil, errMarshalInvalidRetrievalIndexAddress 248 } 249 250 buf := make([]byte, RetrievalIndexItemSize) 251 i := 0 252 253 locBuf, err := r.Location.MarshalBinary() 254 if err != nil { 255 return nil, errMarshalInvalidRetrievalIndexLocation 256 } 257 258 copy(buf[i:swarm.HashSize], r.Address.Bytes()) 259 i += swarm.HashSize 260 261 binary.LittleEndian.PutUint64(buf[i:i+8], r.Timestamp) 262 i += 8 263 264 copy(buf[i:i+sharky.LocationSize], locBuf) 265 i += sharky.LocationSize 266 267 binary.LittleEndian.PutUint32(buf[i:], r.RefCnt) 268 269 return buf, nil 270 } 271 272 func (r *RetrievalIndexItem) Unmarshal(buf []byte) error { 273 if len(buf) != RetrievalIndexItemSize { 274 return errUnmarshalInvalidRetrievalIndexSize 275 } 276 277 i := 0 278 ni := new(RetrievalIndexItem) 279 280 ni.Address = swarm.NewAddress(slices.Clone(buf[i : i+swarm.HashSize])) 281 i += swarm.HashSize 282 283 ni.Timestamp = binary.LittleEndian.Uint64(buf[i : i+8]) 284 i += 8 285 286 loc := new(sharky.Location) 287 if err := loc.UnmarshalBinary(buf[i : i+sharky.LocationSize]); err != nil { 288 return errUnmarshalInvalidRetrievalIndexLocationBytes 289 } 290 ni.Location = *loc 291 i += sharky.LocationSize 292 293 ni.RefCnt = binary.LittleEndian.Uint32(buf[i:]) 294 295 *r = *ni 296 return nil 297 } 298 299 func (r *RetrievalIndexItem) Clone() storage.Item { 300 if r == nil { 301 return nil 302 } 303 return &RetrievalIndexItem{ 304 Address: r.Address.Clone(), 305 Timestamp: r.Timestamp, 306 Location: 
r.Location, 307 RefCnt: r.RefCnt, 308 } 309 } 310 311 func (r RetrievalIndexItem) String() string { 312 return storageutil.JoinFields(r.Namespace(), r.ID()) 313 }