github.com/ethersphere/bee/v2@v2.2.0/pkg/shed/example_store_test.go (about)

     1  // Copyright 2018 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package shed_test
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/binary"
    23  	"errors"
    24  	"fmt"
    25  	"log"
    26  	"time"
    27  
    28  	shed2 "github.com/ethersphere/bee/v2/pkg/shed"
    29  	"github.com/ethersphere/bee/v2/pkg/storage"
    30  	"github.com/ethersphere/bee/v2/pkg/storage/testing"
    31  	"github.com/ethersphere/bee/v2/pkg/swarm"
    32  	"github.com/syndtr/goleveldb/leveldb"
    33  )
    34  
    35  // Store holds fields and indexes (including their encoding functions)
    36  // and defines operations on them by composing data from them.
    37  // It is just an example without any support for parallel operations
    38  // or real world implementation.
    39  type Store struct {
    40  	db *shed2.DB
    41  
    42  	// fields and indexes
    43  	schemaName     shed2.StringField
    44  	accessCounter  shed2.Uint64Field
    45  	retrievalIndex shed2.Index
    46  	accessIndex    shed2.Index
    47  	gcIndex        shed2.Index
    48  }
    49  
    50  // New returns new Store. All fields and indexes are initialized
    51  // and possible conflicts with schema from existing database is checked
    52  // automatically.
    53  func New(path string) (s *Store, err error) {
    54  	db, err := shed2.NewDB(path, nil)
    55  	if err != nil {
    56  		return nil, err
    57  	}
    58  	s = &Store{
    59  		db: db,
    60  	}
    61  	// Identify current storage schema by arbitrary name.
    62  	s.schemaName, err = db.NewStringField("schema-name")
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	// Global ever incrementing index of chunk accesses.
    67  	s.accessCounter, err = db.NewUint64Field("access-counter")
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  	// Index storing actual chunk address, data and store timestamp.
    72  	s.retrievalIndex, err = db.NewIndex("Address->StoreTimestamp|Data", shed2.IndexFuncs{
    73  		EncodeKey: func(fields shed2.Item) (key []byte, err error) {
    74  			return fields.Address, nil
    75  		},
    76  		DecodeKey: func(key []byte) (e shed2.Item, err error) {
    77  			e.Address = key
    78  			return e, nil
    79  		},
    80  		EncodeValue: func(fields shed2.Item) (value []byte, err error) {
    81  			b := make([]byte, 8)
    82  			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
    83  			value = append(b, fields.Data...)
    84  			return value, nil
    85  		},
    86  		DecodeValue: func(keyItem shed2.Item, value []byte) (e shed2.Item, err error) {
    87  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
    88  			e.Data = value[8:]
    89  			return e, nil
    90  		},
    91  	})
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  	// Index storing access timestamp for a particular address.
    96  	// It is needed in order to update gc index keys for iteration order.
    97  	s.accessIndex, err = db.NewIndex("Address->AccessTimestamp", shed2.IndexFuncs{
    98  		EncodeKey: func(fields shed2.Item) (key []byte, err error) {
    99  			return fields.Address, nil
   100  		},
   101  		DecodeKey: func(key []byte) (e shed2.Item, err error) {
   102  			e.Address = key
   103  			return e, nil
   104  		},
   105  		EncodeValue: func(fields shed2.Item) (value []byte, err error) {
   106  			b := make([]byte, 8)
   107  			binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
   108  			return b, nil
   109  		},
   110  		DecodeValue: func(keyItem shed2.Item, value []byte) (e shed2.Item, err error) {
   111  			e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
   112  			return e, nil
   113  		},
   114  	})
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  	// Index with keys ordered by access timestamp for garbage collection prioritization.
   119  	s.gcIndex, err = db.NewIndex("AccessTimestamp|StoredTimestamp|Address->nil", shed2.IndexFuncs{
   120  		EncodeKey: func(fields shed2.Item) (key []byte, err error) {
   121  			b := make([]byte, 16, 16+len(fields.Address))
   122  			binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
   123  			binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
   124  			key = append(b, fields.Address...)
   125  			return key, nil
   126  		},
   127  		DecodeKey: func(key []byte) (e shed2.Item, err error) {
   128  			e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
   129  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[8:16]))
   130  			e.Address = key[16:]
   131  			return e, nil
   132  		},
   133  		EncodeValue: func(fields shed2.Item) (value []byte, err error) {
   134  			return nil, nil
   135  		},
   136  		DecodeValue: func(keyItem shed2.Item, value []byte) (e shed2.Item, err error) {
   137  			return e, nil
   138  		},
   139  	})
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	return s, nil
   144  }
   145  
   146  // Put stores the chunk and sets it store timestamp.
   147  func (s *Store) Put(_ context.Context, ch swarm.Chunk) (err error) {
   148  	return s.retrievalIndex.Put(shed2.Item{
   149  		Address:        ch.Address().Bytes(),
   150  		Data:           ch.Data(),
   151  		StoreTimestamp: time.Now().UTC().UnixNano(),
   152  	})
   153  }
   154  
   155  // Get retrieves a chunk with the provided address.
   156  // It updates access and gc indexes by removing the previous
   157  // items from them and adding new items as keys of index entries
   158  // are changed.
   159  func (s *Store) Get(_ context.Context, addr swarm.Address) (c swarm.Chunk, err error) {
   160  	batch := new(leveldb.Batch)
   161  
   162  	// Get the chunk data and storage timestamp.
   163  	item, err := s.retrievalIndex.Get(shed2.Item{
   164  		Address: addr.Bytes(),
   165  	})
   166  	if err != nil {
   167  		if errors.Is(err, leveldb.ErrNotFound) {
   168  			return nil, storage.ErrNotFound
   169  		}
   170  		return nil, fmt.Errorf("retrieval index get: %w", err)
   171  	}
   172  
   173  	// Get the chunk access timestamp.
   174  	accessItem, err := s.accessIndex.Get(shed2.Item{
   175  		Address: addr.Bytes(),
   176  	})
   177  	switch {
   178  	case err == nil:
   179  		// Remove gc index entry if access timestamp is found.
   180  		err = s.gcIndex.DeleteInBatch(batch, shed2.Item{
   181  			Address:         item.Address,
   182  			StoreTimestamp:  accessItem.AccessTimestamp,
   183  			AccessTimestamp: item.StoreTimestamp,
   184  		})
   185  		if err != nil {
   186  			return nil, fmt.Errorf("gc index delete in batch: %w", err)
   187  		}
   188  	case errors.Is(err, leveldb.ErrNotFound):
   189  		// Access timestamp is not found. Do not do anything.
   190  		// This is the first get request.
   191  	default:
   192  		return nil, fmt.Errorf("access index get: %w", err)
   193  	}
   194  
   195  	// Specify new access timestamp
   196  	accessTimestamp := time.Now().UTC().UnixNano()
   197  
   198  	// Put new access timestamp in access index.
   199  	err = s.accessIndex.PutInBatch(batch, shed2.Item{
   200  		Address:         addr.Bytes(),
   201  		AccessTimestamp: accessTimestamp,
   202  	})
   203  	if err != nil {
   204  		return nil, fmt.Errorf("access index put in batch: %w", err)
   205  	}
   206  
   207  	// Put new access timestamp in gc index.
   208  	err = s.gcIndex.PutInBatch(batch, shed2.Item{
   209  		Address:         item.Address,
   210  		AccessTimestamp: accessTimestamp,
   211  		StoreTimestamp:  item.StoreTimestamp,
   212  	})
   213  	if err != nil {
   214  		return nil, fmt.Errorf("gc index put in batch: %w", err)
   215  	}
   216  
   217  	// Increment access counter.
   218  	// Currently this information is not used anywhere.
   219  	_, err = s.accessCounter.IncInBatch(batch)
   220  	if err != nil {
   221  		return nil, fmt.Errorf("access counter inc in batch: %w", err)
   222  	}
   223  
   224  	// Write the batch.
   225  	err = s.db.WriteBatch(batch)
   226  	if err != nil {
   227  		return nil, fmt.Errorf("write batch: %w", err)
   228  	}
   229  
   230  	// Return the chunk.
   231  	return swarm.NewChunk(swarm.NewAddress(item.Address), item.Data), nil
   232  }
   233  
   234  // CollectGarbage is an example of index iteration.
   235  // It provides no reliable garbage collection functionality.
   236  func (s *Store) CollectGarbage() (err error) {
   237  	const maxTrashSize = 100
   238  	maxRounds := 10 // arbitrary number, needs to be calculated
   239  
   240  	// Run a few gc rounds.
   241  	for roundCount := 0; roundCount < maxRounds; roundCount++ {
   242  		var garbageCount int
   243  		// New batch for a new cg round.
   244  		trash := new(leveldb.Batch)
   245  		// Iterate through all index items and break when needed.
   246  		err = s.gcIndex.Iterate(func(item shed2.Item) (stop bool, err error) {
   247  			// Remove the chunk.
   248  			err = s.retrievalIndex.DeleteInBatch(trash, item)
   249  			if err != nil {
   250  				return false, err
   251  			}
   252  			// Remove the element in gc index.
   253  			err = s.gcIndex.DeleteInBatch(trash, item)
   254  			if err != nil {
   255  				return false, err
   256  			}
   257  			// Remove the relation in access index.
   258  			err = s.accessIndex.DeleteInBatch(trash, item)
   259  			if err != nil {
   260  				return false, err
   261  			}
   262  			garbageCount++
   263  			if garbageCount >= maxTrashSize {
   264  				return true, nil
   265  			}
   266  			return false, nil
   267  		}, nil)
   268  		if err != nil {
   269  			return err
   270  		}
   271  		if garbageCount == 0 {
   272  			return nil
   273  		}
   274  		err = s.db.WriteBatch(trash)
   275  		if err != nil {
   276  			return err
   277  		}
   278  	}
   279  	return nil
   280  }
   281  
   282  // GetSchema is an example of retrieving the most simple
   283  // string from a database field.
   284  func (s *Store) GetSchema() (name string, err error) {
   285  	name, err = s.schemaName.Get()
   286  	if errors.Is(err, leveldb.ErrNotFound) {
   287  		return "", nil
   288  	}
   289  	return name, err
   290  }
   291  
   292  // PutSchema is an example of storing the most simple
   293  // string in a database field.
   294  func (s *Store) PutSchema(name string) (err error) {
   295  	return s.schemaName.Put(name)
   296  }
   297  
   298  // Close closes the underlying database.
   299  func (s *Store) Close() error {
   300  	return s.db.Close()
   301  }
   302  
   303  // Example_store constructs a simple storage implementation using shed package.
   304  func Example_store() {
   305  	s, err := New("")
   306  	if err != nil {
   307  		log.Fatal(err)
   308  	}
   309  	defer s.Close()
   310  
   311  	ch := testing.GenerateTestRandomChunk()
   312  	err = s.Put(context.Background(), ch)
   313  	if err != nil {
   314  		return
   315  	}
   316  
   317  	got, err := s.Get(context.Background(), ch.Address())
   318  	if err != nil {
   319  		return
   320  	}
   321  
   322  	fmt.Println(bytes.Equal(got.Data(), ch.Data()))
   323  
   324  	//Output: true
   325  }