github.com/creachadair/ffs@v0.17.3/blob/store.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package blob implements an interface and support code for persistent storage
    16  // of opaque (untyped) binary blobs.
    17  //
    18  // # Summary
    19  //
    20  // A [Store] represents a collection of disjoint named key-value namespaces
    21  // backed by a shared pool of storage.  A store may further be partitioned
    22  // into named "substores", each of which manages its own collection of
    23  // keyspaces within its enclosing store. While stores and their keyspaces are
    24  // logically distinct, they are intended to represent partitions of a single
    25  // underlying storage layer.
    26  //
    27  // Keyspaces are either arbitrary ([KV]) or content-addressed ([CAS]).
    28  // Both types implement the common [KVCore] interface.
    29  // Arbitrary keyspaces allow writing of values under user-chosen keys ("Put"),
    30  // while content-addressed keyspaces write values under their content address
    31  // only ("CASPut"). An arbitrary keyspace can be converted into a content
    32  // addressed keyspace using [CASFromKV].
    33  //
    34  // # Implementation Notes
    35  //
    36  // The [Store] and [KV] interfaces defined here are intended to be
    37  // implementable on a variety of concrete substrates (files, databases,
    38  // key-value stores) in a straightforward manner.  The API of these types is
    39  // intended to support blobs of a "reasonable" size, where any individual blob
    40  // can be efficiently processed in memory without streaming or chunking.
    41  //
    42  // While in principle blobs of arbitrary size may be stored, an implementation
    43  // may reject "very large" blobs. Practically an implementation should try to
    44  // accept blobs on the order of (up to) ~100MiB, but may reject blobs much
    45  // larger than that.  This interface is intended to store data that is
    46  // partitioned at a higher level in the protocol, and may not be a good fit for
    47  // use cases that require large individual blobs.
    48  //
    49  // The [memstore] package provides an implementation suitable for use in
    50  // testing. The [filestore] package provides an implementation that uses files
    51  // and directories on a local filesystem. More interesting implementations
    52  // using other storage libraries can be found in other repositories.
    53  //
    54  // [memstore]: https://godoc.org/github.com/creachadair/ffs/blob/memstore
    55  // [filestore]: https://godoc.org/github.com/creachadair/ffs/storage/filestore
    56  package blob
    57  
    58  import (
    59  	"context"
    60  	"errors"
    61  	"iter"
    62  
    63  	"github.com/creachadair/mds/mapset"
    64  	"golang.org/x/crypto/blake2b"
    65  )
    66  
// A Store represents a collection of key-value namespaces ("keyspaces")
// identified by string labels. Each keyspace in a store is logically distinct;
// the keys from one space are independent of the keys in another.
//
// Implementations of this interface must be safe for concurrent use by
// multiple goroutines.
//
// The KV and CAS methods share a namespace, meaning that a [KV] and a [CAS]
// derived from the same Store and using the same name must share the same
// underlying key-value space.  In particular a Put to a KV or a CASPut to a
// CAS must be visible to a Get or List from either.
type Store interface {
	// KV returns the key space with the given name on the store.
	//
	// Multiple calls to KV with the same name are not required to return
	// exactly the same [KV] value, but should return values that will converge
	// (eventually) to the same view of the storage.
	KV(ctx context.Context, name string) (KV, error)

	// CAS returns the content-addressed key space with the given name on the
	// store.
	//
	// Multiple calls to CAS with the same name are not required to return
	// exactly the same [CAS] value, but should return values that will
	// converge (eventually) to the same view of the storage.
	//
	// Implementations of this method that do not require special handling are
	// encouraged to use [CASFromKV] to derive a CAS from a KV.
	CAS(ctx context.Context, name string) (CAS, error)

	// Sub returns a new Store subordinate to the receiver (a "substore").
	// A substore shares logical storage with its parent store, but keyspaces
	// derived from the substore are distinct from keyspaces of the parent store
	// or any other substores derived from it.
	//
	// Multiple calls to Sub with the same name are not required to return
	// exactly the same [Store] value, but should return values that will
	// converge (eventually) to the same view of the storage.
	Sub(ctx context.Context, name string) (Store, error)
}
   106  
// Closer is an extension interface representing the ability to close and
// release resources claimed by a storage component.
type Closer interface {
	// Close closes the component and releases the resources it claimed.
	Close(context.Context) error
}
   112  
// StoreCloser combines a [Store] with a Close method that settles state and
// releases any resources from the store when it is no longer in use.
type StoreCloser interface {
	// Store provides the keyspace and substore accessors.
	Store
	// Closer provides the Close method.
	Closer
}
   119  
// KVCore is the common interface shared by implementations of a key-value
// namespace. Users will generally not use this interface directly; it is
// included by reference in [KV] and [CAS].
type KVCore interface {
	// Get fetches the contents of the blob stored under key. If the key is not
	// found in the store, Get must report an [ErrKeyNotFound] error.
	Get(ctx context.Context, key string) ([]byte, error)

	// Has reports which of the specified keys are present in the store.
	// The result set contains one entry for each requested key that is present
	// in the store. If none of the requested keys is present, the resulting set
	// may be either empty or nil.
	Has(ctx context.Context, keys ...string) (KeySet, error)

	// Delete atomically removes the blob stored under key. If the key is not
	// found in the store, Delete must report an [ErrKeyNotFound] error.
	Delete(ctx context.Context, key string) error

	// List returns an iterator over each key in the store greater than or equal
	// to start, in lexicographic order.
	//
	// Requirements:
	//
	// Each pair reported by the iterator MUST be either a valid key and a nil
	// error, or an empty key and a non-nil error.
	//
	// After the iterator reports an error, it MUST immediately return, even if
	// the yield function reports true.
	//
	// The caller should check the error as part of iteration:
	//
	//  for key, err := range kv.List(ctx, start) {
	//     if err != nil {
	//        return fmt.Errorf("list: %w", err)
	//     }
	//     // ... process key
	//  }
	//
	// It must be safe to call Get, Has, List, and Len during iteration.
	// A caller should not attempt to modify the store while listing, unless the
	// storage implementation documents that it is safe to do so.
	List(ctx context.Context, start string) iter.Seq2[string, error]

	// Len reports the number of keys currently in the store.
	Len(ctx context.Context) (int64, error)
}
   166  
// A KV represents a mutable set of key-value pairs in which each value is
// identified by a unique, opaque string key.  An implementation of KV is
// permitted (but not required) to report an error from Put when given an empty
// key.  If the implementation cannot store empty keys, it must report
// [ErrKeyNotFound] when operating on an empty key (see [KeyNotFound]).
//
// Implementations of this interface must be safe for concurrent use by
// multiple goroutines.  Moreover, any sequence of operations on a KV that does
// not overlap with any Delete executions must be [linearizable].
//
// [linearizable]: https://en.wikipedia.org/wiki/Linearizability
type KV interface {
	KVCore

	// Put writes opts.Data to the store under opts.Key.
	//
	// If the store already contains the specified key and opts.Replace is
	// true, the existing value is replaced without error. If the store already
	// contains the key and opts.Replace is false, Put must report an
	// [ErrKeyExists] error without modifying the previous value.
	Put(ctx context.Context, opts PutOptions) error
}
   187  
// CAS represents a mutable set of content-addressed key-value pairs in which
// each value is identified by a unique, opaque string key. Unlike a [KV], the
// key is assigned from the content of the value rather than chosen by the
// caller.
type CAS interface {
	KVCore

	// CASPut writes data to a content-addressed blob in the underlying store,
	// and returns the assigned key. The target key is returned even in case of
	// error.
	CASPut(ctx context.Context, data []byte) (string, error)

	// CASKey returns the content address of data without modifying the store.
	// This must be the same value that would be returned by a successful call
	// to CASPut on data.
	CASKey(ctx context.Context, data []byte) string
}
   203  
// PutOptions regulate the behaviour of the Put method of a [KV]
// implementation.
//
// Replace is false in the zero value, which asks Put to report [ErrKeyExists]
// for a key that is already present rather than overwrite its value.
type PutOptions struct {
	Key     string // the key to associate with the data
	Data    []byte // the data to write
	Replace bool   // whether to replace an existing value for this key
}
   211  
   212  // CASFromKV converts a [KV] into a [CAS]. This is intended for use by storage
   213  // implementations to support the CAS method of the [Store] interface.
   214  //
   215  // If the concrete type of kv already implements [CAS], it is returned as-is;
   216  // otherwise it is wrapped in an implementation that computes content addresses
   217  // using a [blake2b] digest of the content.
   218  //
   219  // [blake2b]: https://datatracker.ietf.org/doc/html/rfc7693
   220  func CASFromKV(kv KV) CAS {
   221  	if cas, ok := kv.(CAS); ok {
   222  		return cas
   223  	}
   224  	return hashCAS{kv}
   225  }
   226  
   227  // CASFromKVError converts a [KV] into a [CAS]. This is a convenience wrapper
   228  // to combine an error check with a call to [CASFromKV] for use in storage
   229  // implementations.
   230  func CASFromKVError(kv KV, err error) (CAS, error) {
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  	return CASFromKV(kv), nil
   235  }
   236  
   237  var (
   238  	// ErrKeyExists is reported by Put when writing a key that already exists in
   239  	// the store.
   240  	ErrKeyExists = errors.New("key already exists")
   241  
   242  	// ErrKeyNotFound is reported by Get or Size when given a key that does not
   243  	// exist in the store.
   244  	ErrKeyNotFound = errors.New("key not found")
   245  )
   246  
   247  // IsKeyNotFound reports whether err or is or wraps ErrKeyNotFound.
   248  // It is false if err == nil.
   249  func IsKeyNotFound(err error) bool {
   250  	return err != nil && errors.Is(err, ErrKeyNotFound)
   251  }
   252  
   253  // IsKeyExists reports whether err is or wraps ErrKeyExists.
   254  func IsKeyExists(err error) bool {
   255  	return err != nil && errors.Is(err, ErrKeyExists)
   256  }
   257  
   258  // KeyError is the concrete type of errors involving a blob key.
   259  // The caller may type-assert to [*KeyError] to recover the key.
   260  type KeyError struct {
   261  	Err error  // the underlying error
   262  	Key string // the key implicated by the error
   263  }
   264  
   265  // Error implements the error interface for KeyError.
   266  // The default error string does not include the key, since error values are
   267  // often logged by default and keys may be sensitive.
   268  func (k *KeyError) Error() string { return k.Err.Error() }
   269  
   270  // Unwrap returns the underlying error from k, to support error wrapping.
   271  func (k *KeyError) Unwrap() error { return k.Err }
   272  
   273  // KeyNotFound returns an ErrKeyNotFound error reporting that key was not found.
   274  // The concrete type is [*KeyError].
   275  func KeyNotFound(key string) error { return &KeyError{Key: key, Err: ErrKeyNotFound} }
   276  
   277  // KeyExists returns an ErrKeyExists error reporting that key exists in the store.
   278  // The concrete type is [*KeyError].
   279  func KeyExists(key string) error { return &KeyError{Key: key, Err: ErrKeyExists} }
   280  
// KeySet represents a set of keys. It is an alias for mapset.Set[string],
// declared here so the caller does not need to explicitly import [mapset].
type KeySet = mapset.Set[string]
   284  
   285  // A HashCAS is a content-addressable wrapper that adds the CAS methods to a
   286  // delegated [KV].
   287  type hashCAS struct{ KV }
   288  
   289  // hash is the digest function used to compute content addresses for hashCAS.
   290  var hash = blake2b.Sum256
   291  
   292  // key computes the content key for data using the provided hash.
   293  func (c hashCAS) key(data []byte) string {
   294  	h := hash(data)
   295  	return string(h[:])
   296  }
   297  
   298  // CASPut writes data to a content-addressed blob in the underlying store, and
   299  // returns the assigned key. The target key is returned even in case of error.
   300  func (c hashCAS) CASPut(ctx context.Context, data []byte) (string, error) {
   301  	key := c.key(data)
   302  
   303  	// Skip writing if the content address is already present.
   304  	if st, err := c.Has(ctx, key); err == nil && st.Has(key) {
   305  		return key, nil
   306  	}
   307  
   308  	// Write the block to storage. Because we are using a content address we
   309  	// do not request replacement, but we also don't consider it an error if
   310  	// the address already exists.
   311  	err := c.Put(ctx, PutOptions{
   312  		Key:     key,
   313  		Data:    data,
   314  		Replace: false,
   315  	})
   316  	if IsKeyExists(err) {
   317  		err = nil
   318  	}
   319  	return key, err
   320  }
   321  
   322  // CASKey constructs the content address for the specified data.
   323  func (c hashCAS) CASKey(_ context.Context, data []byte) string { return c.key(data) }
   324  
   325  // SyncKeys reports which of the given keys are not present in the key space.
   326  // If all the keys are present, SyncKeys returns an empty [KeySet].
   327  func SyncKeys(ctx context.Context, ks KVCore, keys []string) (KeySet, error) {
   328  	if len(keys) == 0 {
   329  		return nil, nil
   330  	}
   331  	have, err := ks.Has(ctx, keys...)
   332  	if err != nil {
   333  		return nil, err
   334  	}
   335  	var missing KeySet
   336  	for _, key := range keys {
   337  		if !have.Has(key) {
   338  			missing.Add(key)
   339  		}
   340  	}
   341  	return missing, nil
   342  }