// Source: github.com/creachadair/ffs@v0.17.3/blob/store.go

// Copyright 2019 Michael J. Fromberger. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package blob implements an interface and support code for persistent storage
// of opaque (untyped) binary blobs.
//
// # Summary
//
// A [Store] represents a collection of disjoint named key-value namespaces
// backed by a shared pool of storage. A store may further be partitioned
// into named "substores", each of which manages its own collection of
// keyspaces within its enclosing store. While stores and their keyspaces are
// logically distinct, they are intended to represent partitions of a single
// underlying storage layer.
//
// Keyspaces are either arbitrary ([KV]) or content-addressed ([CAS]).
// Both types implement the common [KVCore] interface.
// Arbitrary keyspaces allow writing of values under user-chosen keys ("Put"),
// while content-addressed keyspaces write values under their content address
// only ("CASPut"). An arbitrary keyspace can be converted into a content
// addressed keyspace using [CASFromKV].
//
// # Implementation Notes
//
// The [Store] and [KV] interfaces defined here are intended to be
// implementable on a variety of concrete substrates (files, databases,
// key-value stores) in a straightforward manner.
The API of these types is 39 // intended to support blobs of a "reasonable" size, where any individual blob 40 // can be efficiently processed in memory without streaming or chunking. 41 // 42 // While in principle blobs of arbitrary size may be stored, an implementation 43 // may reject "very large" blobs. Practically an implementation should try to 44 // accept blobs on the order of (up to) ~100MIB, but may reject blobs much 45 // larger than that. This interface is intended to store data that is 46 // partitioned at a higher level in the protocol, and may not be a good fit for 47 // use cases that require large individual blobs. 48 // 49 // The [memstore] package provides an implementation suitable for use in 50 // testing. The [filestore] package provides an implementation that uses files 51 // and directories on a local filesystem. More interesting implementations 52 // using other storage libraries can be found in other repositories. 53 // 54 // [memstore]: https://godoc.org/github.com/creachadair/ffs/blob/memstore 55 // [filestore]: https://godoc.org/github.com/creachadair/ffs/storage/filestore 56 package blob 57 58 import ( 59 "context" 60 "errors" 61 "iter" 62 63 "github.com/creachadair/mds/mapset" 64 "golang.org/x/crypto/blake2b" 65 ) 66 67 // A Store represents a collection of key-value namespaces ("keyspaces") 68 // identified by string labels. Each keyspace in a store is logically distinct; 69 // the keys from one space are independent of the keys in another. 70 // 71 // Implementations of this interface must be safe for concurrent use by 72 // multiple goroutines. 73 // 74 // The KV and CAS methods share a namespace, meaning that a KV and a CAS 75 // derived from the same Store and using the same name must share the same 76 // underlying key-value space. In particular a Put to a KV or a CASPut to a 77 // CAS must be visible to a Get or List from either. 78 type Store interface { 79 // KV returns a key space on the store. 
80 // 81 // Multiple calls to KV with the same name are not required to return 82 // exactly the same KV value, but should return values that will converge 83 // (eventually) to the same view of the storage. 84 KV(ctx context.Context, name string) (KV, error) 85 86 // CAS returns a content-addressed key space on the store. 87 // 88 // Multiple calls to CAS with the same name are not required to return 89 // exactly the same CAS value, but should return values that will converge 90 // (eventually) to the same view of the storage. 91 // 92 // Implementations of this method that do not require special handling are 93 // encouraged to use CASFromKV to derive a CAS from a KV. 94 CAS(ctx context.Context, name string) (CAS, error) 95 96 // Sub returns a new Store subordinate to the receiver (a "substore"). 97 // A substore shares logical storage with its parent store, but keyspaces 98 // derived from the substore are distinct from keyspaces of the parent store 99 // or any other substores derived from it. 100 // 101 // Multiple calls to Sub with the same name are not required to return 102 // exactly the same [Store] value, but should return values that will 103 // converge (eventually) to the same view of the storage. 104 Sub(ctx context.Context, name string) (Store, error) 105 } 106 107 // Closer is an extension interface representing the ability to close and 108 // release resources claimed by a storage component. 109 type Closer interface { 110 Close(context.Context) error 111 } 112 113 // StoreCloser combines a [Store] with a Close method that settles state and 114 // releases any resources from the store when it is no longer in use. 115 type StoreCloser interface { 116 Store 117 Closer 118 } 119 120 // KVCore is the common interface shared by implementations of a key-value 121 // namespace. Users will generally not use this interface directly; it is 122 // included by reference in [KV] and [CAS]. 
123 type KVCore interface { 124 // Get fetches the contents of a blob from the store. If the key is not 125 // found in the store, Get must report an ErrKeyNotFound error. 126 Get(ctx context.Context, key string) ([]byte, error) 127 128 // Has reports which of the specified keys are present in the store. 129 // The result set contains one entry for each requested key that is present 130 // in the store. If none of the requested keys is present, the resulting set 131 // may be either empty or nil. 132 Has(ctx context.Context, keys ...string) (KeySet, error) 133 134 // Delete atomically removes a blob from the store. If the key is not found 135 // in the store, Delete must report an ErrKeyNotFound error. 136 Delete(ctx context.Context, key string) error 137 138 // List returns an iterator over each key in the store greater than or equal 139 // to start, in lexicographic order. 140 // 141 // Requirements: 142 // 143 // Each pair reported by the iterator MUST be either a valid key and a nil 144 // error, or an empty key and a non-nil error. 145 // 146 // After the iterator reports an error, it MUST immediately return, even if 147 // the yield function reports true. 148 // 149 // The caller should check the error as part of iteration: 150 // 151 // for key, err := range kv.List(ctx, start) { 152 // if err != nil { 153 // return fmt.Errorf("list: %w", err) 154 // } 155 // // ... process key 156 // } 157 // 158 // It must be safe to call Get, Has, List, and Len during iteration. 159 // A caller should not attempt to modify the store while listing, unless the 160 // storage implementation documents that it is safe to do so. 161 List(ctx context.Context, start string) iter.Seq2[string, error] 162 163 // Len reports the number of keys currently in the store. 164 Len(ctx context.Context) (int64, error) 165 } 166 167 // A KV represents a mutable set of key-value pairs in which each value is 168 // identified by a unique, opaque string key. 
An implementation of KV is 169 // permitted (but not required) to report an error from Put when given an empty 170 // key. If the implementation cannot store empty keys, it must report 171 // [ErrKeyNotFound] when operating on an empty key (see [KeyNotFound]). 172 // 173 // Implementations of this interface must be safe for concurrent use by 174 // multiple goroutines. Moreover, any sequence of operations on a KV that does 175 // not overlap with any Delete executions must be [linearizable]. 176 // 177 // [linearizable]: https://en.wikipedia.org/wiki/Linearizability 178 type KV interface { 179 KVCore 180 181 // Put writes a blob to the store. If the store already contains the 182 // specified key and opts.Replace is true, the existing value is replaced 183 // without error; otherwise Put must report an ErrKeyExists error without 184 // modifying the previous value.. 185 Put(ctx context.Context, opts PutOptions) error 186 } 187 188 // CAS represents a mutable set of content-addressed key-value pairs in which 189 // each value is identified by a unique, opaque string key. 190 type CAS interface { 191 KVCore 192 193 // CASPut writes data to a content-addressed blob in the underlying store, 194 // and returns the assigned key. The target key is returned even in case of 195 // error. 196 CASPut(ctx context.Context, data []byte) (string, error) 197 198 // CASKey returns the content address of data without modifying the store. 199 // This must be the same value that would be returned by a successful call 200 // to CASPut on data. 201 CASKey(ctx context.Context, data []byte) string 202 } 203 204 // PutOptions regulate the behaviour of the Put method of a [KV] 205 // implementation. 206 type PutOptions struct { 207 Key string // the key to associate with the data 208 Data []byte // the data to write 209 Replace bool // whether to replace an existing value for this key 210 } 211 212 // CASFromKV converts a [KV] into a [CAS]. 
This is intended for use by storage 213 // implementations to support the CAS method of the [Store] interface. 214 // 215 // If the concrete type of kv already implements [CAS], it is returned as-is; 216 // otherwise it is wrapped in an implementation that computes content addresses 217 // using a [blake2b] digest of the content. 218 // 219 // [blake2b]: https://datatracker.ietf.org/doc/html/rfc7693 220 func CASFromKV(kv KV) CAS { 221 if cas, ok := kv.(CAS); ok { 222 return cas 223 } 224 return hashCAS{kv} 225 } 226 227 // CASFromKVError converts a [KV] into a [CAS]. This is a convenience wrapper 228 // to combine an error check with a call to [CASFromKV] for use in storage 229 // implementations. 230 func CASFromKVError(kv KV, err error) (CAS, error) { 231 if err != nil { 232 return nil, err 233 } 234 return CASFromKV(kv), nil 235 } 236 237 var ( 238 // ErrKeyExists is reported by Put when writing a key that already exists in 239 // the store. 240 ErrKeyExists = errors.New("key already exists") 241 242 // ErrKeyNotFound is reported by Get or Size when given a key that does not 243 // exist in the store. 244 ErrKeyNotFound = errors.New("key not found") 245 ) 246 247 // IsKeyNotFound reports whether err or is or wraps ErrKeyNotFound. 248 // It is false if err == nil. 249 func IsKeyNotFound(err error) bool { 250 return err != nil && errors.Is(err, ErrKeyNotFound) 251 } 252 253 // IsKeyExists reports whether err is or wraps ErrKeyExists. 254 func IsKeyExists(err error) bool { 255 return err != nil && errors.Is(err, ErrKeyExists) 256 } 257 258 // KeyError is the concrete type of errors involving a blob key. 259 // The caller may type-assert to [*KeyError] to recover the key. 260 type KeyError struct { 261 Err error // the underlying error 262 Key string // the key implicated by the error 263 } 264 265 // Error implements the error interface for KeyError. 
266 // The default error string does not include the key, since error values are 267 // often logged by default and keys may be sensitive. 268 func (k *KeyError) Error() string { return k.Err.Error() } 269 270 // Unwrap returns the underlying error from k, to support error wrapping. 271 func (k *KeyError) Unwrap() error { return k.Err } 272 273 // KeyNotFound returns an ErrKeyNotFound error reporting that key was not found. 274 // The concrete type is [*KeyError]. 275 func KeyNotFound(key string) error { return &KeyError{Key: key, Err: ErrKeyNotFound} } 276 277 // KeyExists returns an ErrKeyExists error reporting that key exists in the store. 278 // The concrete type is [*KeyError]. 279 func KeyExists(key string) error { return &KeyError{Key: key, Err: ErrKeyExists} } 280 281 // KeySet represents a set of keys. It is aliased here so the caller does not 282 // need to explicitly import [mapset]. 283 type KeySet = mapset.Set[string] 284 285 // A HashCAS is a content-addressable wrapper that adds the CAS methods to a 286 // delegated [KV]. 287 type hashCAS struct{ KV } 288 289 // hash is the digest function used to compute content addresses for hashCAS. 290 var hash = blake2b.Sum256 291 292 // key computes the content key for data using the provided hash. 293 func (c hashCAS) key(data []byte) string { 294 h := hash(data) 295 return string(h[:]) 296 } 297 298 // CASPut writes data to a content-addressed blob in the underlying store, and 299 // returns the assigned key. The target key is returned even in case of error. 300 func (c hashCAS) CASPut(ctx context.Context, data []byte) (string, error) { 301 key := c.key(data) 302 303 // Skip writing if the content address is already present. 304 if st, err := c.Has(ctx, key); err == nil && st.Has(key) { 305 return key, nil 306 } 307 308 // Write the block to storage. Because we are using a content address we 309 // do not request replacement, but we also don't consider it an error if 310 // the address already exists. 
311 err := c.Put(ctx, PutOptions{ 312 Key: key, 313 Data: data, 314 Replace: false, 315 }) 316 if IsKeyExists(err) { 317 err = nil 318 } 319 return key, err 320 } 321 322 // CASKey constructs the content address for the specified data. 323 func (c hashCAS) CASKey(_ context.Context, data []byte) string { return c.key(data) } 324 325 // SyncKeys reports which of the given keys are not present in the key space. 326 // If all the keys are present, SyncKeys returns an empty [KeySet]. 327 func SyncKeys(ctx context.Context, ks KVCore, keys []string) (KeySet, error) { 328 if len(keys) == 0 { 329 return nil, nil 330 } 331 have, err := ks.Has(ctx, keys...) 332 if err != nil { 333 return nil, err 334 } 335 var missing KeySet 336 for _, key := range keys { 337 if !have.Has(key) { 338 missing.Add(key) 339 } 340 } 341 return missing, nil 342 }