github.com/ipld/go-ipld-prime@v0.21.0/storage/fsstore/fsstore.go (about)

     1  package fsstore
     2  
     3  import (
     4  	"context"
     5  	"crypto/rand"
     6  	"encoding/base32"
     7  	"encoding/hex"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  
    13  	"github.com/ipld/go-ipld-prime/storage/sharding"
    14  )
    15  
    16  // Store is implements storage.ReadableStorage and storage.WritableStorage,
    17  // as well as quite a few of the other extended storage feature interfaces,
    18  // backing it with simple filesystem operations.
    19  //
    20  // This implementation uses golang's usual `os` package for IO,
    21  // so it should be highly portable.
    22  //
    23  // Both the sharding and escaping functions are configurable,
    24  // but a typical recommended setup is to use base32 encoding,
    25  // and a sharding function that returns two shards of two characters each.
    26  // The escaping and sharding functions should be chosen with regard to each other --
    27  // the sharding function is applied to the escaped form.
    28  type Store struct {
    29  	basepath     string
    30  	escapingFunc func(string) string
    31  	shardingFunc func(key string, shards *[]string)
    32  }
    33  
    34  func (store *Store) InitDefaults(basepath string) error {
    35  	return store.Init(
    36  		basepath,
    37  		b32enc,             // The same function as go-ipfs uses: see https://github.com/ipfs/go-ipfs-ds-help/blob/48b9cc210923d23b39582b5fa6670ed0d08dc2af/key.go#L20-L22 .
    38  		sharding.Shard_r12, // Equivalent to what go-ipfs uses by default with flatfs: see https://github.com/ipfs/go-ipfs/blob/52a747763f6c4e85b33ca051cda9cc4b75c815f9/docs/config.md#datastorespec and grep for "shard/v1/next-to-last/2".
    39  	)
    40  }
    41  
    42  func (store *Store) Init(
    43  	basepath string,
    44  	escapingFunc func(string) string,
    45  	shardingFunc func(key string, shards *[]string),
    46  ) error {
    47  	// Simple args and state check.
    48  	if basepath == "" {
    49  		return fmt.Errorf("fsstore: invalid setup args: need a path")
    50  	}
    51  	if store.basepath != "" {
    52  		return fmt.Errorf("fsstore: cannot init: is already initialized")
    53  	}
    54  	store.basepath = basepath
    55  	store.escapingFunc = escapingFunc
    56  	store.shardingFunc = shardingFunc
    57  
    58  	// Make sure basepath is a dir, and make sure the staging and content dirs exist.
    59  	if err := CheckAndMakeBasepath(basepath); err != nil {
    60  		return err
    61  	}
    62  
    63  	// That's it for setup on this one.
    64  	return nil
    65  }
    66  
    67  var b32encoder = base32.StdEncoding.WithPadding(base32.NoPadding)
    68  
    69  func b32enc(in string) string {
    70  	return b32encoder.EncodeToString([]byte(in))
    71  }
    72  
    73  // pathForKey applies sharding funcs as well as adds the basepath prefix,
    74  // returning a string ready to use as a filesystem path.
    75  func (store *Store) pathForKey(key string) string {
    76  	shards := make([]string, 1, 4) // future work: would be nice if we could reuse this rather than fresh allocating.
    77  	shards[0] = store.basepath     // not part of the path shard, but will be a param to Join, so, practical to put here.
    78  	//shards[1] = storageDir       // not part of the path shard, but will be a param to Join, so, practical to put here.
    79  	store.shardingFunc(key, &shards)
    80  	return filepath.Join(shards...)
    81  }
    82  
    83  // Has implements go-ipld-prime/storage.Storage.Has.
    84  func (store *Store) Has(ctx context.Context, key string) (bool, error) {
    85  	_, err := os.Stat(store.pathForKey(key))
    86  	if err == nil {
    87  		return true, nil
    88  	}
    89  	if os.IsNotExist(err) {
    90  		return false, nil
    91  	}
    92  	return false, err
    93  }
    94  
    95  // Get implements go-ipld-prime/storage.ReadableStorage.Get.
    96  func (store *Store) Get(ctx context.Context, key string) ([]byte, error) {
    97  	f, err := store.GetStream(ctx, key)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  	defer f.(io.Closer).Close()
   102  	return io.ReadAll(f)
   103  }
   104  
   105  // Put implements go-ipld-prime/storage.WritableStorage.Put.
   106  func (store *Store) Put(ctx context.Context, key string, content []byte) error {
   107  	// We can't improve much on what we get by wrapping the stream interface;
   108  	//  we always end up using a streaming action on the very bottom because that's how file writing works
   109  	//   (especially since we care about controlling the write flow enough to be able to do the atomic move at the end).
   110  	wr, wrCommitter, err := store.PutStream(ctx)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	// Write, all at once.
   115  	// Note we can ignore the size return, because the contract of io.Writer states "Write must return a non-nil error if it returns n < len(p)".
   116  	_, err = wr.Write(content)
   117  	if err != nil {
   118  		wrCommitter("")
   119  		return err
   120  	}
   121  	// Commit.
   122  	return wrCommitter(key)
   123  }
   124  
   125  // GetStream implements go-ipld-prime/storage.StreamingReadableStorage.GetStream.
   126  func (store *Store) GetStream(ctx context.Context, key string) (io.ReadCloser, error) {
   127  	if ctx.Err() != nil {
   128  		return nil, ctx.Err()
   129  	}
   130  
   131  	// Figure out where we expect it to be.
   132  	destpath := store.pathForKey(key)
   133  
   134  	// Open and return.
   135  	// TODO: we should normalize things like "not exists" errors before hurling them up the stack.
   136  	return os.OpenFile(destpath, os.O_RDONLY, 0)
   137  }
   138  
   139  // PutStream implements go-ipld-prime/storage.StreamingWritableStorage.PutStream.
   140  func (store *Store) PutStream(ctx context.Context) (io.Writer, func(string) error, error) {
   141  	for {
   142  		if ctx.Err() != nil {
   143  			return nil, nil, ctx.Err()
   144  		}
   145  		// Open a new file in the staging area, with a random name.
   146  		var bs [8]byte
   147  		rand.Read(bs[:])
   148  		stagepath := filepath.Join(store.basepath, stagingDir, hex.EncodeToString(bs[:]))
   149  		f, err := os.OpenFile(stagepath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0666)
   150  		if os.IsExist(err) {
   151  			continue
   152  		}
   153  		if err != nil {
   154  			return nil, nil, fmt.Errorf("fsstore.BeginWrite: could not create a staging file: %w", err)
   155  		}
   156  		// Okay, got a handle.  Return it... and its commit closure.
   157  		return f, func(key string) error {
   158  			// Close the staging file.
   159  			if err := f.Close(); err != nil {
   160  				return err
   161  			}
   162  			if key == "" {
   163  				return os.Remove(stagepath)
   164  			}
   165  			// n.b. there is a lack of fsync here.  I am going to choose to believe that a sane filesystem will not let me do a 'move' without flushing somewhere in between.
   166  			// Fun little note: there are some times in history where this belief is not backed -- but, mostly, the evolution of kernel and filesystem development seems to have considered that a mistake,
   167  			// and things do again typically take 'move' as a strong cue to flush, unless you've actively configured your system oddly.
   168  			// See https://en.wikipedia.org/wiki/Ext4#Delayed_allocation_and_potential_data_loss for some fun history regarding Ext4;
   169  			// but ultimately, note that the kernel decided to again make 'move' cause flush, and has done so since 2.6.30, which came out sometime in 2009.
   170  			// Accordingly, our lack of fsync here seems justified.
   171  			// However, if you *really* find a system in the wild where this is problematic,
   172  			// *and* you cannot make your application recover gracefully (which should be relatively easy, because... content addressing; you can't have inconsistency, at least!),
   173  			// *and* you cannot configure your filesystem to have the level of durability and sanity that you want, so you must fix it in application land...
   174  			// then... patches welcome.  :)
   175  			//
   176  			// History also seems to indicate that if we add fsyncs hereabouts, people will usually just turn around and seek to disable them for performance reasons;
   177  			// so by default, it seems best to just not do the dance of having a default that people hate.
   178  
   179  			// Figure out where we want it to go.
   180  			destpath := store.pathForKey(key)
   181  
   182  			// Get it there.
   183  			return move(stagepath, destpath)
   184  		}, nil
   185  	}
   186  }
   187  
   188  const stagingDir = ".temp" // same as flatfs uses.
   189  
   190  func CheckAndMakeBasepath(basepath string) error {
   191  	// Is this basepath a dir?
   192  	// (This is TOCTOU, obviously, but also it's nice to sanity check early and return error quickly because it's probably a setup error.)
   193  	if fi, err := os.Stat(basepath); err != nil {
   194  		return fmt.Errorf("fsstore: cannot init: basepath must be a directory: %w", err)
   195  	} else {
   196  		if !fi.IsDir() {
   197  			return fmt.Errorf("fsstore: cannot init: basepath must be a directory")
   198  		}
   199  	}
   200  
   201  	// Make sure the staging dir exists.
   202  	err := os.Mkdir(filepath.Join(basepath, stagingDir), 0777)
   203  	switch {
   204  	case err == nil:
   205  		// excellent.
   206  	case os.IsExist(err):
   207  		// sanity check it's a directory already.
   208  		fi, err := os.Stat(filepath.Join(basepath, stagingDir))
   209  		if err != nil {
   210  			return fmt.Errorf("fsstore: failed to make staging dir: %w", err)
   211  		}
   212  		if !fi.IsDir() {
   213  			return fmt.Errorf("fsstore: staging dir path contains not a dir")
   214  		}
   215  	default:
   216  		return fmt.Errorf("fsstore: failed to make staging dir: %w", err)
   217  	}
   218  
   219  	return nil
   220  }
   221  
   222  // move file at stagepath to destpath.
   223  // First, attempt to directly rename to the destination;
   224  // if we get a ENOENT error code, that means the parent didn't exist, and we make that and then retry.
   225  // If making the parent failed: recurse, and use similar logic.
   226  //
   227  // This optimistic approach should have fewer syscall RTTs when most of the parents exist
   228  // than would be taken if we checked that each parent segment exists.
   229  //
   230  // (An alternative approach would be to blindly mkdir the parent segments every time,
   231  // rather than do this backwards stepping.  Have not benchmarked these against each other.)
   232  func move(stagepath, destpath string) error {
   233  	err := os.Rename(stagepath, destpath)
   234  	if os.IsNotExist(err) {
   235  		// This probably means parent of destpath doesn't exist yet, so we'll make it.
   236  		//  It's technically a race condition to assume that this is because destpath has no parents vs that stagepath hasn't been removed out from underneath us, but, alas; kernel ABIs.
   237  		//   If we did this will all fds, it could be somewhat better.
   238  		//    (This is certainly possible, at least in linux; but we'd have to import the syscall package and do it ourselves, which is not a rubicon we're willing to cross in this package.)
   239  		//   In practice, this is probably not going to kerfuffle things.
   240  		if err := haveDir(filepath.Dir(destpath)); err != nil {
   241  			return err
   242  		}
   243  		// Now try again.
   244  		//  (And don't return quite yet; there's one more check to do, because someone might've raced us.)
   245  		err = os.Rename(stagepath, destpath)
   246  	}
   247  	if os.IsExist(err) {
   248  		// Oh!  Some content is already there?
   249  		//  We're a write-once (presumed-to-be-)content-addressable blob store -- that means *we keep what already exists*.
   250  		//  FIXME: no, I wish this is how the Rename function worked, but it is not, actually.
   251  		return os.Remove(stagepath)
   252  	}
   253  	return err
   254  }
   255  
   256  // haveDir tries to make sure a directory exists at pth.
   257  // If this sounds a lot like os.MkdirAll: yes,
   258  // except this function is going to assume if it exists, it's a dir,
   259  // and that saves us some stat syscalls.
   260  func haveDir(pth string) error {
   261  	err := os.Mkdir(pth, 0777)
   262  	if os.IsNotExist(err) {
   263  		if err := haveDir(filepath.Dir(pth)); err != nil {
   264  			return err
   265  		}
   266  		return os.Mkdir(pth, 0777)
   267  	}
   268  	return err
   269  }