github.com/ipld/go-ipld-prime@v0.21.0/storage/fsstore/fsstore.go (about) 1 package fsstore 2 3 import ( 4 "context" 5 "crypto/rand" 6 "encoding/base32" 7 "encoding/hex" 8 "fmt" 9 "io" 10 "os" 11 "path/filepath" 12 13 "github.com/ipld/go-ipld-prime/storage/sharding" 14 ) 15 16 // Store is implements storage.ReadableStorage and storage.WritableStorage, 17 // as well as quite a few of the other extended storage feature interfaces, 18 // backing it with simple filesystem operations. 19 // 20 // This implementation uses golang's usual `os` package for IO, 21 // so it should be highly portable. 22 // 23 // Both the sharding and escaping functions are configurable, 24 // but a typical recommended setup is to use base32 encoding, 25 // and a sharding function that returns two shards of two characters each. 26 // The escaping and sharding functions should be chosen with regard to each other -- 27 // the sharding function is applied to the escaped form. 28 type Store struct { 29 basepath string 30 escapingFunc func(string) string 31 shardingFunc func(key string, shards *[]string) 32 } 33 34 func (store *Store) InitDefaults(basepath string) error { 35 return store.Init( 36 basepath, 37 b32enc, // The same function as go-ipfs uses: see https://github.com/ipfs/go-ipfs-ds-help/blob/48b9cc210923d23b39582b5fa6670ed0d08dc2af/key.go#L20-L22 . 38 sharding.Shard_r12, // Equivalent to what go-ipfs uses by default with flatfs: see https://github.com/ipfs/go-ipfs/blob/52a747763f6c4e85b33ca051cda9cc4b75c815f9/docs/config.md#datastorespec and grep for "shard/v1/next-to-last/2". 39 ) 40 } 41 42 func (store *Store) Init( 43 basepath string, 44 escapingFunc func(string) string, 45 shardingFunc func(key string, shards *[]string), 46 ) error { 47 // Simple args and state check. 
48 if basepath == "" { 49 return fmt.Errorf("fsstore: invalid setup args: need a path") 50 } 51 if store.basepath != "" { 52 return fmt.Errorf("fsstore: cannot init: is already initialized") 53 } 54 store.basepath = basepath 55 store.escapingFunc = escapingFunc 56 store.shardingFunc = shardingFunc 57 58 // Make sure basepath is a dir, and make sure the staging and content dirs exist. 59 if err := CheckAndMakeBasepath(basepath); err != nil { 60 return err 61 } 62 63 // That's it for setup on this one. 64 return nil 65 } 66 67 var b32encoder = base32.StdEncoding.WithPadding(base32.NoPadding) 68 69 func b32enc(in string) string { 70 return b32encoder.EncodeToString([]byte(in)) 71 } 72 73 // pathForKey applies sharding funcs as well as adds the basepath prefix, 74 // returning a string ready to use as a filesystem path. 75 func (store *Store) pathForKey(key string) string { 76 shards := make([]string, 1, 4) // future work: would be nice if we could reuse this rather than fresh allocating. 77 shards[0] = store.basepath // not part of the path shard, but will be a param to Join, so, practical to put here. 78 //shards[1] = storageDir // not part of the path shard, but will be a param to Join, so, practical to put here. 79 store.shardingFunc(key, &shards) 80 return filepath.Join(shards...) 81 } 82 83 // Has implements go-ipld-prime/storage.Storage.Has. 84 func (store *Store) Has(ctx context.Context, key string) (bool, error) { 85 _, err := os.Stat(store.pathForKey(key)) 86 if err == nil { 87 return true, nil 88 } 89 if os.IsNotExist(err) { 90 return false, nil 91 } 92 return false, err 93 } 94 95 // Get implements go-ipld-prime/storage.ReadableStorage.Get. 96 func (store *Store) Get(ctx context.Context, key string) ([]byte, error) { 97 f, err := store.GetStream(ctx, key) 98 if err != nil { 99 return nil, err 100 } 101 defer f.(io.Closer).Close() 102 return io.ReadAll(f) 103 } 104 105 // Put implements go-ipld-prime/storage.WritableStorage.Put. 
106 func (store *Store) Put(ctx context.Context, key string, content []byte) error { 107 // We can't improve much on what we get by wrapping the stream interface; 108 // we always end up using a streaming action on the very bottom because that's how file writing works 109 // (especially since we care about controlling the write flow enough to be able to do the atomic move at the end). 110 wr, wrCommitter, err := store.PutStream(ctx) 111 if err != nil { 112 return err 113 } 114 // Write, all at once. 115 // Note we can ignore the size return, because the contract of io.Writer states "Write must return a non-nil error if it returns n < len(p)". 116 _, err = wr.Write(content) 117 if err != nil { 118 wrCommitter("") 119 return err 120 } 121 // Commit. 122 return wrCommitter(key) 123 } 124 125 // GetStream implements go-ipld-prime/storage.StreamingReadableStorage.GetStream. 126 func (store *Store) GetStream(ctx context.Context, key string) (io.ReadCloser, error) { 127 if ctx.Err() != nil { 128 return nil, ctx.Err() 129 } 130 131 // Figure out where we expect it to be. 132 destpath := store.pathForKey(key) 133 134 // Open and return. 135 // TODO: we should normalize things like "not exists" errors before hurling them up the stack. 136 return os.OpenFile(destpath, os.O_RDONLY, 0) 137 } 138 139 // PutStream implements go-ipld-prime/storage.StreamingWritableStorage.PutStream. 140 func (store *Store) PutStream(ctx context.Context) (io.Writer, func(string) error, error) { 141 for { 142 if ctx.Err() != nil { 143 return nil, nil, ctx.Err() 144 } 145 // Open a new file in the staging area, with a random name. 
146 var bs [8]byte 147 rand.Read(bs[:]) 148 stagepath := filepath.Join(store.basepath, stagingDir, hex.EncodeToString(bs[:])) 149 f, err := os.OpenFile(stagepath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0666) 150 if os.IsExist(err) { 151 continue 152 } 153 if err != nil { 154 return nil, nil, fmt.Errorf("fsstore.BeginWrite: could not create a staging file: %w", err) 155 } 156 // Okay, got a handle. Return it... and its commit closure. 157 return f, func(key string) error { 158 // Close the staging file. 159 if err := f.Close(); err != nil { 160 return err 161 } 162 if key == "" { 163 return os.Remove(stagepath) 164 } 165 // n.b. there is a lack of fsync here. I am going to choose to believe that a sane filesystem will not let me do a 'move' without flushing somewhere in between. 166 // Fun little note: there are some times in history where this belief is not backed -- but, mostly, the evolution of kernel and filesystem development seems to have considered that a mistake, 167 // and things do again typically take 'move' as a strong cue to flush, unless you've actively configured your system oddly. 168 // See https://en.wikipedia.org/wiki/Ext4#Delayed_allocation_and_potential_data_loss for some fun history regarding Ext4; 169 // but ultimately, note that the kernel decided to again make 'move' cause flush, and has done so since 2.6.30, which came out sometime in 2009. 170 // Accordingly, our lack of fsync here seems justified. 171 // However, if you *really* find a system in the wild where this is problematic, 172 // *and* you cannot make your application recover gracefully (which should be relatively easy, because... content addressing; you can't have inconsistency, at least!), 173 // *and* you cannot configure your filesystem to have the level of durability and sanity that you want, so you must fix it in application land... 174 // then... patches welcome. 
:) 175 // 176 // History also seems to indicate that if we add fsyncs hereabouts, people will usually just turn around and seek to disable them for performance reasons; 177 // so by default, it seems best to just not do the dance of having a default that people hate. 178 179 // Figure out where we want it to go. 180 destpath := store.pathForKey(key) 181 182 // Get it there. 183 return move(stagepath, destpath) 184 }, nil 185 } 186 } 187 188 const stagingDir = ".temp" // same as flatfs uses. 189 190 func CheckAndMakeBasepath(basepath string) error { 191 // Is this basepath a dir? 192 // (This is TOCTOU, obviously, but also it's nice to sanity check early and return error quickly because it's probably a setup error.) 193 if fi, err := os.Stat(basepath); err != nil { 194 return fmt.Errorf("fsstore: cannot init: basepath must be a directory: %w", err) 195 } else { 196 if !fi.IsDir() { 197 return fmt.Errorf("fsstore: cannot init: basepath must be a directory") 198 } 199 } 200 201 // Make sure the staging dir exists. 202 err := os.Mkdir(filepath.Join(basepath, stagingDir), 0777) 203 switch { 204 case err == nil: 205 // excellent. 206 case os.IsExist(err): 207 // sanity check it's a directory already. 208 fi, err := os.Stat(filepath.Join(basepath, stagingDir)) 209 if err != nil { 210 return fmt.Errorf("fsstore: failed to make staging dir: %w", err) 211 } 212 if !fi.IsDir() { 213 return fmt.Errorf("fsstore: staging dir path contains not a dir") 214 } 215 default: 216 return fmt.Errorf("fsstore: failed to make staging dir: %w", err) 217 } 218 219 return nil 220 } 221 222 // move file at stagepath to destpath. 223 // First, attempt to directly rename to the destination; 224 // if we get a ENOENT error code, that means the parent didn't exist, and we make that and then retry. 225 // If making the parent failed: recurse, and use similar logic. 
226 // 227 // This optimistic approach should have fewer syscall RTTs when most of the parents exist 228 // than would be taken if we checked that each parent segment exists. 229 // 230 // (An alternative approach would be to blindly mkdir the parent segments every time, 231 // rather than do this backwards stepping. Have not benchmarked these against each other.) 232 func move(stagepath, destpath string) error { 233 err := os.Rename(stagepath, destpath) 234 if os.IsNotExist(err) { 235 // This probably means parent of destpath doesn't exist yet, so we'll make it. 236 // It's technically a race condition to assume that this is because destpath has no parents vs that stagepath hasn't been removed out from underneath us, but, alas; kernel ABIs. 237 // If we did this will all fds, it could be somewhat better. 238 // (This is certainly possible, at least in linux; but we'd have to import the syscall package and do it ourselves, which is not a rubicon we're willing to cross in this package.) 239 // In practice, this is probably not going to kerfuffle things. 240 if err := haveDir(filepath.Dir(destpath)); err != nil { 241 return err 242 } 243 // Now try again. 244 // (And don't return quite yet; there's one more check to do, because someone might've raced us.) 245 err = os.Rename(stagepath, destpath) 246 } 247 if os.IsExist(err) { 248 // Oh! Some content is already there? 249 // We're a write-once (presumed-to-be-)content-addressable blob store -- that means *we keep what already exists*. 250 // FIXME: no, I wish this is how the Rename function worked, but it is not, actually. 251 return os.Remove(stagepath) 252 } 253 return err 254 } 255 256 // haveDir tries to make sure a directory exists at pth. 257 // If this sounds a lot like os.MkdirAll: yes, 258 // except this function is going to assume if it exists, it's a dir, 259 // and that saves us some stat syscalls. 
//
// An "already exists" error from Mkdir is treated as success
// (whatever is at the path is assumed to be a directory),
// so callers racing to create the same directory all succeed.
// Any other error is returned as-is.
func haveDir(pth string) error {
	err := os.Mkdir(pth, 0777)
	if os.IsNotExist(err) {
		// The parent is missing: make it (recursively), then try once more.
		if err := haveDir(filepath.Dir(pth)); err != nil {
			return err
		}
		err = os.Mkdir(pth, 0777)
	}
	if os.IsExist(err) {
		// Already there -- either from before, or because someone else just made it.
		return nil
	}
	return err
}