github.com/creachadair/ffs@v0.17.3/file/file.go (about) 1 // Copyright 2019 Michael J. Fromberger. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package file implements a File API over a content-addressable blob.Store. 16 // 17 // A File as defined by this package differs from the POSIX file model in that 18 // any File may have both binary content and "children". Thus, any File is also 19 // a directory, which can contain other files in a Merkle tree structure. 20 // 21 // A File is addressed by a storage key, corresponding to the current state of 22 // its content, metadata, and children (recursively). File metadata are stored 23 // as wire-format protocol buffers, as defined in file/wiretype/wiretype.proto. 24 // 25 // Basic usage: 26 // 27 // ctx := context.Background() 28 // 29 // f := file.New(cas, nil) // create a new, empty file 30 // f.WriteAt(ctx, data, 0) // write some data to the file 31 // key, err := f.Flush(ctx) // commit the file to storage 32 // 33 // To open an existing file, 34 // 35 // f, err := file.Open(ctx, cas, key) 36 // 37 // The I/O methods of a File require a context argument. For compatibility with 38 // the standard interfaces in the io package, a File provides a wrapper for a 39 // request-scoped context: 40 // 41 // _, err := io.Copy(dst, f.Cursor(ctx)) 42 // 43 // A value of the file.Cursor type should not be used outside the dynamic 44 // extent of the request whose context it captures. 45 // 46 // # Metadata 47 // 48 // A File supports a subset of POSIX style data metadata, including mode, 49 // modification time, and owner/group identity. These metadata are not 50 // interpreted by the API, but will be persisted if they are set. 51 // 52 // By default, a File does not persist stat metadata. To enable stat 53 // persistence, you may either set the Stat field of file.NewOptions when the 54 // File is created, or use the Persist method of the Stat value to enable or 55 // disable persistence: 56 // 57 // s := f.Stat() 58 // s.ModTime = time.Now() 59 // s.Update().Persist(true) 60 // 61 // The file.Stat type defines the stat attributes that can be persisted. 62 // 63 // # Synchronization 64 // 65 // The exported methods of *File and the views of its data (Child, Data, Stat, 66 // XAttr) are safe for concurrent use by multiple goroutines. 67 package file 68 69 import ( 70 "context" 71 "errors" 72 "fmt" 73 "io" 74 "slices" 75 "sort" 76 "sync" 77 "time" 78 79 "github.com/creachadair/ffs/blob" 80 "github.com/creachadair/ffs/block" 81 "github.com/creachadair/ffs/file/wiretype" 82 ) 83 84 // New constructs a new, empty File with the given options and backed by s. The 85 // caller must call the new file's Flush method to ensure it is written to 86 // storage. If opts == nil, defaults are chosen. 87 func New(s blob.CAS, opts *NewOptions) *File { 88 if opts == nil { 89 opts = new(NewOptions) 90 } 91 f := &File{ 92 s: s, 93 name: opts.Name, 94 saveStat: opts.PersistStat, 95 data: fileData{sc: opts.Split}, 96 xattr: make(map[string]string), 97 } 98 // If the options contain stat metadata, copy them in. 99 if opts.Stat != nil { 100 f.setStatLocked(*opts.Stat) 101 } 102 return f 103 } 104 105 // NewOptions control the creation of new files. 106 type NewOptions struct { 107 // The name to attribute to the new file. The name of a File is not 108 // persisted in storage. 109 Name string 110 111 // Stat, if non-nil, is the initial stat metadata for the file. Note that 112 // stat metadata will not be persisted to storage when the file is flushed 113 // unless PersistStat is also true. 114 Stat *Stat 115 116 // PersistStat is whether stat metadata for the new file should be persisted 117 // to storage when the file is written out. 118 PersistStat bool 119 120 // The block splitter configuration to use. If omitted, the default values 121 // from the split package are used. Split configurations are not persisted 122 // in storage, but descendants created from a file (via the New method) will 123 // inherit the parent file config if they do not specify their own. 124 Split *block.SplitConfig 125 } 126 127 // Open opens an existing file given its storage key in s. 128 func Open(ctx context.Context, s blob.CAS, key string) (*File, error) { 129 var obj wiretype.Object 130 if err := wiretype.Load(ctx, s, key, &obj); err != nil { 131 return nil, fmt.Errorf("load %x: %w", key, err) 132 } 133 f := &File{s: s, key: key} 134 if err := f.fromWireType(&obj); err != nil { 135 return nil, fmt.Errorf("decode file %x: %w", key, err) 136 } 137 return f, nil 138 } 139 140 // A File represents a writable file stored in a content-addressable blobstore. 141 type File struct { 142 s blob.CAS 143 144 mu sync.RWMutex 145 name string // if this file is a child, its attributed name 146 key string // the storage key for the file record (wiretype.Node) 147 148 stat Stat // file metadata 149 saveStat bool // whether to persist file metadata 150 151 data fileData // binary file data 152 kids []child // ordered lexicographically by name 153 xattr map[string]string // extended attributes 154 } 155 156 // A child records the name and storage key of a child file. 157 type child struct { 158 Name string 159 Key string // the storage key of the child 160 File *File // the opened file for the child 161 162 // When a file is loaded from storage, the Key of each child is populated 163 // but its File is not created until explicitly requested. After the child 164 // is opened, the Key may go out of sync with the file due to modifications 165 // by the caller: When the enclosing file is flushed, any child with a File 166 // attached is also flushed and the Key is updated. 167 } 168 169 // findChildLocked reports whether f has a child with the specified name and 170 // its index in the slice if so, or otherwise -1. 171 func (f *File) findChildLocked(name string) (int, bool) { 172 if n := sort.Search(len(f.kids), func(i int) bool { 173 return f.kids[i].Name >= name 174 }); n < len(f.kids) && f.kids[n].Name == name { 175 return n, true 176 } 177 return -1, false 178 } 179 180 func (f *File) setStatLocked(s Stat) { 181 f.stat = s 182 if f.saveStat { 183 f.invalLocked() 184 } 185 } 186 187 func (f *File) invalLocked() { f.key = "" } 188 189 func (f *File) modifyLocked() { f.invalLocked(); f.stat.ModTime = time.Now() } 190 191 // New constructs a new empty node backed by the same store as f. 192 // If f persists stat metadata, then the new file does too, even if 193 // opts.PersistStat is false. The caller can override this default via the Stat 194 // view after the file is created. 195 func (f *File) New(opts *NewOptions) *File { 196 out := New(f.s, opts) 197 if f.saveStat { 198 out.saveStat = true 199 } 200 201 // Propagate the parent split settings to the child, if the child did not 202 // have any specifically defined. 203 if opts == nil || opts.Split == nil { 204 out.data.sc = f.data.sc 205 } 206 return out 207 } 208 209 // Stat returns the current stat metadata for f. Calling this method does not 210 // change stat persistence for f, use the Clear and Update methods of the Stat 211 // value to do that. 212 func (f *File) Stat() Stat { 213 f.mu.RLock() 214 defer f.mu.RUnlock() 215 cp := f.stat 216 cp.f = f 217 return cp 218 } 219 220 // FileInfo returns a [FileInfo] record for f. The resulting value is a 221 // snapshot at the moment of construction, and does not track changes to the 222 // file after the value was constructed. 223 func (f *File) FileInfo() FileInfo { 224 if f == nil { 225 return FileInfo{} 226 } 227 f.mu.Lock() 228 defer f.mu.Unlock() 229 return FileInfo{ 230 name: f.name, 231 size: f.data.totalBytes, 232 mode: f.stat.Mode, 233 modTime: f.stat.ModTime, 234 file: f, 235 } 236 } 237 238 // Data returns a view of the file content for f. 239 func (f *File) Data() Data { return Data{f: f} } 240 241 var ( 242 // ErrChildNotFound indicates that a requested child file does not exist. 243 ErrChildNotFound = errors.New("child file not found") 244 ) 245 246 // Open opens the specified child file of f, or returns ErrChildNotFound if no 247 // such child exists. 248 func (f *File) Open(ctx context.Context, name string) (*File, error) { 249 f.mu.Lock() 250 defer f.mu.Unlock() 251 i, ok := f.findChildLocked(name) 252 if !ok { 253 return nil, fmt.Errorf("open %q: %w", name, ErrChildNotFound) 254 } 255 if c := f.kids[i].File; c != nil { 256 return c, nil 257 } 258 c, err := Open(ctx, f.s, f.kids[i].Key) 259 if err == nil { 260 c.name = name // remember the name the file was opened with 261 f.kids[i].File = c 262 } 263 return c, err 264 } 265 266 // Load loads an existing file given its storage key in the store used by f. 267 // The specified file need not necessarily be a child of f. 268 func (f *File) Load(ctx context.Context, key string) (*File, error) { 269 return Open(ctx, f.s, key) 270 } 271 272 // Child returns a view of the children of f. 273 func (f *File) Child() Child { return Child{f: f} } 274 275 // ReadAt reads up to len(data) bytes into data from the given offset, and 276 // reports the number of bytes successfully read, as io.ReaderAt. 277 func (f *File) ReadAt(ctx context.Context, data []byte, offset int64) (int, error) { 278 f.mu.RLock() 279 defer f.mu.RUnlock() 280 return f.data.readAt(ctx, f.s, data, offset) 281 } 282 283 // WriteAt writes len(data) bytes from data at the given offset, and reports 284 // the number of bytes successfully written, as io.WriterAt. 285 func (f *File) WriteAt(ctx context.Context, data []byte, offset int64) (int, error) { 286 f.mu.Lock() 287 defer f.mu.Unlock() 288 defer f.modifyLocked() 289 return f.data.writeAt(ctx, f.s, data, offset) 290 } 291 292 // Flush flushes the current state of the file to storage if necessary, and 293 // returns the resulting storage key. This is the canonical way to obtain the 294 // storage key for a file. 295 func (f *File) Flush(ctx context.Context) (string, error) { 296 f.mu.Lock() 297 defer f.mu.Unlock() 298 return f.recFlushLocked(ctx, nil) 299 } 300 301 // Key returns the storage key of f if it is known, or "" if the file has not 302 // been flushed to storage in its current form. 303 func (f *File) Key() string { f.mu.RLock(); defer f.mu.RUnlock(); return f.key } 304 305 // recFlushLocked recursively flushes f and all its child nodes. The path gives 306 // the path of nodes from the root to the current flush target, and is used to 307 // verify that there are no cycles in the graph. 308 func (f *File) recFlushLocked(ctx context.Context, path []*File) (string, error) { 309 // Recursive flush is a long operation, check for timeout/cancellation. 310 if ctx.Err() != nil { 311 return "", ctx.Err() 312 } 313 needsUpdate := f.key == "" 314 315 // Flush any cached children. 316 for i, kid := range f.kids { 317 if kf := kid.File; kf != nil { 318 // Check for direct or indirect cycles. This check is quadratic in the 319 // height of the DAG over the whole scan in the worst case. In 320 // practice, this doesn't cause any real issues, since it's not common 321 // for file structures to be very deep. Compared to the cost of 322 // marshaling and writing back invalid entries to storage, the array 323 // scan is minor. 324 if slices.Contains(path, kf) { 325 return "", fmt.Errorf("flush: cycle in path at %p", kf) 326 } 327 cpath := append(path, f) 328 fkey, err := func() (string, error) { 329 kf.mu.Lock() 330 defer kf.mu.Unlock() 331 return kf.recFlushLocked(ctx, cpath) 332 }() 333 if err != nil { 334 return "", err 335 } 336 if fkey != kid.Key { 337 needsUpdate = true 338 } 339 f.kids[i].Key = fkey 340 } 341 } 342 343 if needsUpdate { 344 key, err := wiretype.Save(ctx, f.s, f.toWireTypeLocked()) 345 if err != nil { 346 return "", fmt.Errorf("flushing file %x: %w", key, err) 347 } 348 f.key = key 349 } 350 return f.key, nil 351 } 352 353 // Truncate modifies the length of f to end at offset, extending or contracting 354 // it as necessary. 355 func (f *File) Truncate(ctx context.Context, offset int64) error { 356 f.mu.Lock() 357 defer f.mu.Unlock() 358 defer f.modifyLocked() 359 return f.data.truncate(ctx, f.s, offset) 360 } 361 362 // SetData fully reads r replaces the binary contents of f with its data. 363 // On success, any existing data for f are discarded. In case of error, the 364 // contents of f are not changed. 365 func (f *File) SetData(ctx context.Context, r io.Reader) error { 366 s := block.NewSplitter(r, f.data.sc) 367 fd, err := newFileData(s, func(data []byte) (string, error) { 368 return f.s.CASPut(ctx, data) 369 }) 370 if err != nil { 371 return err 372 } 373 f.mu.Lock() 374 defer f.mu.Unlock() 375 f.invalLocked() 376 f.data = fd 377 return nil 378 } 379 380 // Name reports the attributed name of f, which may be "" if f is not a child 381 // file and was not assigned a name at creation. 382 func (f *File) Name() string { f.mu.RLock(); defer f.mu.RUnlock(); return f.name } 383 384 func (f *File) setName(name string) { f.mu.Lock(); defer f.mu.Unlock(); f.name = name } 385 386 // A ScanItem is the argument to the Scan callback. 387 type ScanItem struct { 388 *File // the current file being visited 389 390 Name string // the name of File within its parent ("" at the root) 391 } 392 393 // Scan recursively visits f and all its descendants in depth-first 394 // left-to-right order, calling visit for each file. If visit returns false, 395 // no descendants of f are visited. 396 // 397 // The visit function may modify the attributes or contents of the files it 398 // visits, but the caller is responsible for flushing the root of the scan 399 // afterward to persist changes to storage. 400 func (f *File) Scan(ctx context.Context, visit func(ScanItem) bool) error { 401 f.mu.Lock() 402 defer f.mu.Unlock() 403 return f.recScanLocked(ctx, "", func(s ScanItem) bool { 404 // Yield the lock while the caller visitor runs, then reacquire it. We 405 // do this so that the visitor can use methods that may themselves update 406 // the file, without deadlocking on the scan. 407 s.File.mu.Unlock() // N.B. unlock → lock 408 defer s.File.mu.Lock() 409 return visit(s) 410 }) 411 } 412 413 // recScanLocked recursively scans f and all its child nodes in depth-first 414 // left-to-right order, calling visit for each file. 415 func (f *File) recScanLocked(ctx context.Context, name string, visit func(ScanItem) bool) error { 416 if err := ctx.Err(); err != nil { 417 return err 418 } 419 if !visit(ScanItem{File: f, Name: name}) { 420 return nil // skip the descendants of f 421 } 422 for i, kid := range f.kids { 423 fp := kid.File 424 if fp == nil { 425 // If the child was not already open, we need to do so to scan it, but 426 // we won't persist it in the parent unless the visitor invalidated it. 427 var err error 428 fp, err = Open(ctx, f.s, kid.Key) 429 if err != nil { 430 return err 431 } 432 } 433 err := func() error { 434 fp.mu.Lock() 435 defer fp.mu.Unlock() 436 return fp.recScanLocked(ctx, kid.Name, visit) 437 }() 438 if err != nil { 439 return err 440 } 441 442 // If scanning invalidated fp, make sure the parent copy is updated. 443 // This ensures the parent will include these changes in a flush. 444 if fp.key == "" { 445 f.kids[i].File = fp 446 } 447 } 448 return nil 449 } 450 451 // Cursor binds f with a context so that it can be used to satisfy the standard 452 // interfaces defined by the io package. The resulting cursor may be used only 453 // during the lifetime of the request whose context it binds. 454 func (f *File) Cursor(ctx context.Context) *Cursor { return &Cursor{ctx: ctx, file: f} } 455 456 // XAttr returns a view of the extended attributes of f. 457 func (f *File) XAttr() XAttr { return XAttr{f: f} } 458 459 // Precondition: The caller holds f.mu exclusively, or has the only reference to f. 460 func (f *File) fromWireType(obj *wiretype.Object) error { 461 pb, ok := obj.Value.(*wiretype.Object_Node) 462 if !ok { 463 return errors.New("object does not contain a node") 464 } 465 466 pb.Node.Normalize() 467 f.data = fileData{} // reset 468 if err := f.data.fromWireType(pb.Node.Index); err != nil { 469 return fmt.Errorf("index: %w", err) 470 } 471 f.stat.fromWireType(pb.Node.Stat) 472 f.saveStat = pb.Node.Stat != nil 473 474 f.xattr = make(map[string]string) 475 for _, xa := range pb.Node.XAttrs { 476 f.xattr[xa.Name] = string(xa.Value) 477 } 478 479 f.kids = nil 480 for _, kid := range pb.Node.Children { 481 f.kids = append(f.kids, child{ 482 Name: kid.Name, 483 Key: string(kid.Key), 484 }) 485 } 486 return nil 487 } 488 489 func (f *File) toWireTypeLocked() *wiretype.Object { 490 n := &wiretype.Node{Index: f.data.toWireType()} 491 if f.saveStat { 492 n.Stat = f.stat.toWireType() 493 } 494 for name, value := range f.xattr { 495 n.XAttrs = append(n.XAttrs, &wiretype.XAttr{ 496 Name: name, 497 Value: []byte(value), 498 }) 499 } 500 for _, kid := range f.kids { 501 n.Children = append(n.Children, &wiretype.Child{ 502 Name: kid.Name, 503 Key: []byte(kid.Key), 504 }) 505 } 506 n.Normalize() 507 return &wiretype.Object{Value: &wiretype.Object_Node{Node: n}} 508 } 509 510 // Encode translates f as a protobuf message for storage. 511 func Encode(f *File) *wiretype.Object { 512 f.mu.RLock() 513 defer f.mu.RUnlock() 514 return f.toWireTypeLocked() 515 }