github.com/creachadair/ffs@v0.17.3/file/data.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package file
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"io"
    21  
    22  	"github.com/creachadair/ffs/blob"
    23  	"github.com/creachadair/ffs/block"
    24  	"github.com/creachadair/ffs/file/wiretype"
    25  	"github.com/creachadair/mds/mbits"
    26  )
    27  
// A data value represents an ordered sequence of bytes stored in a blob.Store.
// Other than length, no metadata are preserved. File data are recorded as a
// flat array of discontiguous extents, kept in increasing order of base
// offset; byte ranges covered by no extent read back as zeroes (see readAt).
type fileData struct {
	sc         *block.SplitConfig // splitter settings used when (re)blocking data
	totalBytes int64              // total logical file size, including unstored (zero) ranges
	extents    []*extent          // stored spans, ordered by increasing base offset

	// Cache of last successfully-read block. This helps avoid reloading the
	// same block repeatedly during incremental reads.
	lastKey  string // storage key of the cached block ("" when empty)
	lastData []byte // contents of the block stored under lastKey
}
    41  
    42  func (d *fileData) getBlock(ctx context.Context, s blob.CAS, key string) ([]byte, error) {
    43  	if key == d.lastKey {
    44  		return d.lastData, nil
    45  	}
    46  	data, err := s.Get(ctx, key)
    47  	if err == nil {
    48  		d.lastKey = key
    49  		d.lastData = data
    50  	}
    51  	return data, err
    52  }
    53  
    54  // isSingleBlock reports whether d can be represented as a single-block node.
    55  func (d *fileData) isSingleBlock() bool {
    56  	return len(d.extents) == 1 && d.extents[0].base == 0 && // one extent starting at offset 0
    57  		len(d.extents[0].blocks) == 1 && // it contains exactly one block
    58  		d.extents[0].blocks[0].bytes == d.totalBytes // that block is the entire content
    59  }
    60  
    61  // toWireType converts d to wire encoding.
    62  func (d *fileData) toWireType() *wiretype.Index {
    63  	if d.totalBytes == 0 && len(d.extents) == 0 {
    64  		// No data in this file.
    65  		return nil
    66  	}
    67  
    68  	// Many small files contain just one block of data spanning the entire file.
    69  	// When that occurs, just store the key of that block. No normalization is
    70  	// required in this case and we save a few bytes.
    71  	if d.isSingleBlock() {
    72  		return &wiretype.Index{
    73  			TotalBytes: uint64(d.totalBytes),
    74  			Single:     []byte(d.extents[0].blocks[0].key),
    75  		}
    76  	}
    77  
    78  	// At this point we have multiple blocks and/or a weird shape (e.g., sparse
    79  	// extents), so we actually have to do some work to pack and normalize them.
    80  	w := &wiretype.Index{
    81  		TotalBytes: uint64(d.totalBytes),
    82  		Extents:    make([]*wiretype.Extent, len(d.extents)),
    83  	}
    84  	for i, ext := range d.extents {
    85  		x := &wiretype.Extent{
    86  			Base:   uint64(ext.base),
    87  			Bytes:  uint64(ext.bytes),
    88  			Blocks: make([]*wiretype.Block, len(ext.blocks)),
    89  		}
    90  		for j, blk := range ext.blocks {
    91  			x.Blocks[j] = &wiretype.Block{
    92  				Bytes: uint64(blk.bytes),
    93  				Key:   []byte(blk.key),
    94  			}
    95  		}
    96  		w.Extents[i] = x
    97  	}
    98  	w.Normalize()
    99  	return w
   100  }
   101  
   102  // fromWireType replaces the contents of d from the wire encoding pb.
   103  func (d *fileData) fromWireType(pb *wiretype.Index) error {
   104  	if pb == nil {
   105  		return nil
   106  	}
   107  
   108  	d.totalBytes = int64(pb.TotalBytes)
   109  	if len(pb.Single) != 0 {
   110  		if len(pb.Extents) != 0 {
   111  			return errors.New("invalid index: single-block and extents both set")
   112  		}
   113  		d.extents = []*extent{{
   114  			base:   0,
   115  			bytes:  d.totalBytes,
   116  			blocks: []cblock{{key: string(pb.Single), bytes: d.totalBytes}},
   117  		}}
   118  		return nil
   119  	}
   120  
   121  	pb.Normalize()
   122  	d.extents = make([]*extent, len(pb.Extents))
   123  	for i, ext := range pb.Extents {
   124  		d.extents[i] = &extent{
   125  			base:   int64(ext.Base),
   126  			bytes:  int64(ext.Bytes),
   127  			blocks: make([]cblock, len(ext.Blocks)),
   128  		}
   129  		for j, blk := range ext.Blocks {
   130  			d.extents[i].blocks[j] = cblock{
   131  				bytes: int64(blk.Bytes),
   132  				key:   string(blk.Key),
   133  			}
   134  		}
   135  	}
   136  	return nil
   137  }
   138  
   139  // size reports the size of the data in bytes.
   140  func (d *fileData) size() int64 { return d.totalBytes }
   141  
   142  // blocks calls f once for each block used by d, giving the key and the size of
   143  // the blob. If the same blob is repeated, f will be called multiple times for
   144  // the same key.
   145  func (d *fileData) blocks(f func(int64, string)) {
   146  	for _, ext := range d.extents {
   147  		for _, blk := range ext.blocks {
   148  			f(blk.bytes, blk.key)
   149  		}
   150  	}
   151  }
   152  
   153  // truncate modifies the length of the file to end at offset, extending or
   154  // contracting it as necessary. Contraction may require splitting a block.
   155  func (d *fileData) truncate(ctx context.Context, s blob.CAS, offset int64) error {
   156  	if offset >= d.totalBytes {
   157  		d.totalBytes = offset
   158  		return nil
   159  	}
   160  	pre, span, _ := d.splitSpan(0, offset)
   161  	if len(span) != 0 {
   162  		n := len(span) - 1
   163  		last := span[n]
   164  		span = span[:n]
   165  
   166  		// If the offset transects a block, read that block and write back its
   167  		// prefix. If the offset is exactly at the start of the block, we can
   168  		// skip that step and discard the whole block.
   169  		if i, pos := last.findBlock(offset); i >= 0 && offset > pos {
   170  			keep := last.blocks[:i]
   171  			bits, err := s.Get(ctx, last.blocks[i].key)
   172  			if err != nil {
   173  				return err
   174  			}
   175  			blks, err := d.splitBlobs(ctx, s, bits[:int(offset-pos)])
   176  			if err != nil {
   177  				return err
   178  			}
   179  			span = append(span, splitExtent(&extent{
   180  				base:   last.base,
   181  				bytes:  offset - last.base,
   182  				blocks: append(keep, blks...),
   183  			})...)
   184  		}
   185  	}
   186  	d.extents = append(pre, span...)
   187  	d.totalBytes = offset
   188  	return nil
   189  }
   190  
// writeAt writes the contents of data at the specified offset in d.  It
// returns the number of bytes successfully written, and satisfies the
// semantics of io.WriterAt. Existing blocks cut by the write are re-read,
// and the surviving fragments are re-split and stored along with data.
func (d *fileData) writeAt(ctx context.Context, s blob.CAS, data []byte, offset int64) (int, error) {
	if len(data) == 0 {
		return 0, nil
	}
	// The write covers [offset, end). pre/post are extents untouched by the
	// write; span holds the extents it overlaps or abuts.
	end := offset + int64(len(data))
	pre, span, post := d.splitSpan(offset, end)

	// left/right collect whole blocks of the first/last spanned extent that
	// survive unchanged; parts collects byte segments (partial old blocks
	// plus data itself) that must be re-split into new blocks.
	var left, right []cblock
	var parts [][]byte
	newBase := offset // base offset of the merged extent being assembled
	newEnd := end     // end offset of the merged extent being assembled

	// If this write does not span any existing extents, create a new one
	// containing just this write.
	if len(span) == 0 {
		parts = append(parts, data)
	} else {
		if span[0].base < newBase {
			// The first extent starts before the write. Find the first block
			// split by or contiguous to the write, preserve everything before
			// that, and read in the contents to set up the split.
			newBase = span[0].base

			pos := span[0].base
			for _, blk := range span[0].blocks {
				next := pos + blk.bytes
				if next < offset {
					// Block ends strictly before the write: keep it verbatim.
					left = append(left, blk)
					pos = next
					continue
				}

				// This block is cut by (or touches) the write; its prefix up
				// to offset joins the data to be re-split.
				bits, err := s.Get(ctx, blk.key)
				if err != nil {
					return 0, err
				}
				parts = append(parts, bits[:int(offset-pos)])
				break
			}
		}

		// Insert the main body of the write.
		parts = append(parts, data)

		if last := span[len(span)-1]; last.base+last.bytes >= newEnd {
			// The last extent ends after the write. Find the last block split by
			// or contiguous to the write, preserve everything after that, and
			// read in the contents to set up the split.
			newEnd = last.base + last.bytes

			pos := last.base
			for i, blk := range last.blocks {
				if pos > end {
					// Preserve the rest of this extent
					right = append(right, last.blocks[i:]...)
					break
				}
				next := pos + blk.bytes
				if next <= end {
					pos = next
					continue // skip overwritten block
				}

				// This block is cut by (or touches) the end of the write; its
				// suffix after end joins the data to be re-split.
				bits, err := s.Get(ctx, blk.key)
				if err != nil {
					return 0, err
				}

				parts = append(parts, bits[int(end-pos):])
				pos = next
			}
		}
	}

	// Now write out the combined data and assemble the new index.
	body, err := d.splitBlobs(ctx, s, parts...)
	if err != nil {
		return 0, err
	}

	// N.B. It is possible that this write has created contiguous extents.
	// Rather than fix it here, we rely on the normalization that happens during
	// conversion to wire format, which includes this merge check.

	d.extents = make([]*extent, 0, len(pre)+1+len(post))
	//
	// d.extents = [ ...pre... | ...merged ... | ...post... ]
	//
	d.extents = append(d.extents, pre...)
	d.extents = append(d.extents, splitExtent(&extent{
		base:   newBase,
		bytes:  newEnd - newBase,
		blocks: append(left, append(body, right...)...),
	})...)
	d.extents = append(d.extents, post...)
	if end > d.totalBytes {
		d.totalBytes = end
	}

	return len(data), nil
}
   295  
   296  // readAt reads the content of d into data from the specified offset, returning
   297  // the number of bytes successfully read. It satisfies the semantics of the
   298  // io.ReaderAt interface.
   299  func (d *fileData) readAt(ctx context.Context, s blob.CAS, data []byte, offset int64) (int, error) {
   300  	if offset > d.totalBytes {
   301  		return 0, io.EOF
   302  	}
   303  	end := offset + int64(len(data))
   304  	if end > d.totalBytes {
   305  		end = d.totalBytes
   306  	}
   307  	_, span, _ := d.splitSpan(offset, end)
   308  
   309  	// If the entire requested range is unstored, zero as much as we can
   310  	// attribute given the total file size. Note that io.ReaderAt requires we
   311  	// report an error if the total is less than requested.
   312  	if len(span) == 0 {
   313  		nr := mbits.Zero(data[:int(end-offset)])
   314  		if nr < len(data) {
   315  			return nr, io.EOF
   316  		}
   317  		return nr, nil
   318  	}
   319  
   320  	// At this point, at least some of the data overlap a stored range.  Walk
   321  	// through the extents copying data into the output till we have enough or
   322  	// we run out of spaces.
   323  	nr := 0
   324  walkSpan:
   325  	for _, ext := range span {
   326  		// This extent starts after the current offset, zero-fill up to the
   327  		// beginning of the extent, or we run out ouf space.
   328  		if offset < ext.base {
   329  			cp := min(int(ext.base-offset), len(data)-nr)
   330  			nr += mbits.Zero(data[nr : nr+cp])
   331  			if nr == len(data) {
   332  				break walkSpan
   333  			}
   334  			offset += int64(cp)
   335  		}
   336  
   337  		// The output is not full, and offset at or past the start of this extent.
   338  		// Find the first block containing offset and walk forward.
   339  		i, base := ext.findBlock(offset)
   340  		if i < 0 {
   341  			continue
   342  		}
   343  		for _, blk := range ext.blocks[i:] {
   344  			if base > end {
   345  				break walkSpan
   346  			}
   347  
   348  			// Fetch the block contents and copy whatever we can.
   349  			bits, err := d.getBlock(ctx, s, blk.key)
   350  			if err != nil {
   351  				return 0, err
   352  			}
   353  
   354  			pos := int(offset - base)
   355  			cp := min(len(bits)-pos, len(data)-nr)
   356  			nr += copy(data[nr:], bits[pos:pos+cp])
   357  			if nr == len(data) {
   358  				break walkSpan
   359  			}
   360  			offset += int64(cp)
   361  			base += blk.bytes
   362  		}
   363  
   364  		// Reaching here, data is not yet full and we have not yet gone past the
   365  		// end of the requested range. Go back for another extent, if there is one.
   366  	}
   367  
   368  	// At this point we have all the stored data we can take.  If there is still
   369  	// space in the output, the remaining portions of the range are unstored.
   370  	if nr < len(data) && end > offset {
   371  		cp := int(end - offset)
   372  		if max := len(data) - nr; cp > max {
   373  			cp = max
   374  		}
   375  		nr += mbits.Zero(data[nr : nr+cp])
   376  	}
   377  
   378  	if nr < len(data) {
   379  		return nr, io.EOF
   380  	}
   381  	return nr, nil
   382  }
   383  
// splitBlobs re-blocks the concatenation of the specified blobs and returns
// the resulting blocks. Blocks consisting entirely of zeroes are counted but
// not stored; the caller can detect this by looking for a key of "".
func (d *fileData) splitBlobs(ctx context.Context, s blob.CAS, blobs ...[]byte) ([]cblock, error) {
	data := newBlockReader(blobs)

	var blks []cblock
	if err := block.NewSplitter(data, d.sc).Split(func(blk []byte) error {
		// We do not store blocks of zeroes. They count against the total file
		// size, but we do not explicitly record them.
		zhead, ztail, n := zeroCheck(blk)
		if zhead == n {
			// This block is all zeroes: record its length with an empty key.
			blks = append(blks, cblock{bytes: int64(len(blk))})
			return nil
		}

		if isWorthTrimming(zhead, n) {
			// There is a tranche of zeroes at the head. Inject a "fake" zero block
			// for the prefix, and remove it from the block to be stored.
			blks = append(blks, cblock{bytes: int64(zhead)})
			blk = blk[zhead:]
		}
		wantTail := isWorthTrimming(ztail, n)
		if wantTail {
			// There is a run of zeroes at the tail. Remove the suffix from the
			// block to be stored, and store a fake block for the suffix after it.
			blk = blk[:len(blk)-ztail]
		}

		// Store the non-zero remainder under its content address.
		key, err := s.CASPut(ctx, blk)
		if err != nil {
			return err
		}
		blks = append(blks, cblock{bytes: int64(len(blk)), key: key})

		if wantTail {
			// Inject a "fake" zero block for the suffix.
			blks = append(blks, cblock{bytes: int64(ztail)})
		}
		return nil
	}); err != nil {
		return nil, err
	}
	return blks, nil
}
   430  
   431  // splitSpan returns three subslices of the extents of d, those which end
   432  // entirely before offset lo, those fully containing the range from lo to hi,
   433  // and those which begin entirely at or after offset hi.
   434  //
   435  // If span is empty, the range fully spans unstored data. Otherwise, the first
   436  // and last elements of span are "split" by the range.
   437  func (d *fileData) splitSpan(lo, hi int64) (pre, span, post []*extent) {
   438  	for i, ext := range d.extents {
   439  		if lo > ext.base+ext.bytes {
   440  			pre = append(pre, ext)
   441  		} else if hi < ext.base {
   442  			post = append(post, d.extents[i:]...)
   443  			break // nothing more to do; everything else is bigger
   444  		} else {
   445  			span = append(span, ext)
   446  		}
   447  	}
   448  
   449  	return
   450  }
   451  
// newFileData constructs a new fileData value containing exactly the data from
// s.  For each data block, newFileData calls put to store the block and return
// its key. An error from put stops construction and is reported to the caller.
func newFileData(s *block.Splitter, put func([]byte) (string, error)) (fileData, error) {
	fd := fileData{sc: s.Config()}

	// push ends the current extent, recording it only if it contains at least
	// one stored block, and starts a fresh one at the current total length.
	// Runs of zeroes therefore become gaps between extents, never blocks.
	ext := new(extent)
	push := func() {
		if len(ext.blocks) != 0 {
			fd.extents = append(fd.extents, ext)
		}
		ext = &extent{base: fd.totalBytes}
	}

	err := s.Split(func(data []byte) error {
		dlen := int64(len(data))

		zhead, ztail, n := zeroCheck(data)
		// A block of zeroes ends the current extent. We count the block against
		// the total file size, but do not explicitly store it.
		if zhead == n {
			// N.B. We have to update the total length first, so that push will
			// see the correct new value for the next extent.
			fd.totalBytes += dlen
			push()
			return nil
		}

		// If a block has a lot of zeroes at its head or tail, chop them.  We
		// define "a lot" as a fraction of the block size.
		// NOTE(review): this uses zhead*zhead >= n directly, whereas
		// splitBlobs uses isWorthTrimming (nz >= 13 || nz*nz >= n) — confirm
		// whether the different threshold is intentional.
		if zhead*zhead >= n {
			fd.totalBytes += int64(zhead)
			push()
			data = data[zhead:]
			dlen = int64(len(data))
		}

		// Update the total length regardless whether we have trailing zeroes to
		// remove from the block. Do this BEFORE adjusting the block.
		fd.totalBytes += dlen
		if ztail*ztail >= n {
			// Trim the trailing zeroes from the stored block; the deferred
			// push fires after this block is appended below.
			data = data[:len(data)-ztail]
			dlen = int64(len(data))
			defer push() // start a new extent after this block
		}
		ext.bytes += dlen

		key, err := put(data)
		if err != nil {
			return err
		}
		ext.blocks = append(ext.blocks, cblock{
			bytes: dlen,
			key:   key,
		})

		return nil
	})
	if err != nil {
		return fileData{}, err
	}
	push() // flush any trailing extent

	return fd, nil
}
   517  
   518  // An extent represents a single contiguous stored subrange of a file. The
   519  // blocks record the offsets and block storage keys for the extent.
   520  type extent struct {
   521  	base   int64    // offset of the first byte within the file
   522  	bytes  int64    // number of bytes in the extent
   523  	blocks []cblock // continguous extent blocks
   524  	starts []int64  // block starting offsets, for search
   525  }
   526  
   527  // findBlock returns the index and base offset of the first block in e that
   528  // contains offset. It returns -1, -1 if no block in e contains offset.
   529  func (e *extent) findBlock(offset int64) (int, int64) {
   530  	// After a change, do a linear scan to (re)initialize the offsets cache.
   531  	// Lookups will then fall through to binary search below.
   532  	if len(e.starts) != len(e.blocks) {
   533  		e.starts = make([]int64, len(e.blocks))
   534  		pos := e.base
   535  
   536  		for i, blk := range e.blocks {
   537  			e.starts[i] = pos
   538  			pos += blk.bytes
   539  		}
   540  	}
   541  
   542  	// Subsequent searches binary search.
   543  	lo, hi := 0, len(e.starts)
   544  	for lo < hi {
   545  		mid := (lo + hi) / 2
   546  		base := e.starts[mid]
   547  		if offset < base {
   548  			hi = mid
   549  		} else if offset >= base+e.blocks[mid].bytes {
   550  			lo = mid + 1
   551  		} else {
   552  			return mid, base
   553  		}
   554  	}
   555  	return -1, -1
   556  }
   557  
   558  // A block represents a single content-addressable block of file data.
   559  type cblock struct {
   560  	bytes int64  // number of bytes in the block
   561  	key   string // storage key for this block
   562  }
   563  
   564  // isWorthTrimming reports whether a prefix or suffix of nz zeroes is worth
   565  // removing from a block of length n.
   566  //
   567  // Since trimming a prefix or suffix induces an extent split, we should not
   568  // bother doing this unless the overhead for another extent is at least as much
   569  // as the data we save by trimming. Ignoring the blocks (which take the same
   570  // amount of space regardless how many extents they are split over), the
   571  // overhead of an extent is the type tag and three varints (message length,
   572  // base, and byte count).  Assuming a reasonable "expected worst case" with
   573  // 4-byte varints (28 bits) that's 13 bytes.
   574  //
   575  // However, the smaller the block, the smaller the cost of an extent, with a
   576  // minimum baseline of 1 byte for the byte count. Moreover, splitting a long
   577  // extent shortens the byte count, so a reasonable heuristic average case is a
   578  // 2-3 byte base and 1-2 byte count, or 4-6 bytes. To account for this, use the
   579  // square root of the block size. That's cheaper than a log, and accuracy is
   580  // not important on short sizes.
   581  func isWorthTrimming(nz, n int) bool { return nz >= 13 || nz*nz >= n }