github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/storage.go (about)

     1  package fileset
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"path"
     8  	"strings"
     9  	"time"
    10  
    11  	units "github.com/docker/go-units"
    12  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    13  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk"
    14  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/fileset/index"
    15  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/renew"
    16  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/track"
    17  	"golang.org/x/sync/semaphore"
    18  )
    19  
    20  const (
    21  	// DefaultMemoryThreshold is the default for the memory threshold that must
    22  	// be met before a file set part is serialized (excluding close).
    23  	DefaultMemoryThreshold = 1024 * units.MB
    24  	// DefaultShardThreshold is the default for the size threshold that must
    25  	// be met before a shard is created by the shard function.
    26  	DefaultShardThreshold = 1024 * units.MB
    27  	// DefaultLevelZeroSize is the default size for level zero in the compacted
    28  	// representation of a file set.
    29  	DefaultLevelZeroSize = 1 * units.MB
    30  	// DefaultLevelSizeBase is the default base of the exponential growth function
    31  	// for level sizes in the compacted representation of a file set.
    32  	DefaultLevelSizeBase = 10
    33  	// Diff is the suffix of a path that points to the diff of the prefix.
    34  	Diff = "diff"
    35  	// Compacted is the suffix of a path that points to the compaction of the prefix.
    36  	Compacted = "compacted"
    37  	// TrackerPrefix is used for creating tracker objects for filesets
    38  	TrackerPrefix = "fileset/"
    39  )
    40  
    41  var (
    42  	// ErrNoFileSetFound is returned by the methods on Storage when a fileset does not exist
    43  	ErrNoFileSetFound = errors.Errorf("no fileset found")
    44  )
    45  
// Storage is the abstraction that manages fileset storage.
type Storage struct {
	// tracker records object liveness (TTLs, tombstones) for garbage collection.
	tracker                      track.Tracker
	// store holds fileset metadata (paths and their index metadata).
	store                        Store
	// chunks is the underlying content-addressed chunk storage.
	chunks                       *chunk.Storage
	// memThreshold triggers serialization of an unordered-writer part;
	// shardThreshold bounds the content size of a shard (see shard()).
	memThreshold, shardThreshold int64
	// levelZeroSize and levelSizeBase define the exponential level sizes
	// of the compacted representation: levelZeroSize * levelSizeBase^i.
	levelZeroSize                int64
	levelSizeBase                int
	// filesetSem bounds concurrent fileset operations (effectively unbounded
	// by default; see NewStorage).
	filesetSem                   *semaphore.Weighted
}
    56  
    57  // NewStorage creates a new Storage.
    58  func NewStorage(store Store, tr track.Tracker, chunks *chunk.Storage, opts ...StorageOption) *Storage {
    59  	s := &Storage{
    60  		store:          store,
    61  		tracker:        tr,
    62  		chunks:         chunks,
    63  		memThreshold:   DefaultMemoryThreshold,
    64  		shardThreshold: DefaultShardThreshold,
    65  		levelZeroSize:  DefaultLevelZeroSize,
    66  		levelSizeBase:  DefaultLevelSizeBase,
    67  		filesetSem:     semaphore.NewWeighted(math.MaxInt64),
    68  	}
    69  	for _, opt := range opts {
    70  		opt(s)
    71  	}
    72  	return s
    73  }
    74  
// Store returns the underlying store.
// TODO Store is just used to poke through the information about file set sizes.
// I think there might be a cleaner way to handle this through the file set interface, and changing
// the metadata we expose for a file set as a set of metadata entries.
func (s *Storage) Store() Store {
	return s.store
}
    82  
// ChunkStorage returns the underlying chunk storage instance for this storage instance.
func (s *Storage) ChunkStorage() *chunk.Storage {
	return s.chunks
}
    87  
// NewUnorderedWriter creates a new unordered file set writer for fileSet.
// The writer buffers in memory up to s.memThreshold bytes before serializing
// a part; defaultTag is applied to files written without an explicit tag.
func (s *Storage) NewUnorderedWriter(ctx context.Context, fileSet, defaultTag string, opts ...UnorderedWriterOption) (*UnorderedWriter, error) {
	return newUnorderedWriter(ctx, s, fileSet, s.memThreshold, defaultTag, opts...)
}
    92  
// NewWriter creates a new file set writer for fileSet.
func (s *Storage) NewWriter(ctx context.Context, fileSet string, opts ...WriterOption) *Writer {
	return s.newWriter(ctx, fileSet, opts...)
}
    97  
// newWriter constructs a Writer wired to this Storage's store, tracker, and chunks.
func (s *Storage) newWriter(ctx context.Context, fileSet string, opts ...WriterOption) *Writer {
	return newWriter(ctx, s.store, s.tracker, s.chunks, fileSet, opts...)
}
   101  
// newReader constructs a Reader for the file set at fileSet.
// TODO: Expose some notion of read ahead (read a certain number of chunks in parallel).
// this will be necessary to speed up reading large files.
func (s *Storage) newReader(fileSet string, opts ...index.Option) *Reader {
	return newReader(s.store, s.chunks, fileSet, opts...)
}
   107  
   108  // Open opens a file set for reading.
   109  // TODO: It might make sense to have some of the file set transforms as functional options here.
   110  func (s *Storage) Open(ctx context.Context, fileSets []string, opts ...index.Option) (FileSet, error) {
   111  	var fss []FileSet
   112  	for _, fileSet := range fileSets {
   113  		if err := s.store.Walk(ctx, fileSet, func(p string) error {
   114  			fss = append(fss, s.newReader(p, opts...))
   115  			return nil
   116  		}); err != nil {
   117  			return nil, err
   118  		}
   119  	}
   120  	if len(fss) == 0 {
   121  		return &emptyReader{}, nil
   122  	}
   123  	if len(fss) == 1 {
   124  		return fss[0], nil
   125  	}
   126  	return newMergeReader(s.chunks, fss), nil
   127  }
   128  
// Shard shards the file set into path ranges, invoking cb once per shard.
// TODO This should be extended to be more configurable (different criteria
// for creating shards).
func (s *Storage) Shard(ctx context.Context, fs FileSet, cb ShardCallback) error {
	return shard(ctx, fs, s.shardThreshold, cb)
}
   135  
// ShardCallback is a callback that is called with the path range of each shard.
type ShardCallback func(*index.PathRange) error
   138  
   139  // shard creates shards (path ranges) from the file set streams being merged.
   140  // A shard is created when the size of the content for a path range is greater than
   141  // the passed in shard threshold.
   142  // For each shard, the callback is called with the path range for the shard.
   143  func shard(ctx context.Context, fs FileSet, shardThreshold int64, cb ShardCallback) error {
   144  	var size int64
   145  	pathRange := &index.PathRange{}
   146  	if err := fs.Iterate(ctx, func(f File) error {
   147  		// A shard is created when we have encountered more than shardThreshold content bytes.
   148  		if size >= shardThreshold {
   149  			pathRange.Upper = f.Index().Path
   150  			if err := cb(pathRange); err != nil {
   151  				return err
   152  			}
   153  			size = 0
   154  			pathRange = &index.PathRange{
   155  				Lower: f.Index().Path,
   156  			}
   157  		}
   158  		size += index.SizeBytes(f.Index())
   159  		return nil
   160  	}); err != nil {
   161  		return err
   162  	}
   163  	return cb(pathRange)
   164  }
   165  
   166  // Copy copies the fileset at srcPrefix to dstPrefix. It does *not* perform compaction
   167  // ttl sets the time to live on the keys under dstPrefix if ttl == 0, it is ignored
   168  func (s *Storage) Copy(ctx context.Context, srcPrefix, dstPrefix string, ttl time.Duration) error {
   169  	// TODO: perform this atomically with postgres
   170  	return s.store.Walk(ctx, srcPrefix, func(srcPath string) error {
   171  		dstPath := dstPrefix + srcPath[len(srcPrefix):]
   172  		return copyPath(ctx, s.store, s.store, srcPath, dstPath, s.tracker, ttl)
   173  	})
   174  }
   175  
// CompactStats contains information about what was compacted.
type CompactStats struct {
	// OutputSize is the total content size, in bytes, of the output fileset.
	OutputSize int64
}
   180  
   181  // Compact compacts a set of filesets into an output fileset.
   182  func (s *Storage) Compact(ctx context.Context, outputFileSet string, inputFileSets []string, ttl time.Duration, opts ...index.Option) (*CompactStats, error) {
   183  	var size int64
   184  	w := s.newWriter(ctx, outputFileSet, WithTTL(ttl), WithIndexCallback(func(idx *index.Index) error {
   185  		size += index.SizeBytes(idx)
   186  		return nil
   187  	}))
   188  	fs, err := s.Open(ctx, inputFileSets)
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  	if err := CopyFiles(ctx, w, fs, true); err != nil {
   193  		return nil, err
   194  	}
   195  	if err := w.Close(); err != nil {
   196  		return nil, err
   197  	}
   198  	return &CompactStats{OutputSize: size}, nil
   199  }
   200  
// CompactSpec specifies the input and output for a compaction operation.
type CompactSpec struct {
	// Output is the store path the compaction should write to.
	Output string
	// Input lists the store paths to merge, ordered with priority from
	// least to greatest (see compactSpec).
	Input  []string
}
   206  
   207  // CompactSpec returns a compaction specification that determines the input filesets (the diff file set and potentially
   208  // compacted filesets) and output fileset.
   209  func (s *Storage) CompactSpec(ctx context.Context, fileSet string, compactedFileSet ...string) (*CompactSpec, error) {
   210  	if len(compactedFileSet) > 1 {
   211  		return nil, errors.Errorf("multiple compacted FileSets")
   212  	}
   213  	spec, err := s.compactSpec(ctx, fileSet, compactedFileSet...)
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  	return spec, nil
   218  }
   219  
// compactSpec computes which store paths feed the next compaction of fileSet's
// diff and which level the output lands in, optionally merging with an
// existing compacted fileset (at most one, enforced by CompactSpec).
func (s *Storage) compactSpec(ctx context.Context, fileSet string, compactedFileSet ...string) (ret *CompactSpec, retErr error) {
	md, err := s.store.Get(ctx, path.Join(fileSet, Diff))
	if err != nil {
		return nil, err
	}
	size := md.SizeBytes
	spec := &CompactSpec{
		Input: []string{path.Join(fileSet, Diff)},
	}
	var level int
	// Handle first commit being compacted.
	if len(compactedFileSet) == 0 {
		// No base to merge with: the output level is the smallest level whose
		// capacity can hold the diff.
		for size > s.levelSize(level) {
			level++
		}
		spec.Output = path.Join(fileSet, Compacted, levelName(level))
		return spec, nil
	}
	// While we can't fit it all in the current level
	for {
		levelPath := path.Join(compactedFileSet[0], Compacted, levelName(level))
		md, err := s.store.Get(ctx, levelPath)
		if err != nil {
			// A missing level is expected (levels are sparse); any other
			// error aborts the spec computation.
			if err != ErrPathNotExists {
				return nil, err
			}
		} else {
			// The base has content at this level; it must be merged in, which
			// grows the amount we need to fit.
			spec.Input = append(spec.Input, levelPath)
			size += md.SizeBytes
		}
		if size <= s.levelSize(level) {
			break
		}
		level++
	}
	// Now we know the output level
	spec.Output = path.Join(fileSet, Compacted, levelName(level))
	// Copy the other levels that may exist
	if err := s.store.Walk(ctx, path.Join(compactedFileSet[0], Compacted), func(src string) error {
		lName := path.Base(src)
		l, err := parseLevel(lName)
		if err != nil {
			return err
		}
		// Levels above the output are untouched by this compaction; carry
		// them over into the new fileset as-is.
		if l > level {
			dst := path.Join(fileSet, Compacted, levelName(l))
			if err := copyPath(ctx, s.store, s.store, src, dst, s.tracker, 0); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	// Inputs should be ordered with priority from least to greatest.
	// (Reverse in place: the diff was appended first but has highest priority.)
	for i := 0; i < len(spec.Input)/2; i++ {
		spec.Input[i], spec.Input[len(spec.Input)-1-i] = spec.Input[len(spec.Input)-1-i], spec.Input[i]
	}
	return spec, nil
}
   280  
   281  // Delete deletes a fileset.
   282  func (s *Storage) Delete(ctx context.Context, fileSet string) error {
   283  	return s.store.Walk(ctx, fileSet, func(name string) error {
   284  		oid := filesetObjectID(name)
   285  		if err := s.store.Delete(ctx, name); err != nil {
   286  			return err
   287  		}
   288  		return s.tracker.MarkTombstone(ctx, oid)
   289  	})
   290  }
   291  
   292  // SetTTL sets the time-to-live for the prefix p.
   293  func (s *Storage) SetTTL(ctx context.Context, p string, ttl time.Duration) (time.Time, error) {
   294  	oid := filesetObjectID(p)
   295  	return s.tracker.SetTTLPrefix(ctx, oid, ttl)
   296  }
   297  
   298  // WithRenewer calls cb with a Renewer, and a context which will be canceled if the renewer is unable to renew a path.
   299  func (s *Storage) WithRenewer(ctx context.Context, ttl time.Duration, cb func(context.Context, *renew.StringSet) error) error {
   300  	rf := func(ctx context.Context, p string, ttl time.Duration) error {
   301  		_, err := s.SetTTL(ctx, p, ttl)
   302  		return err
   303  	}
   304  	return renew.WithStringSet(ctx, ttl, rf, cb)
   305  }
   306  
   307  // GC creates a track.GarbageCollector with a Deleter that can handle deleting filesets and chunks
   308  func (s *Storage) GC(ctx context.Context) error {
   309  	const period = 10 * time.Second
   310  	tmpDeleter := track.NewTmpDeleter()
   311  	chunkDeleter := s.chunks.NewDeleter()
   312  	filesetDeleter := &deleter{
   313  		store: s.store,
   314  	}
   315  	mux := track.DeleterMux(func(id string) track.Deleter {
   316  		switch {
   317  		case strings.HasPrefix(id, track.TmpTrackerPrefix):
   318  			return tmpDeleter
   319  		case strings.HasPrefix(id, chunk.TrackerPrefix):
   320  			return chunkDeleter
   321  		case strings.HasPrefix(id, TrackerPrefix):
   322  			return filesetDeleter
   323  		default:
   324  			return nil
   325  		}
   326  	})
   327  	gc := track.NewGarbageCollector(s.tracker, period, mux)
   328  	return gc.Run(ctx)
   329  }
   330  
   331  func (s *Storage) levelSize(i int) int64 {
   332  	return s.levelZeroSize * int64(math.Pow(float64(s.levelSizeBase), float64(i)))
   333  }
   334  
   335  const subFileSetFmt = "%020d"
   336  const levelFmt = "level_" + subFileSetFmt
   337  
   338  // SubFileSetStr returns the string representation of a subfileset.
   339  func SubFileSetStr(subFileSet int64) string {
   340  	return fmt.Sprintf(subFileSetFmt, subFileSet)
   341  }
   342  
   343  func levelName(i int) string {
   344  	return fmt.Sprintf(levelFmt, i)
   345  }
   346  
   347  func parseLevel(x string) (int, error) {
   348  	var y int
   349  	_, err := fmt.Sscanf(x, levelFmt, &y)
   350  	return y, err
   351  }
   352  
   353  func filesetObjectID(p string) string {
   354  	return "fileset/" + p
   355  }
   356  
// Compile-time check that deleter satisfies track.Deleter.
var _ track.Deleter = &deleter{}

// deleter handles tracker-driven deletion of fileset objects in the store.
type deleter struct {
	store Store
}

// Delete is currently a no-op.
// TODO: This needs to be implemented, temporary filesets are still in Postgres.
func (d *deleter) Delete(ctx context.Context, id string) error {
	return nil
}