github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/exec/store.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package exec
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/binary"
    11  	"fmt"
    12  	"hash/fnv"
    13  	"io"
    14  	"io/ioutil"
    15  	"strconv"
    16  	"sync"
    17  
    18  	"github.com/grailbio/base/errors"
    19  	"github.com/grailbio/base/file"
    20  )
    21  
// sliceInfo stores metadata for a stored slice. It is the value returned
// by Store.Stat.
type sliceInfo struct {
	// Size is the raw, encoded byte size of the stored slice.
	// A value of -1 indicates the size is unknown.
	Size int64
	// Records contains the number of records in the stored slice.
	// A value of -1 indicates the number of records is unknown.
	Records int64
}
    31  
// A writeCommitter represents a committable write stream into a store.
// Data is written through the embedded io.Writer; the stream is then
// finished with either Commit or Discard.
type writeCommitter interface {
	io.Writer
	// Commit commits the written data to storage. The caller should
	// provide the number of records written as metadata.
	Commit(ctx context.Context, records int64) error
	// Discard discards the writer; it will not be committed.
	Discard(ctx context.Context)
}
    41  
// Store is an abstraction that stores partitioned data as produced by a task.
// Stored data is addressed by a (task name, partition) pair.
type Store interface {
	// Create returns a writer that populates data for the given
	// task name and partition. The data is not available to Open
	// until the returned writeCommitter has been committed.
	//
	// TODO(marius): should we allow writes to be discarded as well?
	// NOTE(review): writeCommitter already exposes Discard, so this TODO
	// may be stale; confirm before removing.
	Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error)

	// Open returns a ReadCloser from which the stored contents of the named task
	// and partition can be read. If the task and partition are not stored, an
	// error with kind errors.NotExist is returned. The offset specifies the byte
	// position from which to read.
	Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error)

	// Stat returns metadata for the stored slice.
	Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error)

	// Discard discards the data stored for task and partition. Subsequent calls
	// to Open for the given (task, partition) will fail. ReadClosers that
	// already exist may start returning errors, depending on the
	// implementation. If no such (task, partition) is stored, returns a non-nil
	// error.
	Discard(ctx context.Context, task TaskName, partition int) error
}
    67  
// memoryStore is a Store implementation that maintains in-memory buffers
// of task output.
//
// tasks holds, per task name, the stored bytes indexed by partition; a nil
// entry means that partition is not stored. counts holds the record count
// for each partition, indexed the same way as tasks. mu is held by get,
// put and Discard while they access either map.
type memoryStore struct {
	mu     sync.Mutex
	tasks  map[TaskName][][]byte
	counts map[TaskName][]int64
}
    75  
    76  func newMemoryStore() *memoryStore {
    77  	return &memoryStore{
    78  		tasks:  make(map[TaskName][][]byte),
    79  		counts: make(map[TaskName][]int64),
    80  	}
    81  }
    82  
    83  func (m *memoryStore) get(task TaskName, partition int) ([]byte, int64) {
    84  	m.mu.Lock()
    85  	defer m.mu.Unlock()
    86  	if len(m.tasks[task]) <= partition {
    87  		return nil, 0
    88  	}
    89  	return m.tasks[task][partition], m.counts[task][partition]
    90  }
    91  
    92  func (m *memoryStore) put(task TaskName, partition int, p []byte, count int64) error {
    93  	m.mu.Lock()
    94  	defer m.mu.Unlock()
    95  	for len(m.tasks[task]) <= partition {
    96  		m.tasks[task] = append(m.tasks[task], nil)
    97  		m.counts[task] = append(m.counts[task], 0)
    98  	}
    99  	if m.tasks[task][partition] != nil {
   100  		return errors.E(errors.Exists, "partition already stored")
   101  	}
   102  	if p == nil {
   103  		p = []byte{}
   104  	}
   105  	m.tasks[task][partition] = p
   106  	m.counts[task][partition] = count
   107  	return nil
   108  }
   109  
// memoryWriter is the writeCommitter returned by memoryStore.Create.
// Writes accumulate in the embedded bytes.Buffer; Commit hands the
// buffered bytes to store under the recorded task and partition.
type memoryWriter struct {
	bytes.Buffer
	task      TaskName
	partition int
	store     *memoryStore
}
   116  
   117  func (*memoryWriter) Discard(context.Context) {}
   118  
   119  func (m *memoryWriter) Commit(ctx context.Context, count int64) error {
   120  	return m.store.put(m.task, m.partition, m.Buffer.Bytes(), count)
   121  }
   122  
   123  func (m *memoryStore) Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error) {
   124  	if b, _ := m.get(task, partition); b != nil {
   125  		return nil, errors.E(errors.Exists, fmt.Sprintf("create %s[%d]", task, partition))
   126  	}
   127  	return &memoryWriter{
   128  		task:      task,
   129  		partition: partition,
   130  		store:     m,
   131  	}, nil
   132  }
   133  
   134  func (m *memoryStore) Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error) {
   135  	p, _ := m.get(task, partition)
   136  	if p == nil {
   137  		return nil, errors.E(errors.NotExist, fmt.Sprintf("open %s[%d]", task, partition))
   138  	}
   139  	if int64(len(p)) < offset {
   140  		return nil, errors.E(errors.Invalid, fmt.Sprintf("open %s[%d]: seeked to %d, data size %d", task, partition, offset, len(p)))
   141  	}
   142  	return ioutil.NopCloser(bytes.NewReader(p[offset:])), nil
   143  }
   144  
   145  func (m *memoryStore) Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error) {
   146  	b, n := m.get(task, partition)
   147  	if b == nil {
   148  		return sliceInfo{}, errors.E(errors.NotExist, fmt.Sprintf("stat %s[%d]", task, partition))
   149  	}
   150  	return sliceInfo{
   151  		Size:    int64(len(b)),
   152  		Records: n,
   153  	}, nil
   154  }
   155  
   156  func (m *memoryStore) Discard(ctx context.Context, task TaskName, partition int) error {
   157  	m.mu.Lock()
   158  	defer m.mu.Unlock()
   159  	partitions, ok := m.tasks[task]
   160  	if !ok {
   161  		return errors.E(errors.NotExist, fmt.Sprintf("%s[%d]", task, partition))
   162  	}
   163  	if partition >= len(partitions) {
   164  		return errors.E(errors.NotExist, fmt.Sprintf("%s[%d]", task, partition))
   165  	}
   166  	partitions[partition] = nil
   167  	return ctx.Err()
   168  }
   169  
// fileStore is a store implementation that uses grailfiles; thus
// task output can be stored at any URL supported by grailfile (e.g.,
// S3).
type fileStore struct {
	// Prefix is the grailfile prefix under which task data are stored.
	// A task's output is stored at "{Prefix}/{ophash}/{op}/{shardspec}/p{partition}".
	Prefix string
}
   178  
   179  func (s *fileStore) path(task TaskName, partition int) string {
   180  	h := fnv.New32a()
   181  	_, _ = h.Write([]byte(task.String()))
   182  	h0 := int64(h.Sum(nil)[0])
   183  	path := file.Join(s.Prefix, strconv.FormatInt(h0, 16), task.Op)
   184  	if task.IsCombiner() {
   185  		path = file.Join(path, "combiner")
   186  	} else {
   187  		path = file.Join(path, fmt.Sprintf("%03d-of-%03d", task.Shard, task.NumShard))
   188  	}
   189  	return file.Join(path, fmt.Sprintf("p%03d", partition))
   190  }
   191  
// fileWriter is the writeCommitter returned by fileStore.Create. It pairs
// the created file with the io.Writer obtained from it; Commit appends the
// record count through that writer before closing the file.
// NOTE(review): Discard is not defined in this file for fileWriter, so it
// presumably comes from the embedded file.File — confirm.
type fileWriter struct {
	file.File
	io.Writer
}
   196  
   197  func (w *fileWriter) Commit(ctx context.Context, count int64) error {
   198  	var b [8]byte
   199  	binary.LittleEndian.PutUint64(b[:], uint64(count))
   200  	if _, err := w.Write(b[:]); err != nil {
   201  		return nil
   202  	}
   203  	return closeFile(ctx, w.File)
   204  }
   205  
   206  func (s *fileStore) Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error) {
   207  	path := s.path(task, partition)
   208  	f, err := file.Create(ctx, path)
   209  	if err != nil {
   210  		return nil, err
   211  	}
   212  	return &fileWriter{File: f, Writer: f.Writer(ctx)}, nil
   213  }
   214  
   215  func (s *fileStore) Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error) {
   216  	f, err := file.Open(ctx, s.path(task, partition))
   217  	if err != nil {
   218  		return nil, err
   219  	}
   220  	info, err := f.Stat(ctx)
   221  	if err != nil {
   222  		return nil, err
   223  	}
   224  	r := f.Reader(ctx)
   225  	if n, err := r.Seek(offset, io.SeekStart); err != nil || n != offset {
   226  		if err == nil {
   227  			return nil, errors.E(errors.Invalid, fmt.Sprintf("Seeked to %d, got %d", offset, n))
   228  		}
   229  	}
   230  	return &fileIOCloser{
   231  		Reader: io.LimitReader(r, info.Size()-8-offset),
   232  		ctx:    ctx,
   233  		file:   f,
   234  	}, nil
   235  }
   236  
   237  func (s *fileStore) Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error) {
   238  	f, err := file.Open(ctx, s.path(task, partition))
   239  	if err != nil {
   240  		return sliceInfo{}, err
   241  	}
   242  	rs := f.Reader(ctx)
   243  	n, err := rs.Seek(-8, io.SeekEnd)
   244  	if err != nil {
   245  		return sliceInfo{}, err
   246  	}
   247  	var b [8]byte
   248  	if _, err := rs.Read(b[:]); err != nil {
   249  		return sliceInfo{}, err
   250  	}
   251  	count := int64(binary.LittleEndian.Uint64(b[:]))
   252  	return sliceInfo{
   253  		Size:    n,
   254  		Records: count,
   255  	}, nil
   256  }
   257  
   258  func (s *fileStore) Discard(ctx context.Context, task TaskName, partition int) error {
   259  	path := s.path(task, partition)
   260  	return file.Remove(ctx, path)
   261  }
   262  
// fileIOCloser is the io.ReadCloser returned by fileStore.Open. Reads go
// through the embedded Reader; Close closes file using the stored ctx,
// which is retained because Close takes no context argument.
// NOTE(review): the embedded io.Writer is not set by fileStore.Open; it
// looks unused in this file — confirm before relying on or removing it.
type fileIOCloser struct {
	io.Writer
	io.Reader
	ctx  context.Context
	file file.File
}
   269  
   270  func (f *fileIOCloser) Close() error {
   271  	return closeFile(f.ctx, f.file)
   272  }
   273  
// closeNoSyncer is implemented by files that offer a CloseNoSync method.
// closeFile prefers this method over Close when it is available.
type closeNoSyncer interface {
	CloseNoSync(context.Context) error
}
   277  
   278  // CloseFile closes the provided file. It avoids syncing if the implementation
   279  // supports it.
   280  func closeFile(ctx context.Context, f file.File) error {
   281  	if closer, ok := f.(closeNoSyncer); ok {
   282  		return closer.CloseNoSync(ctx)
   283  	}
   284  	return f.Close(ctx)
   285  }