github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/exec/store.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package exec 6 7 import ( 8 "bytes" 9 "context" 10 "encoding/binary" 11 "fmt" 12 "hash/fnv" 13 "io" 14 "io/ioutil" 15 "strconv" 16 "sync" 17 18 "github.com/grailbio/base/errors" 19 "github.com/grailbio/base/file" 20 ) 21 22 // sliceInfo stores metadata for a stored slice. 23 type sliceInfo struct { 24 // Size is the raw, encoded byte size of the stored slice. 25 // A value of -1 indicates the size is unknown. 26 Size int64 27 // Records contains the number of records in the stored slice. 28 // A value of -1 indicates the number of records is unknown. 29 Records int64 30 } 31 32 // A writeCommitter represents a committable write stream into a store. 33 type writeCommitter interface { 34 io.Writer 35 // Commit commits the written data to storage. The caller should 36 // provide the number of records written as metadata. 37 Commit(ctx context.Context, records int64) error 38 // Discard discards the writer; it will not be committed. 39 Discard(ctx context.Context) 40 } 41 42 // Store is an abstraction that stores partitioned data as produced by a task. 43 type Store interface { 44 // Create returns a writer that populates data for the given 45 // task name and partition. The data is not be available 46 // to Open until the returned closer has been closed. 47 // 48 // TODO(marius): should we allow writes to be discarded as well? 49 Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error) 50 51 // Open returns a ReadCloser from which the stored contents of the named task 52 // and partition can be read. If the task and partition are not stored, an 53 // error with kind errors.NotExist is returned. The offset specifies the byte 54 // position from which to read. 55 Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error) 56 57 // Stat returns metadata for the stored slice. 58 Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error) 59 60 // Discard discards the data stored for task and partition. Subsequent calls 61 // to Open for the given (task, partition) will fail. ReadClosers that 62 // already exist may start returning errors, depending on the 63 // implementation. If no such (task, partition) is stored, returns a non-nil 64 // error. 65 Discard(ctx context.Context, task TaskName, partition int) error 66 } 67 68 // MemoryStore is a store implementation that maintains in-memory buffers 69 // of task output. 70 type memoryStore struct { 71 mu sync.Mutex 72 tasks map[TaskName][][]byte 73 counts map[TaskName][]int64 74 } 75 76 func newMemoryStore() *memoryStore { 77 return &memoryStore{ 78 tasks: make(map[TaskName][][]byte), 79 counts: make(map[TaskName][]int64), 80 } 81 } 82 83 func (m *memoryStore) get(task TaskName, partition int) ([]byte, int64) { 84 m.mu.Lock() 85 defer m.mu.Unlock() 86 if len(m.tasks[task]) <= partition { 87 return nil, 0 88 } 89 return m.tasks[task][partition], m.counts[task][partition] 90 } 91 92 func (m *memoryStore) put(task TaskName, partition int, p []byte, count int64) error { 93 m.mu.Lock() 94 defer m.mu.Unlock() 95 for len(m.tasks[task]) <= partition { 96 m.tasks[task] = append(m.tasks[task], nil) 97 m.counts[task] = append(m.counts[task], 0) 98 } 99 if m.tasks[task][partition] != nil { 100 return errors.E(errors.Exists, "partition already stored") 101 } 102 if p == nil { 103 p = []byte{} 104 } 105 m.tasks[task][partition] = p 106 m.counts[task][partition] = count 107 return nil 108 } 109 110 type memoryWriter struct { 111 bytes.Buffer 112 task TaskName 113 partition int 114 store *memoryStore 115 } 116 117 func (*memoryWriter) Discard(context.Context) {} 118 119 func (m *memoryWriter) Commit(ctx context.Context, count int64) error { 120 return m.store.put(m.task, m.partition, m.Buffer.Bytes(), count) 121 } 122 123 func (m *memoryStore) Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error) { 124 if b, _ := m.get(task, partition); b != nil { 125 return nil, errors.E(errors.Exists, fmt.Sprintf("create %s[%d]", task, partition)) 126 } 127 return &memoryWriter{ 128 task: task, 129 partition: partition, 130 store: m, 131 }, nil 132 } 133 134 func (m *memoryStore) Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error) { 135 p, _ := m.get(task, partition) 136 if p == nil { 137 return nil, errors.E(errors.NotExist, fmt.Sprintf("open %s[%d]", task, partition)) 138 } 139 if int64(len(p)) < offset { 140 return nil, errors.E(errors.Invalid, fmt.Sprintf("open %s[%d]: seeked to %d, data size %d", task, partition, offset, len(p))) 141 } 142 return ioutil.NopCloser(bytes.NewReader(p[offset:])), nil 143 } 144 145 func (m *memoryStore) Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error) { 146 b, n := m.get(task, partition) 147 if b == nil { 148 return sliceInfo{}, errors.E(errors.NotExist, fmt.Sprintf("stat %s[%d]", task, partition)) 149 } 150 return sliceInfo{ 151 Size: int64(len(b)), 152 Records: n, 153 }, nil 154 } 155 156 func (m *memoryStore) Discard(ctx context.Context, task TaskName, partition int) error { 157 m.mu.Lock() 158 defer m.mu.Unlock() 159 partitions, ok := m.tasks[task] 160 if !ok { 161 return errors.E(errors.NotExist, fmt.Sprintf("%s[%d]", task, partition)) 162 } 163 if partition >= len(partitions) { 164 return errors.E(errors.NotExist, fmt.Sprintf("%s[%d]", task, partition)) 165 } 166 partitions[partition] = nil 167 return ctx.Err() 168 } 169 170 // FileStore is a store implementation that uses grailfiles; thus 171 // task output can be stored at any URL supported by grailfile (e.g., 172 // S3). 173 type fileStore struct { 174 // Prefix is the grailfile prefix under which task data are stored. 175 // A task's output is stored at "{Prefix}/{ophash}/{op}/{shardspec}/p{partition}". 176 Prefix string 177 } 178 179 func (s *fileStore) path(task TaskName, partition int) string { 180 h := fnv.New32a() 181 _, _ = h.Write([]byte(task.String())) 182 h0 := int64(h.Sum(nil)[0]) 183 path := file.Join(s.Prefix, strconv.FormatInt(h0, 16), task.Op) 184 if task.IsCombiner() { 185 path = file.Join(path, "combiner") 186 } else { 187 path = file.Join(path, fmt.Sprintf("%03d-of-%03d", task.Shard, task.NumShard)) 188 } 189 return file.Join(path, fmt.Sprintf("p%03d", partition)) 190 } 191 192 type fileWriter struct { 193 file.File 194 io.Writer 195 } 196 197 func (w *fileWriter) Commit(ctx context.Context, count int64) error { 198 var b [8]byte 199 binary.LittleEndian.PutUint64(b[:], uint64(count)) 200 if _, err := w.Write(b[:]); err != nil { 201 return nil 202 } 203 return closeFile(ctx, w.File) 204 } 205 206 func (s *fileStore) Create(ctx context.Context, task TaskName, partition int) (writeCommitter, error) { 207 path := s.path(task, partition) 208 f, err := file.Create(ctx, path) 209 if err != nil { 210 return nil, err 211 } 212 return &fileWriter{File: f, Writer: f.Writer(ctx)}, nil 213 } 214 215 func (s *fileStore) Open(ctx context.Context, task TaskName, partition int, offset int64) (io.ReadCloser, error) { 216 f, err := file.Open(ctx, s.path(task, partition)) 217 if err != nil { 218 return nil, err 219 } 220 info, err := f.Stat(ctx) 221 if err != nil { 222 return nil, err 223 } 224 r := f.Reader(ctx) 225 if n, err := r.Seek(offset, io.SeekStart); err != nil || n != offset { 226 if err == nil { 227 return nil, errors.E(errors.Invalid, fmt.Sprintf("Seeked to %d, got %d", offset, n)) 228 } 229 } 230 return &fileIOCloser{ 231 Reader: io.LimitReader(r, info.Size()-8-offset), 232 ctx: ctx, 233 file: f, 234 }, nil 235 } 236 237 func (s *fileStore) Stat(ctx context.Context, task TaskName, partition int) (sliceInfo, error) { 238 f, err := file.Open(ctx, s.path(task, partition)) 239 if err != nil { 240 return sliceInfo{}, err 241 } 242 rs := f.Reader(ctx) 243 n, err := rs.Seek(-8, io.SeekEnd) 244 if err != nil { 245 return sliceInfo{}, err 246 } 247 var b [8]byte 248 if _, err := rs.Read(b[:]); err != nil { 249 return sliceInfo{}, err 250 } 251 count := int64(binary.LittleEndian.Uint64(b[:])) 252 return sliceInfo{ 253 Size: n, 254 Records: count, 255 }, nil 256 } 257 258 func (s *fileStore) Discard(ctx context.Context, task TaskName, partition int) error { 259 path := s.path(task, partition) 260 return file.Remove(ctx, path) 261 } 262 263 type fileIOCloser struct { 264 io.Writer 265 io.Reader 266 ctx context.Context 267 file file.File 268 } 269 270 func (f *fileIOCloser) Close() error { 271 return closeFile(f.ctx, f.file) 272 } 273 274 type closeNoSyncer interface { 275 CloseNoSync(context.Context) error 276 } 277 278 // CloseFile closes the provided file. It avoids syncing if the implementation 279 // supports it. 280 func closeFile(ctx context.Context, f file.File) error { 281 if closer, ok := f.(closeNoSyncer); ok { 282 return closer.CloseNoSync(ctx) 283 } 284 return f.Close(ctx) 285 }