github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/chunk/writer.go (about) 1 package chunk 2 3 import ( 4 "bytes" 5 "context" 6 7 "github.com/chmduquesne/rollinghash/buzhash64" 8 units "github.com/docker/go-units" 9 "github.com/pachyderm/pachyderm/src/server/pkg/storage/hash" 10 ) 11 12 const ( 13 // WindowSize is the size of the rolling hash window. 14 WindowSize = 64 15 ) 16 17 // initialWindow is the set of bytes used to initialize the window 18 // of the rolling hash function. 19 var initialWindow = make([]byte, WindowSize) 20 21 // Annotation is used to associate information with data 22 // written into the chunk storage layer. 23 type Annotation struct { 24 RefDataRefs []*DataRef 25 NextDataRef *DataRef 26 Data interface{} 27 size int64 28 } 29 30 // WriterCallback is a callback that returns the updated annotations within a chunk. 31 type WriterCallback func([]*Annotation) error 32 33 type stats struct { 34 chunkCount int64 35 annotationCount int64 36 } 37 38 // TODO True max is avg + max, might want to reword or apply the max as max - avg. 39 const ( 40 defaultAverageBits = 23 41 defaultSeed = 1 42 defaultMinChunkSize = 1 * units.MB 43 defaultMaxChunkSize = 20 * units.MB 44 ) 45 46 type chunkSize struct { 47 avg, min, max int 48 } 49 50 // Writer splits a byte stream into content defined chunks that are hashed and deduplicated/uploaded to object storage. 51 // Chunk split points are determined by a bit pattern in a rolling hash function (buzhash64 at https://github.com/chmduquesne/rollinghash). 52 type Writer struct { 53 client *Client 54 cb WriterCallback 55 chunkSize *chunkSize 56 splitMask uint64 57 noUpload bool 58 59 ctx context.Context 60 cancel context.CancelFunc 61 err error 62 chain *TaskChain 63 annotations []*Annotation 64 numChunkBytesAnnotation int 65 hash *buzhash64.Buzhash64 66 buf *bytes.Buffer 67 stats *stats 68 buffering bool 69 first, last bool 70 } 71 72 func newWriter(ctx context.Context, client *Client, cb WriterCallback, opts ...WriterOption) *Writer { 73 cancelCtx, cancel := context.WithCancel(ctx) 74 w := &Writer{ 75 cb: cb, 76 client: client, 77 ctx: cancelCtx, 78 cancel: cancel, 79 chunkSize: &chunkSize{ 80 min: defaultMinChunkSize, 81 max: defaultMaxChunkSize, 82 }, 83 buf: &bytes.Buffer{}, 84 stats: &stats{}, 85 chain: NewTaskChain(cancelCtx), 86 first: true, 87 } 88 WithRollingHashConfig(defaultAverageBits, defaultSeed)(w) 89 for _, opt := range opts { 90 opt(w) 91 } 92 w.resetHash() 93 return w 94 } 95 96 func (w *Writer) resetHash() { 97 w.hash.Reset() 98 w.hash.Write(initialWindow) 99 } 100 101 // AnnotationCount returns a count of the number of annotations created/referenced by 102 // the writer. 103 func (w *Writer) AnnotationCount() int64 { 104 return w.stats.annotationCount 105 } 106 107 // ChunkCount returns a count of the number of chunks created/referenced by 108 // the writer. 109 func (w *Writer) ChunkCount() int64 { 110 return w.stats.chunkCount 111 } 112 113 // Annotate associates an annotation with the current data. 114 func (w *Writer) Annotate(a *Annotation) error { 115 // Create chunks at annotation boundaries if past the average chunk size. 116 if w.buf.Len() >= w.chunkSize.avg { 117 if err := w.createChunk(); err != nil { 118 return err 119 } 120 } 121 w.annotations = append(w.annotations, a) 122 w.numChunkBytesAnnotation = 0 123 w.stats.annotationCount++ 124 w.resetHash() 125 return nil 126 } 127 128 func (w *Writer) Write(data []byte) (int, error) { 129 if err := w.maybeDone(func() error { 130 if err := w.flushBuffer(); err != nil { 131 return err 132 } 133 w.roll(data) 134 return nil 135 }); err != nil { 136 return 0, err 137 } 138 return len(data), nil 139 } 140 141 func (w *Writer) maybeDone(cb func() error) (retErr error) { 142 if w.err != nil { 143 return w.err 144 } 145 defer func() { 146 if retErr != nil { 147 w.err = retErr 148 w.cancel() 149 } 150 }() 151 select { 152 case <-w.ctx.Done(): 153 return w.ctx.Err() 154 default: 155 } 156 return cb() 157 } 158 159 func (w *Writer) roll(data []byte) error { 160 offset := 0 161 for i, b := range data { 162 w.hash.Roll(b) 163 if w.hash.Sum64()&w.splitMask == 0 { 164 if w.numChunkBytesAnnotation+len(data[offset:i+1]) < w.chunkSize.min { 165 continue 166 } 167 w.writeData(data[offset : i+1]) 168 if err := w.createChunk(); err != nil { 169 return err 170 } 171 offset = i + 1 172 } 173 } 174 for w.numChunkBytesAnnotation+len(data[offset:]) >= w.chunkSize.max { 175 bytesLeft := w.chunkSize.max - w.numChunkBytesAnnotation 176 w.writeData(data[offset : offset+bytesLeft]) 177 if err := w.createChunk(); err != nil { 178 return err 179 } 180 offset += bytesLeft 181 } 182 w.writeData(data[offset:]) 183 return nil 184 } 185 186 func (w *Writer) writeData(data []byte) { 187 lastA := w.annotations[len(w.annotations)-1] 188 lastA.size += int64(len(data)) 189 w.numChunkBytesAnnotation += len(data) 190 w.buf.Write(data) 191 } 192 193 func (w *Writer) createChunk() error { 194 chunk := w.buf.Bytes() 195 edge := w.first || w.last 196 annotations := w.splitAnnotations() 197 if err := w.chain.CreateTask(func(ctx context.Context, serial func(func() error) error) error { 198 return w.processChunk(ctx, chunk, edge, annotations, serial) 199 }); err != nil { 200 return err 201 } 202 w.first = false 203 w.numChunkBytesAnnotation = 0 204 w.buf = &bytes.Buffer{} 205 w.stats.chunkCount++ 206 w.resetHash() 207 return nil 208 } 209 210 func (w *Writer) splitAnnotations() []*Annotation { 211 annotations := w.annotations 212 lastA := w.annotations[len(w.annotations)-1] 213 w.annotations = []*Annotation{copyAnnotation(lastA)} 214 return annotations 215 } 216 217 func copyAnnotation(a *Annotation) *Annotation { 218 copyA := &Annotation{} 219 if a.RefDataRefs != nil { 220 copyA.RefDataRefs = a.RefDataRefs 221 } 222 if a.Data != nil { 223 copyA.Data = a.Data 224 } 225 return copyA 226 } 227 228 func (w *Writer) processChunk(ctx context.Context, chunkBytes []byte, edge bool, annotations []*Annotation, serial func(func() error) error) error { 229 pointsTo := w.getPointsTo(annotations) 230 ref, err := w.maybeUpload(ctx, chunkBytes, pointsTo) 231 if err != nil { 232 return err 233 } 234 ref.Edge = edge 235 contentHash := Hash(chunkBytes) 236 chunkDataRef := &DataRef{ 237 Hash: contentHash.HexString(), 238 Ref: ref, 239 SizeBytes: int64(len(chunkBytes)), 240 } 241 // Process the annotations for the current chunk. 242 if err := w.processAnnotations(ctx, chunkDataRef, chunkBytes, annotations); err != nil { 243 return err 244 } 245 return serial(func() error { 246 return w.cb(annotations) 247 }) 248 } 249 250 func (w *Writer) maybeUpload(ctx context.Context, chunkBytes []byte, pointsTo []ID) (*Ref, error) { 251 md := Metadata{ 252 PointsTo: pointsTo, 253 Size: len(chunkBytes), 254 } 255 var chunkID ID 256 var err error 257 // Skip the upload if no upload is configured. 258 if !w.noUpload { 259 chunkID, err = w.client.Create(ctx, md, bytes.NewReader(chunkBytes)) 260 if err != nil { 261 return nil, err 262 } 263 } else { 264 // TODO: this has to also deal with compression and encryption 265 chunkID = Hash(chunkBytes) 266 } 267 return &Ref{ 268 Id: chunkID, 269 SizeBytes: int64(len(chunkBytes)), 270 }, nil 271 } 272 273 func (w *Writer) getPointsTo(annotations []*Annotation) (pointsTo []ID) { 274 ids := make(map[string]struct{}) 275 for _, a := range annotations { 276 for _, dr := range a.RefDataRefs { 277 id := dr.Ref.Id 278 if _, exists := ids[string(id)]; !exists { 279 pointsTo = append(pointsTo, id) 280 ids[string(id)] = struct{}{} 281 } 282 } 283 } 284 return pointsTo 285 } 286 287 func (w *Writer) processAnnotations(ctx context.Context, chunkDataRef *DataRef, chunkBytes []byte, annotations []*Annotation) error { 288 var offset int64 289 for _, a := range annotations { 290 // TODO: Empty data reference for size zero annotation? 291 if a.size == 0 { 292 continue 293 } 294 a.NextDataRef = newDataRef(chunkDataRef, chunkBytes, offset, a.size) 295 offset += a.size 296 // Skip references if no upload is configured. 297 if w.noUpload { 298 continue 299 } 300 } 301 return nil 302 } 303 304 func newDataRef(chunkRef *DataRef, chunkBytes []byte, offset, size int64) *DataRef { 305 dataRef := &DataRef{} 306 dataRef.Ref = chunkRef.Ref 307 if chunkRef.SizeBytes == size { 308 dataRef.Hash = chunkRef.Hash 309 } else { 310 dataRef.Hash = hash.EncodeHash(Hash(chunkBytes[offset : offset+size])) 311 } 312 dataRef.OffsetBytes = offset 313 dataRef.SizeBytes = size 314 return dataRef 315 } 316 317 // Copy copies a data reference to the writer. 318 func (w *Writer) Copy(dataRef *DataRef) error { 319 return w.maybeDone(func() error { 320 if err := w.maybeBufferDataRef(dataRef); err != nil { 321 return err 322 } 323 return w.maybeCheapCopy() 324 }) 325 } 326 327 func (w *Writer) maybeBufferDataRef(dataRef *DataRef) error { 328 lastA := w.annotations[len(w.annotations)-1] 329 if lastA.NextDataRef != nil && lastA.NextDataRef.OffsetBytes != 0 { 330 if err := w.flushBuffer(); err != nil { 331 return err 332 } 333 } 334 if !w.buffering { 335 // We can only begin buffering data refs when: 336 // - We are at a chunk split point. 337 // - The data ref does not reference an edge chunk. 338 // - It is the first data reference for the chunk. 339 if w.buf.Len() != 0 || dataRef.Ref.Edge || dataRef.OffsetBytes != 0 { 340 return w.flushDataRef(dataRef) 341 } 342 } else { 343 // We can only continue buffering data refs if each subsequent data ref is the next in the chunk. 344 prevDataRef := w.getPrevDataRef() 345 if !bytes.Equal(prevDataRef.Ref.Id, dataRef.Ref.Id) || prevDataRef.OffsetBytes+prevDataRef.SizeBytes != dataRef.OffsetBytes { 346 if err := w.flushBuffer(); err != nil { 347 return err 348 } 349 return w.flushDataRef(dataRef) 350 } 351 } 352 lastA.NextDataRef = mergeDataRef(lastA.NextDataRef, dataRef) 353 w.buffering = true 354 return nil 355 } 356 357 func mergeDataRef(dr1, dr2 *DataRef) *DataRef { 358 if dr1 == nil { 359 return dr2 360 } 361 dr1.SizeBytes += dr2.SizeBytes 362 if dr1.SizeBytes == dr1.Ref.SizeBytes { 363 dr1.Hash = ID(dr1.Ref.Id).HexString() 364 } 365 return dr1 366 } 367 368 func (w *Writer) getPrevDataRef() *DataRef { 369 for i := len(w.annotations) - 1; i >= 0; i-- { 370 if w.annotations[i].NextDataRef != nil { 371 return w.annotations[i].NextDataRef 372 } 373 } 374 // TODO: Reaching here would be a bug, maybe panic? 375 return nil 376 } 377 378 func (w *Writer) flushBuffer() error { 379 if w.buffering { 380 annotations := w.annotations 381 w.annotations = nil 382 for _, annotation := range annotations { 383 if err := w.Annotate(copyAnnotation(annotation)); err != nil { 384 return err 385 } 386 w.stats.annotationCount-- 387 if annotation.NextDataRef != nil { 388 if err := w.flushDataRef(annotation.NextDataRef); err != nil { 389 return err 390 } 391 } 392 } 393 w.buffering = false 394 } 395 return nil 396 } 397 398 func (w *Writer) flushDataRef(dataRef *DataRef) error { 399 buf := &bytes.Buffer{} 400 r := newDataReader(w.ctx, w.client, dataRef, nil) 401 if err := r.Get(buf); err != nil { 402 return err 403 } 404 return w.roll(buf.Bytes()) 405 } 406 407 func (w *Writer) maybeCheapCopy() error { 408 if w.buffering { 409 // Cheap copy if a full chunk is buffered. 410 lastDataRef := w.annotations[len(w.annotations)-1].NextDataRef 411 if lastDataRef.OffsetBytes+lastDataRef.SizeBytes == lastDataRef.Ref.SizeBytes { 412 annotations := w.splitAnnotations() 413 if err := w.chain.CreateTask(func(_ context.Context, serial func(func() error) error) error { 414 return serial(func() error { 415 return w.cb(annotations) 416 }) 417 }); err != nil { 418 return err 419 } 420 w.buffering = false 421 } 422 } 423 return nil 424 } 425 426 // Close closes the writer. 427 func (w *Writer) Close() error { 428 defer w.cancel() 429 defer w.client.Close() 430 return w.maybeDone(func() error { 431 if err := w.flushBuffer(); err != nil { 432 return err 433 } 434 if len(w.annotations) > 0 { 435 if err := w.createChunk(); err != nil { 436 return err 437 } 438 } 439 return w.chain.Wait() 440 }) 441 }