github.com/janelia-flyem/dvid@v1.0.0/datatype/imageblk/write.go (about) 1 package imageblk 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "sync" 8 9 "github.com/janelia-flyem/dvid/datastore" 10 "github.com/janelia-flyem/dvid/dvid" 11 "github.com/janelia-flyem/dvid/server" 12 "github.com/janelia-flyem/dvid/storage" 13 ) 14 15 // WriteBlock writes a subvolume or 2d image into a possibly intersecting block. 16 func (v *Voxels) WriteBlock(block *storage.TKeyValue, blockSize dvid.Point) error { 17 return v.writeBlock(block, blockSize) 18 } 19 func (v *Voxels) writeBlock(block *storage.TKeyValue, blockSize dvid.Point) error { 20 if blockSize.NumDims() > 3 { 21 return fmt.Errorf("DVID voxel blocks currently only supports up to 3d, not 4+ dimensions") 22 } 23 blockBeg, dataBeg, dataEnd, err := v.ComputeTransform(block, blockSize) 24 if err != nil { 25 return err 26 } 27 data := v.Data() 28 bytesPerVoxel := int64(v.Values().BytesPerElement()) 29 30 // Compute the strides (in bytes) 31 bX := int64(blockSize.Value(0)) * bytesPerVoxel 32 bY := int64(blockSize.Value(1)) * bX 33 dX := int64(v.Stride()) 34 35 blockBegX := int64(blockBeg.Value(0)) 36 blockBegY := int64(blockBeg.Value(1)) 37 blockBegZ := int64(blockBeg.Value(2)) 38 39 // Do the transfers depending on shape of the external voxels. 40 switch { 41 case v.DataShape().Equals(dvid.XY): 42 dataI := int64(dataBeg.Value(1))*dX + int64(dataBeg.Value(0))*bytesPerVoxel 43 blockI := blockBegZ*bY + blockBegY*bX + blockBegX*bytesPerVoxel 44 bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel 45 for y := dataBeg.Value(1); y <= dataEnd.Value(1); y++ { 46 copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes]) 47 blockI += bX 48 dataI += dX 49 } 50 51 case v.DataShape().Equals(dvid.XZ): 52 dataI := int64(dataBeg.Value(2))*dX + int64(dataBeg.Value(0))*bytesPerVoxel 53 blockI := blockBegZ*bY + blockBegY*bX + blockBegX*bytesPerVoxel 54 bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel 55 for y := dataBeg.Value(2); y <= dataEnd.Value(2); y++ { 56 copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes]) 57 blockI += bY 58 dataI += dX 59 } 60 61 case v.DataShape().Equals(dvid.YZ): 62 bz := blockBegZ 63 for y := int64(dataBeg.Value(2)); y <= int64(dataEnd.Value(2)); y++ { 64 dataI := y*dX + int64(dataBeg.Value(1))*bytesPerVoxel 65 blockI := bz*bY + blockBegY*bX + blockBegX*bytesPerVoxel 66 for x := dataBeg.Value(1); x <= dataEnd.Value(1); x++ { 67 copy(block.V[blockI:blockI+bytesPerVoxel], data[dataI:dataI+bytesPerVoxel]) 68 blockI += bX 69 dataI += bytesPerVoxel 70 } 71 bz++ 72 } 73 74 case v.DataShape().ShapeDimensions() == 2: 75 // TODO: General code for handling 2d ExtData in n-d space. 76 return fmt.Errorf("DVID currently does not support 2d in n-d space.") 77 78 case v.DataShape().Equals(dvid.Vol3d): 79 blockOffset := blockBegX * bytesPerVoxel 80 dX := int64(v.Size().Value(0)) * bytesPerVoxel 81 dY := int64(v.Size().Value(1)) * dX 82 dataOffset := int64(dataBeg.Value(0)) * bytesPerVoxel 83 bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel 84 blockZ := blockBegZ 85 86 for dataZ := int64(dataBeg.Value(2)); dataZ <= int64(dataEnd.Value(2)); dataZ++ { 87 blockY := blockBegY 88 for dataY := int64(dataBeg.Value(1)); dataY <= int64(dataEnd.Value(1)); dataY++ { 89 dataI := dataZ*dY + dataY*dX + dataOffset 90 blockI := blockZ*bY + blockY*bX + blockOffset 91 copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes]) 92 blockY++ 93 } 94 blockZ++ 95 } 96 97 default: 98 return fmt.Errorf("Cannot writeBlock() unsupported voxels data shape %s", v.DataShape()) 99 } 100 return nil 101 } 102 103 type putOperation struct { 104 voxels *Voxels 105 indexZYX dvid.IndexZYX 106 version dvid.VersionID 107 mutate bool // if false, we just ingest without needing to GET previous value 108 mutID uint64 // should be unique within a server's uptime. 109 } 110 111 type patchGeo struct { 112 patchstart dvid.Point3d // offset in block where patch data begins 113 patchend dvid.Point3d // location in block where patch data ends 114 } 115 116 // IngestVoxels ingests voxels from a subvolume into the storage engine. 117 // The subvolume must be aligned to blocks of the data instance, which simplifies 118 // the routine since we are simply replacing a value instead of modifying values (GET + PUT). 119 func (d *Data) IngestVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName) error { 120 return d.PutVoxels(v, mutID, vox, roiname, false) 121 } 122 123 // MutateVoxels mutates voxels from a subvolume into the storage engine. This differs from 124 // the IngestVoxels function in firing off a MutateBlockEvent instead of an IngestBlockEvent, 125 // which tells subscribers that a previous value has changed instead of a completely new 126 // key/value being inserted. There will be some decreased performance due to cleanup of prior 127 // denormalizations compared to IngestVoxels. 128 func (d *Data) MutateVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName) error { 129 return d.PutVoxels(v, mutID, vox, roiname, true) 130 } 131 132 // PutVoxels persists voxels from a subvolume into the storage engine. 133 // The subvolume must be aligned to blocks of the data instance, which simplifies 134 // the routine if the PUT is a mutation (signals MutateBlockEvent) instead of ingestion. 135 func (d *Data) PutVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName, mutate bool) error { 136 r, err := GetROI(v, roiname, vox) 137 if err != nil { 138 return err 139 } 140 141 // extract buffer interface if it exists 142 store, err := datastore.GetOrderedKeyValueDB(d) 143 if err != nil { 144 return fmt.Errorf("Data type imageblk had error initializing store: %v\n", err) 145 } 146 147 // extract buffer interface 148 _, hasbuffer := store.(storage.KeyValueRequester) 149 150 // Make sure vox is block-aligned 151 if !dvid.BlockAligned(vox, d.BlockSize()) { 152 return fmt.Errorf("cannot store voxels in non-block aligned geometry %s -> %s", vox.StartPoint(), vox.EndPoint()) 153 } 154 155 // Only do one request at a time, although each request can start many goroutines. 156 if !hasbuffer { 157 if vox.NumVoxels() > 256*256*256 { 158 server.LargeMutationMutex.Lock() 159 defer server.LargeMutationMutex.Unlock() 160 } 161 } 162 163 // create some buffer for handling requests 164 finishedRequests := make(chan error, 1000) 165 putrequests := 0 166 167 // Post new extents if there was a change (will always require 1 GET which should 168 // not be a big deal for large posts or for distributed back-ends) 169 // (assumes rest of the command will finish correctly which seems reasonable) 170 ctx := datastore.NewVersionedCtx(d, v) 171 putrequests++ 172 go func() { 173 err := d.PostExtents(ctx, vox.StartPoint(), vox.EndPoint()) 174 finishedRequests <- err 175 }() 176 177 // Iterate through index space for this data. 178 for it, err := vox.NewIndexIterator(d.BlockSize()); err == nil && it.Valid(); it.NextSpan() { 179 i0, i1, err := it.IndexSpan() 180 if err != nil { 181 return err 182 } 183 ptBeg := i0.Duplicate().(dvid.ChunkIndexer) 184 ptEnd := i1.Duplicate().(dvid.ChunkIndexer) 185 186 begX := ptBeg.Value(0) 187 endX := ptEnd.Value(0) 188 189 c := dvid.ChunkPoint3d{begX, ptBeg.Value(1), ptBeg.Value(2)} 190 for x := begX; x <= endX; x++ { 191 c[0] = x 192 curIndex := dvid.IndexZYX(c) 193 194 // Don't PUT if this index is outside a specified ROI 195 if r != nil && r.Iter != nil && !r.Iter.InsideFast(curIndex) { 196 continue 197 } 198 199 kv := &storage.TKeyValue{K: NewTKey(&curIndex)} 200 putOp := &putOperation{vox, curIndex, v, mutate, mutID} 201 op := &storage.ChunkOp{putOp, nil} 202 putrequests++ 203 d.PutChunk(&storage.Chunk{op, kv}, hasbuffer, finishedRequests) 204 } 205 } 206 // wait for everything to finish 207 for i := 0; i < putrequests; i++ { 208 errjob := <-finishedRequests 209 if errjob != nil { 210 err = errjob 211 } 212 } 213 return err 214 } 215 216 // PutBlocks stores blocks of data in a span along X 217 func (d *Data) PutBlocks(v dvid.VersionID, mutID uint64, start dvid.ChunkPoint3d, span int, data io.ReadCloser, mutate bool) error { 218 batcher, err := datastore.GetKeyValueBatcher(d) 219 if err != nil { 220 return err 221 } 222 223 ctx := datastore.NewVersionedCtx(d, v) 224 batch := batcher.NewBatch(ctx) 225 226 // Read blocks from the stream until we can output a batch put. 227 const BatchSize = 1000 228 var readBlocks int 229 numBlockBytes := d.BlockSize().Prod() 230 chunkPt := start 231 buf := make([]byte, numBlockBytes) 232 for { 233 // Read a block's worth of data 234 readBytes := int64(0) 235 for { 236 n, err := data.Read(buf[readBytes:]) 237 readBytes += int64(n) 238 if readBytes == numBlockBytes { 239 break 240 } 241 if err == io.EOF { 242 return fmt.Errorf("block data ceased before all block data read") 243 } 244 if err != nil { 245 return fmt.Errorf("error reading blocks: %v", err) 246 } 247 } 248 249 if readBytes != numBlockBytes { 250 return fmt.Errorf("expected %d bytes in block read, got %d instead, aborting", numBlockBytes, readBytes) 251 } 252 253 serialization, err := dvid.SerializeData(buf, d.Compression(), d.Checksum()) 254 if err != nil { 255 return err 256 } 257 zyx := dvid.IndexZYX(chunkPt) 258 tk := NewTKey(&zyx) 259 260 // If we are mutating, get the previous block of data. 261 var oldBlock []byte 262 if mutate { 263 oldBlock, err = d.GetBlock(v, tk) 264 if err != nil { 265 return fmt.Errorf("unable to load previous block in %q, key %v: %v", d.DataName(), tk, err) 266 } 267 } 268 269 // Write the new block 270 batch.Put(tk, serialization) 271 272 // Notify any subscribers that you've changed block. 273 var event string 274 var delta interface{} 275 if mutate { 276 event = MutateBlockEvent 277 delta = MutatedBlock{&zyx, oldBlock, buf, mutID} 278 } else { 279 event = IngestBlockEvent 280 delta = Block{&zyx, buf, mutID} 281 } 282 evt := datastore.SyncEvent{d.DataUUID(), event} 283 msg := datastore.SyncMessage{event, v, delta} 284 if err := datastore.NotifySubscribers(evt, msg); err != nil { 285 return err 286 } 287 288 // Advance to next block 289 chunkPt[0]++ 290 readBlocks++ 291 finish := (readBlocks == span) 292 if finish || readBlocks%BatchSize == 0 { 293 if err := batch.Commit(); err != nil { 294 return fmt.Errorf("error on batch commit, block %d: %v", readBlocks, err) 295 } 296 if finish { 297 break 298 } else { 299 batch = batcher.NewBatch(ctx) 300 } 301 } 302 } 303 return nil 304 } 305 306 // PutChunk puts a chunk of data as part of a mapped operation. 307 // Only some multiple of the # of CPU cores can be used for chunk handling before 308 // it waits for chunk processing to abate via the buffered server.HandlerToken channel. 309 func (d *Data) PutChunk(chunk *storage.Chunk, hasbuffer bool, finishedRequests chan error) error { 310 if !hasbuffer { 311 // if storage engine handles buffering, limited advantage to throttling here 312 server.CheckChunkThrottling() 313 } 314 315 go d.putChunk(chunk, hasbuffer, finishedRequests) 316 return nil 317 } 318 319 func (d *Data) putChunk(chunk *storage.Chunk, hasbuffer bool, finishedRequests chan error) { 320 var err error 321 defer func() { 322 // After processing a chunk, return the token. 323 if !hasbuffer { 324 server.HandlerToken <- 1 325 } 326 // Notify the requestor that this chunk is done. 327 if chunk.Wg != nil { 328 chunk.Wg.Done() 329 } 330 331 finishedRequests <- err 332 }() 333 334 op, ok := chunk.Op.(*putOperation) 335 336 if !ok { 337 log.Fatalf("Illegal operation passed to ProcessChunk() for data %s\n", d.DataName()) 338 } 339 340 // Make sure our received chunk is valid. 341 if chunk == nil { 342 dvid.Errorf("Received nil chunk in ProcessChunk. Ignoring chunk.\n") 343 err = fmt.Errorf("Received nil chunk in ProcessChunk. Ignoring chunk.\n") 344 return 345 } 346 if chunk.K == nil { 347 dvid.Errorf("Received nil chunk key in ProcessChunk. Ignoring chunk.\n") 348 err = fmt.Errorf("Received nil chunk key in ProcessChunk. Ignoring chunk.\n") 349 return 350 } 351 352 // Initialize the block buffer using the chunk of data. For voxels, this chunk of 353 // data needs to be uncompressed and deserialized. 354 var blockData []byte 355 if chunk.V == nil { 356 blockData = d.BackgroundBlock() 357 } else { 358 blockData, _, err = dvid.DeserializeData(chunk.V, true) 359 if err != nil { 360 dvid.Errorf("Unable to deserialize block in %q: %v\n", d.DataName(), err) 361 return 362 } 363 } 364 365 // If we are mutating, get the previous block of data. 366 var oldBlock []byte 367 if op.mutate { 368 oldBlock, err = d.GetBlock(op.version, chunk.K) 369 if err != nil { 370 dvid.Errorf("Unable to load previous block in %q, key %v: %v\n", d.DataName(), chunk.K, err) 371 return 372 } 373 } 374 375 // Perform the operation. 376 block := &storage.TKeyValue{K: chunk.K, V: blockData} 377 if err = op.voxels.WriteBlock(block, d.BlockSize()); err != nil { 378 dvid.Errorf("Unable to WriteBlock() in %q: %v\n", d.DataName(), err) 379 return 380 } 381 var serialization []byte 382 serialization, err = dvid.SerializeData(blockData, d.Compression(), d.Checksum()) 383 if err != nil { 384 dvid.Errorf("Unable to serialize block in %q: %v\n", d.DataName(), err) 385 return 386 } 387 store, err := datastore.GetOrderedKeyValueDB(d) 388 if err != nil { 389 dvid.Errorf("Data type imageblk had error initializing store: %v\n", err) 390 return 391 } 392 393 ready := make(chan error, 1) 394 callback := func() { 395 // Notify any subscribers that you've changed block. 396 resperr := <-ready 397 if resperr != nil { 398 dvid.Errorf("Unable to PUT voxel data for key %v: %v\n", chunk.K, resperr) 399 err = fmt.Errorf("Unable to PUT voxel data for key %v: %v", chunk.K, resperr) 400 return 401 } 402 var event string 403 var delta interface{} 404 if op.mutate { 405 event = MutateBlockEvent 406 delta = MutatedBlock{&op.indexZYX, oldBlock, block.V, op.mutID} 407 } else { 408 event = IngestBlockEvent 409 delta = Block{&op.indexZYX, block.V, op.mutID} 410 } 411 evt := datastore.SyncEvent{d.DataUUID(), event} 412 msg := datastore.SyncMessage{event, op.version, delta} 413 if err = datastore.NotifySubscribers(evt, msg); err != nil { 414 dvid.Errorf("Unable to notify subscribers of event %s in %s\n", event, d.DataName()) 415 } 416 } 417 418 // put data -- use buffer if available 419 ctx := datastore.NewVersionedCtx(d, op.version) 420 if err = store.Put(ctx, chunk.K, serialization); err != nil { 421 dvid.Errorf("Unable to PUT voxel data for key %v: %v\n", chunk.K, err) 422 return 423 } 424 ready <- nil 425 callback() 426 } 427 428 // Writes a XY image into the blocks that intersect it. This function assumes the 429 // blocks have been allocated and if necessary, filled with old data. 430 func (d *Data) writeXYImage(v dvid.VersionID, vox *Voxels, b storage.TKeyValues) (err error) { 431 432 // Setup concurrency in image -> block transfers. 433 var wg sync.WaitGroup 434 defer wg.Wait() 435 436 // Iterate through index space for this data using ZYX ordering. 437 blockSize := d.BlockSize() 438 var startingBlock int32 439 440 for it, err := vox.NewIndexIterator(blockSize); err == nil && it.Valid(); it.NextSpan() { 441 indexBeg, indexEnd, err := it.IndexSpan() 442 if err != nil { 443 return err 444 } 445 446 ptBeg := indexBeg.Duplicate().(dvid.ChunkIndexer) 447 ptEnd := indexEnd.Duplicate().(dvid.ChunkIndexer) 448 449 // Do image -> block transfers in concurrent goroutines. 450 begX := ptBeg.Value(0) 451 endX := ptEnd.Value(0) 452 453 server.CheckChunkThrottling() 454 wg.Add(1) 455 go func(blockNum int32) { 456 c := dvid.ChunkPoint3d{begX, ptBeg.Value(1), ptBeg.Value(2)} 457 for x := begX; x <= endX; x++ { 458 c[0] = x 459 curIndex := dvid.IndexZYX(c) 460 b[blockNum].K = NewTKey(&curIndex) 461 462 // Write this slice data into the block. 463 vox.WriteBlock(&(b[blockNum]), blockSize) 464 blockNum++ 465 } 466 server.HandlerToken <- 1 467 wg.Done() 468 }(startingBlock) 469 470 startingBlock += (endX - begX + 1) 471 } 472 return 473 } 474 475 // KVWriteSize is the # of key-value pairs we will write as one atomic batch write. 476 const KVWriteSize = 500 477 478 // TODO -- Clean up all the writing and simplify now that we have block-aligned writes. 479 // writeBlocks ingests blocks of voxel data asynchronously using batch writes. 480 func (d *Data) writeBlocks(v dvid.VersionID, b storage.TKeyValues, wg1, wg2 *sync.WaitGroup) error { 481 batcher, err := datastore.GetKeyValueBatcher(d) 482 if err != nil { 483 return err 484 } 485 486 preCompress, postCompress := 0, 0 487 488 ctx := datastore.NewVersionedCtx(d, v) 489 evt := datastore.SyncEvent{d.DataUUID(), IngestBlockEvent} 490 491 server.CheckChunkThrottling() 492 go func() { 493 defer func() { 494 wg1.Done() 495 wg2.Done() 496 dvid.Debugf("Wrote voxel blocks. Before %s: %d bytes. After: %d bytes\n", d.Compression(), preCompress, postCompress) 497 server.HandlerToken <- 1 498 }() 499 500 mutID := d.NewMutationID() 501 batch := batcher.NewBatch(ctx) 502 for i, block := range b { 503 serialization, err := dvid.SerializeData(block.V, d.Compression(), d.Checksum()) 504 preCompress += len(block.V) 505 postCompress += len(serialization) 506 if err != nil { 507 dvid.Errorf("Unable to serialize block: %v\n", err) 508 return 509 } 510 batch.Put(block.K, serialization) 511 512 indexZYX, err := DecodeTKey(block.K) 513 if err != nil { 514 dvid.Errorf("Unable to recover index from block key: %v\n", block.K) 515 return 516 } 517 msg := datastore.SyncMessage{IngestBlockEvent, v, Block{indexZYX, block.V, mutID}} 518 if err := datastore.NotifySubscribers(evt, msg); err != nil { 519 dvid.Errorf("Unable to notify subscribers of ChangeBlockEvent in %s\n", d.DataName()) 520 return 521 } 522 523 // Check if we should commit 524 if i%KVWriteSize == KVWriteSize-1 { 525 if err := batch.Commit(); err != nil { 526 dvid.Errorf("Error on trying to write batch: %v\n", err) 527 return 528 } 529 batch = batcher.NewBatch(ctx) 530 } 531 } 532 if err := batch.Commit(); err != nil { 533 dvid.Errorf("Error on trying to write batch: %v\n", err) 534 return 535 } 536 }() 537 return nil 538 }