/*
	This file contains code that manages long-lived merge/split operations using small
	amount of globally-coordinated metadata.
*/

package labelvol

import (
	"fmt"
	"io"
	"sort"

	"github.com/janelia-flyem/dvid/datastore"
	"github.com/janelia-flyem/dvid/datatype/common/labels"
	"github.com/janelia-flyem/dvid/dvid"
	"github.com/janelia-flyem/dvid/storage"
)

// sizeChange pairs a label's voxel count before and after a size-changing
// operation (merge or split).
type sizeChange struct {
	oldSize, newSize uint64
}

// Returns the InstanceVersion for the synced labelblk if available or
// defaults to its own instance.
func (d *Data) getMergeIV(v dvid.VersionID) dvid.InstanceVersion {
	syncedLabelblk, err := d.GetSyncedLabelblk()
	if err != nil {
		// No synced labelblk instance: fall back to this labelvol's own UUID.
		return dvid.InstanceVersion{d.DataUUID(), v}
	}
	return dvid.InstanceVersion{syncedLabelblk.DataUUID(), v}
}

// MergeLabels handles merging of any number of labels throughout the various label data
// structures. It assumes that the merges aren't cascading, e.g., there is no attempt
// to merge label 3 into 4 and also 4 into 5. The caller should have flattened the merges.
// TODO: Provide some indication that subset of labels are under evolution, returning
// an "unavailable" status or 203 for non-authoritative response. This might not be
// feasible for clustered DVID front-ends due to coordination issues.
//
// EVENTS
//
// labels.MergeStartEvent occurs at very start of merge and transmits labels.DeltaMergeStart struct.
//
// labels.MergeBlockEvent occurs for every block of a merged label and transmits labels.DeltaMerge struct.
//
// labels.MergeEndEvent occurs at end of merge and transmits labels.DeltaMergeEnd struct.
47 // 48 func (d *Data) MergeLabels(v dvid.VersionID, m labels.MergeOp) error { 49 dvid.Infof("Merging data %q (labels %s) into label %d ...\n", d.DataName(), m.Merged, m.Target) 50 51 // Mark these labels as dirty until done, and make sure we can actually initiate the merge. 52 if err := labels.MergeStart(d.getMergeIV(v), m); err != nil { 53 return err 54 } 55 d.StartUpdate() 56 57 go func() { 58 if err := labels.LogMerge(d, v, m); err != nil { 59 dvid.Errorf("logging merge %q: %v\n", d.DataName(), err) 60 } 61 }() 62 63 // Signal that we are starting a merge. 64 evt := datastore.SyncEvent{d.DataUUID(), labels.MergeStartEvent} 65 msg := datastore.SyncMessage{labels.MergeStartEvent, v, labels.DeltaMergeStart{m}} 66 if err := datastore.NotifySubscribers(evt, msg); err != nil { 67 d.StopUpdate() 68 return err 69 } 70 71 // Asynchronously perform merge and handle any concurrent requests using the cache map until 72 // labelvol and labelblk are updated and consistent. 73 go func() { 74 fmt.Printf("Starting merge %v\n", m) 75 d.asyncMergeLabels(v, m) 76 fmt.Printf("Finished merge %v\n", m) 77 78 // Remove dirty labels and updating flag when done. 79 labels.MergeStop(d.getMergeIV(v), m) 80 d.StopUpdate() 81 dvid.Infof("Finished with merge of labels %s.\n", m) 82 }() 83 fmt.Printf("async return from merge on %v\n", m) 84 85 return nil 86 } 87 88 func (d *Data) asyncMergeLabels(v dvid.VersionID, m labels.MergeOp) { 89 // Get storage objects 90 store, err := datastore.GetOrderedKeyValueDB(d) 91 if err != nil { 92 dvid.Errorf("Data type labelvol had error initializing store: %v\n", err) 93 return 94 } 95 batcher, ok := store.(storage.KeyValueBatcher) 96 if !ok { 97 dvid.Errorf("Data type labelvol requires batch-enabled store, which %q is not\n", store) 98 return 99 } 100 101 // All blocks that have changed during this merge. 
Key = string of block index 102 blocksChanged := make(map[dvid.IZYXString]struct{}) 103 104 // Get the block-level RLEs for the toLabel 105 toLabel := m.Target 106 toLabelRLEs, err := d.GetLabelRLEs(v, toLabel) 107 if err != nil { 108 dvid.Criticalf("Can't get block-level RLEs for label %d: %v", toLabel, err) 109 return 110 } 111 toLabelSize := toLabelRLEs.NumVoxels() 112 113 // Iterate through all labels to be merged. 114 var addedVoxels uint64 115 for fromLabel := range m.Merged { 116 dvid.Debugf("Merging label %d to label %d...\n", fromLabel, toLabel) 117 118 fromLabelRLEs, err := d.GetLabelRLEs(v, fromLabel) 119 if err != nil { 120 dvid.Errorf("Can't get block-level RLEs for label %d: %v", fromLabel, err) 121 return 122 } 123 fromLabelSize := fromLabelRLEs.NumVoxels() 124 if fromLabelSize == 0 || len(fromLabelRLEs) == 0 { 125 dvid.Debugf("Label %d is empty. Skipping.\n", fromLabel) 126 continue 127 } 128 addedVoxels += fromLabelSize 129 130 // Notify linked labelsz instances 131 delta := labels.DeltaDeleteSize{ 132 Label: fromLabel, 133 OldSize: fromLabelSize, 134 OldKnown: true, 135 } 136 evt := datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 137 msg := datastore.SyncMessage{labels.ChangeSizeEvent, v, delta} 138 if err := datastore.NotifySubscribers(evt, msg); err != nil { 139 dvid.Criticalf("can't notify subscribers for event %v: %v\n", evt, err) 140 } 141 142 // Append or insert RLE runs from fromLabel blocks into toLabel blocks. 
143 for blockStr, fromRLEs := range fromLabelRLEs { 144 // Mark the fromLabel blocks as modified 145 blocksChanged[blockStr] = struct{}{} 146 147 // Get the toLabel RLEs for this block and add the fromLabel RLEs 148 toRLEs, found := toLabelRLEs[blockStr] 149 if found { 150 toRLEs.Add(fromRLEs) 151 } else { 152 toRLEs = fromRLEs 153 } 154 toLabelRLEs[blockStr] = toRLEs 155 } 156 157 // Delete all fromLabel RLEs since they are all integrated into toLabel RLEs 158 minTKey := NewTKey(fromLabel, dvid.MinIndexZYX.ToIZYXString()) 159 maxTKey := NewTKey(fromLabel, dvid.MaxIndexZYX.ToIZYXString()) 160 ctx := datastore.NewVersionedCtx(d, v) 161 fmt.Printf("Deleting all versions of label %d, key %v to %v\n", fromLabel, minTKey, maxTKey) 162 if err := store.DeleteRange(ctx, minTKey, maxTKey); err != nil { 163 dvid.Criticalf("Can't delete label %d RLEs: %v", fromLabel, err) 164 } 165 } 166 167 if len(blocksChanged) == 0 { 168 dvid.Debugf("No changes needed when merging %s into %d. Aborting.\n", m.Merged, m.Target) 169 return 170 } 171 172 // Publish block-level merge 173 evt := datastore.SyncEvent{d.DataUUID(), labels.MergeBlockEvent} 174 msg := datastore.SyncMessage{labels.MergeBlockEvent, v, labels.DeltaMerge{MergeOp: m, BlockMap: blocksChanged}} 175 if err := datastore.NotifySubscribers(evt, msg); err != nil { 176 dvid.Errorf("can't notify subscribers for event %v: %v\n", evt, err) 177 } 178 179 // Update datastore with all toLabel RLEs that were changed 180 ctx := datastore.NewVersionedCtx(d, v) 181 batch := batcher.NewBatch(ctx) 182 for blockStr := range blocksChanged { 183 tk := NewTKey(toLabel, blockStr) 184 serialization, err := toLabelRLEs[blockStr].MarshalBinary() 185 if err != nil { 186 dvid.Errorf("Error serializing RLEs for label %d: %v\n", toLabel, err) 187 } 188 fmt.Printf("Updating new merged key %v\n", tk) 189 batch.Put(tk, serialization) 190 } 191 if err := batch.Commit(); err != nil { 192 dvid.Errorf("Error on updating RLEs for label %d: %v\n", toLabel, err) 
193 } 194 delta := labels.DeltaReplaceSize{ 195 Label: toLabel, 196 OldSize: toLabelSize, 197 NewSize: toLabelSize + addedVoxels, 198 } 199 evt = datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 200 msg = datastore.SyncMessage{labels.ChangeSizeEvent, v, delta} 201 if err := datastore.NotifySubscribers(evt, msg); err != nil { 202 dvid.Errorf("can't notify subscribers for event %v: %v\n", evt, err) 203 } 204 205 evt = datastore.SyncEvent{d.DataUUID(), labels.MergeEndEvent} 206 msg = datastore.SyncMessage{labels.MergeEndEvent, v, labels.DeltaMergeEnd{m}} 207 if err := datastore.NotifySubscribers(evt, msg); err != nil { 208 dvid.Errorf("can't notify subscribers for event %v: %v\n", evt, err) 209 } 210 } 211 212 // SplitLabels splits a portion of a label's voxels into a given split label or, if the given split 213 // label is 0, a new label, which is returned. The input is a binary sparse volume and should 214 // preferably be the smaller portion of a labeled region. In other words, the caller should chose 215 // to submit for relabeling the smaller portion of any split. It is assumed that the given split 216 // voxels are within the fromLabel set of voxels and will generate unspecified behavior if this is 217 // not the case. 218 // 219 // EVENTS 220 // 221 // labels.SplitStartEvent occurs at very start of split and transmits labels.DeltaSplitStart struct. 222 // 223 // labels.SplitBlockEvent occurs for every block of a split label and transmits labels.DeltaSplit struct. 224 // 225 // labels.SplitEndEvent occurs at end of split and transmits labels.DeltaSplitEnd struct. 
226 // 227 func (d *Data) SplitLabels(v dvid.VersionID, fromLabel, splitLabel uint64, r io.ReadCloser) (toLabel uint64, err error) { 228 store, err := datastore.GetOrderedKeyValueDB(d) 229 if err != nil { 230 err = fmt.Errorf("Data type labelvol had error initializing store: %v\n", err) 231 return 232 } 233 batcher, ok := store.(storage.KeyValueBatcher) 234 if !ok { 235 err = fmt.Errorf("Data type labelvol requires batch-enabled store, which %q is not\n", store) 236 return 237 } 238 239 // Create a new label id for this version that will persist to store 240 if splitLabel != 0 { 241 toLabel = splitLabel 242 dvid.Debugf("Splitting subset of label %d into given label %d ...\n", fromLabel, splitLabel) 243 } else { 244 toLabel, err = d.NewLabel(v) 245 if err != nil { 246 return 247 } 248 dvid.Debugf("Splitting subset of label %d into new label %d ...\n", fromLabel, toLabel) 249 } 250 251 evt := datastore.SyncEvent{d.DataUUID(), labels.SplitStartEvent} 252 splitOpStart := labels.DeltaSplitStart{fromLabel, toLabel} 253 splitOpEnd := labels.DeltaSplitEnd{fromLabel, toLabel} 254 255 // Make sure we can split given current merges in progress 256 if err := labels.SplitStart(d.getMergeIV(v), splitOpStart); err != nil { 257 return toLabel, err 258 } 259 defer labels.SplitStop(d.getMergeIV(v), splitOpEnd) 260 261 // Signal that we are starting a split. 262 msg := datastore.SyncMessage{labels.SplitStartEvent, v, splitOpStart} 263 if err := datastore.NotifySubscribers(evt, msg); err != nil { 264 return 0, err 265 } 266 267 // Read the sparse volume from reader. 
268 var split dvid.RLEs 269 split, err = dvid.ReadRLEs(r) 270 if err != nil { 271 return 272 } 273 toLabelSize, _ := split.Stats() 274 275 mutID := d.NewMutationID() 276 splitOp := labels.SplitOp{ 277 MutID: mutID, 278 Target: fromLabel, 279 NewLabel: toLabel, 280 RLEs: split, 281 } 282 go func() { 283 if err := labels.LogSplit(d, v, splitOp); err != nil { 284 dvid.Errorf("logging split %q: %v\n", d.DataName(), err) 285 } 286 }() 287 288 // Partition the split spans into blocks. 289 var splitmap dvid.BlockRLEs 290 splitmap, err = split.Partition(d.BlockSize) 291 if err != nil { 292 return 293 } 294 295 // Get a sorted list of blocks that cover split. 296 splitblks := splitmap.SortedKeys() 297 298 // Publish split event 299 deltaSplit := labels.DeltaSplit{ 300 MutID: mutID, 301 OldLabel: fromLabel, 302 NewLabel: toLabel, 303 Split: splitmap, 304 SortedBlocks: splitblks, 305 SplitVoxels: toLabelSize, 306 } 307 308 evt = datastore.SyncEvent{d.DataUUID(), labels.SplitLabelEvent} 309 msg = datastore.SyncMessage{labels.SplitLabelEvent, v, deltaSplit} 310 if err = datastore.NotifySubscribers(evt, msg); err != nil { 311 return 312 } 313 314 // Iterate through the split blocks, read the original block. If the RLEs 315 // are identical, just delete the original. If not, modify the original. 316 // TODO: Modifications should be transactional since it's GET-PUT, therefore use 317 // hash on block coord to direct it to blockLabel, splitLabel-specific goroutine; we serialize 318 // requests to handle concurrency. 
319 ctx := datastore.NewVersionedCtx(d, v) 320 batch := batcher.NewBatch(ctx) 321 322 for _, splitblk := range splitblks { 323 324 // Get original block 325 tk := NewTKey(fromLabel, splitblk) 326 val, err := store.Get(ctx, tk) 327 if err != nil { 328 return toLabel, err 329 } 330 331 if val == nil { 332 return toLabel, fmt.Errorf("Split RLEs at block %s are not part of original label %d", splitblk, fromLabel) 333 } 334 var rles dvid.RLEs 335 if err := rles.UnmarshalBinary(val); err != nil { 336 return toLabel, fmt.Errorf("Unable to unmarshal RLE for original labels in block %s", splitblk) 337 } 338 339 // Compare and process based on modifications required. 340 remain, err := rles.Split(splitmap[splitblk]) 341 if err != nil { 342 return toLabel, err 343 } 344 if len(remain) == 0 { 345 batch.Delete(tk) 346 } else { 347 rleBytes, err := remain.MarshalBinary() 348 if err != nil { 349 return toLabel, fmt.Errorf("can't serialize remain RLEs for split of %d: %v\n", fromLabel, err) 350 } 351 batch.Put(tk, rleBytes) 352 } 353 } 354 355 if err = batch.Commit(); err != nil { 356 err = fmt.Errorf("Batch PUT during split of %q label %d: %v\n", d.DataName(), fromLabel, err) 357 return 358 } 359 360 // Write the split sparse vol. 361 if err = d.writeLabelVol(v, toLabel, splitmap, splitblks); err != nil { 362 return 363 } 364 365 // Publish change in label sizes. 
366 delta := labels.DeltaNewSize{ 367 Label: toLabel, 368 Size: toLabelSize, 369 } 370 evt = datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 371 msg = datastore.SyncMessage{labels.ChangeSizeEvent, v, delta} 372 if err = datastore.NotifySubscribers(evt, msg); err != nil { 373 return 374 } 375 376 delta2 := labels.DeltaModSize{ 377 Label: fromLabel, 378 SizeChange: int64(-toLabelSize), 379 } 380 evt = datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 381 msg = datastore.SyncMessage{labels.ChangeSizeEvent, v, delta2} 382 if err = datastore.NotifySubscribers(evt, msg); err != nil { 383 return 384 } 385 386 // Publish split end 387 evt = datastore.SyncEvent{d.DataUUID(), labels.SplitEndEvent} 388 msg = datastore.SyncMessage{labels.SplitEndEvent, v, splitOpEnd} 389 if err = datastore.NotifySubscribers(evt, msg); err != nil { 390 return 391 } 392 393 return toLabel, nil 394 } 395 396 // SplitCoarseLabels splits a portion of a label's voxels into a given split label or, if the given split 397 // label is 0, a new label, which is returned. The input is a binary sparse volume defined by block 398 // coordinates and should be the smaller portion of a labeled region-to-be-split. 399 // 400 // EVENTS 401 // 402 // labels.SplitStartEvent occurs at very start of split and transmits labels.DeltaSplitStart struct. 403 // 404 // labels.SplitBlockEvent occurs for every block of a split label and transmits labels.DeltaSplit struct. 405 // 406 // labels.SplitEndEvent occurs at end of split and transmits labels.DeltaSplitEnd struct. 
407 // 408 func (d *Data) SplitCoarseLabels(v dvid.VersionID, fromLabel, splitLabel uint64, r io.ReadCloser) (toLabel uint64, err error) { 409 store, err := datastore.GetOrderedKeyValueDB(d) 410 if err != nil { 411 err = fmt.Errorf("Data type labelvol had error initializing store: %v\n", err) 412 return 413 } 414 batcher, ok := store.(storage.KeyValueBatcher) 415 if !ok { 416 err = fmt.Errorf("Data type labelvol requires batch-enabled store, which %q is not\n", store) 417 return 418 } 419 420 // Create a new label id for this version that will persist to store 421 if splitLabel != 0 { 422 toLabel = splitLabel 423 dvid.Debugf("Splitting coarse subset of label %d into given label %d ...\n", fromLabel, splitLabel) 424 } else { 425 toLabel, err = d.NewLabel(v) 426 if err != nil { 427 return 428 } 429 dvid.Debugf("Splitting coarse subset of label %d into new label %d ...\n", fromLabel, toLabel) 430 } 431 432 evt := datastore.SyncEvent{d.DataUUID(), labels.SplitStartEvent} 433 splitOpStart := labels.DeltaSplitStart{fromLabel, toLabel} 434 splitOpEnd := labels.DeltaSplitEnd{fromLabel, toLabel} 435 436 // Make sure we can split given current merges in progress 437 if err := labels.SplitStart(d.getMergeIV(v), splitOpStart); err != nil { 438 return toLabel, err 439 } 440 defer labels.SplitStop(d.getMergeIV(v), splitOpEnd) 441 442 // Signal that we are starting a split. 443 msg := datastore.SyncMessage{labels.SplitStartEvent, v, splitOpStart} 444 if err := datastore.NotifySubscribers(evt, msg); err != nil { 445 return 0, err 446 } 447 448 // Read the sparse volume from reader. 
449 var splits dvid.RLEs 450 splits, err = dvid.ReadRLEs(r) 451 if err != nil { 452 return 453 } 454 numBlocks, _ := splits.Stats() 455 456 mutID := d.NewMutationID() 457 splitOp := labels.SplitOp{ 458 MutID: mutID, 459 Target: fromLabel, 460 NewLabel: toLabel, 461 RLEs: splits, 462 Coarse: true, 463 } 464 go func() { 465 if err := labels.LogSplit(d, v, splitOp); err != nil { 466 dvid.Errorf("logging split %q: %v\n", d.DataName(), err) 467 } 468 }() 469 470 // Order the split blocks 471 splitblks := make(dvid.IZYXSlice, numBlocks) 472 n := 0 473 for _, rle := range splits { 474 p := rle.StartPt() 475 run := rle.Length() 476 for i := int32(0); i < run; i++ { 477 izyx := dvid.IndexZYX{p[0] + i, p[1], p[2]} 478 splitblks[n] = izyx.ToIZYXString() 479 n++ 480 } 481 } 482 sort.Sort(splitblks) 483 484 // Iterate through the split blocks, read the original block and change labels. 485 // TODO: Modifications should be transactional since it's GET-PUT, therefore use 486 // hash on block coord to direct it to block-specific goroutine; we serialize 487 // requests to handle concurrency. 488 ctx := datastore.NewVersionedCtx(d, v) 489 batch := batcher.NewBatch(ctx) 490 491 var toLabelSize uint64 492 for _, splitblk := range splitblks { 493 // Get original block 494 tk := NewTKey(fromLabel, splitblk) 495 val, err := store.Get(ctx, tk) 496 if err != nil { 497 return toLabel, err 498 } 499 if val == nil { 500 return toLabel, fmt.Errorf("Split block %s is not part of original label %d", splitblk, fromLabel) 501 } 502 var rles dvid.RLEs 503 if err := rles.UnmarshalBinary(val); err != nil { 504 return toLabel, fmt.Errorf("Unable to unmarshal RLE for original labels in block %s", splitblk) 505 } 506 numVoxels, _ := rles.Stats() 507 toLabelSize += numVoxels 508 509 // Delete the old block and save the sparse volume but under a new label. 
510 batch.Delete(tk) 511 tk2 := NewTKey(toLabel, splitblk) 512 batch.Put(tk2, val) 513 } 514 515 if err := batch.Commit(); err != nil { 516 dvid.Errorf("Batch PUT during split of %q label %d: %v\n", d.DataName(), fromLabel, err) 517 } 518 519 // Publish split event 520 deltaSplit := labels.DeltaSplit{ 521 MutID: mutID, 522 OldLabel: fromLabel, 523 NewLabel: toLabel, 524 SortedBlocks: splitblks, 525 SplitVoxels: toLabelSize, 526 } 527 evt = datastore.SyncEvent{d.DataUUID(), labels.SplitLabelEvent} 528 msg = datastore.SyncMessage{labels.SplitLabelEvent, v, deltaSplit} 529 if err := datastore.NotifySubscribers(evt, msg); err != nil { 530 return 0, err 531 } 532 533 // Publish change in label sizes. 534 delta := labels.DeltaNewSize{ 535 Label: toLabel, 536 Size: toLabelSize, 537 } 538 evt = datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 539 msg = datastore.SyncMessage{labels.ChangeSizeEvent, v, delta} 540 if err := datastore.NotifySubscribers(evt, msg); err != nil { 541 return 0, err 542 } 543 544 delta2 := labels.DeltaModSize{ 545 Label: fromLabel, 546 SizeChange: int64(-toLabelSize), 547 } 548 evt = datastore.SyncEvent{d.DataUUID(), labels.ChangeSizeEvent} 549 msg = datastore.SyncMessage{labels.ChangeSizeEvent, v, delta2} 550 if err := datastore.NotifySubscribers(evt, msg); err != nil { 551 return 0, err 552 } 553 554 // Publish split end 555 evt = datastore.SyncEvent{d.DataUUID(), labels.SplitEndEvent} 556 msg = datastore.SyncMessage{labels.SplitEndEvent, v, splitOpEnd} 557 if err := datastore.NotifySubscribers(evt, msg); err != nil { 558 return 0, err 559 } 560 dvid.Infof("Split %d voxels from label %d to label %d\n", toLabelSize, fromLabel, toLabel) 561 562 return toLabel, nil 563 } 564 565 // write label volume in sorted order if available. 
566 func (d *Data) writeLabelVol(v dvid.VersionID, label uint64, brles dvid.BlockRLEs, sortblks []dvid.IZYXString) error { 567 store, err := datastore.GetOrderedKeyValueDB(d) 568 if err != nil { 569 return fmt.Errorf("Data type labelvol had error initializing store: %v\n", err) 570 } 571 batcher, ok := store.(storage.KeyValueBatcher) 572 if !ok { 573 return fmt.Errorf("Data type labelvol requires batch-enabled store, which %q is not\n", store) 574 } 575 576 ctx := datastore.NewVersionedCtx(d, v) 577 batch := batcher.NewBatch(ctx) 578 if sortblks != nil { 579 for _, izyxStr := range sortblks { 580 serialization, err := brles[izyxStr].MarshalBinary() 581 if err != nil { 582 return fmt.Errorf("Error serializing RLEs for label %d: %v\n", label, err) 583 } 584 batch.Put(NewTKey(label, izyxStr), serialization) 585 } 586 } else { 587 for izyxStr, rles := range brles { 588 serialization, err := rles.MarshalBinary() 589 if err != nil { 590 return fmt.Errorf("Error serializing RLEs for label %d: %v\n", label, err) 591 } 592 batch.Put(NewTKey(label, izyxStr), serialization) 593 } 594 } 595 if err := batch.Commit(); err != nil { 596 return fmt.Errorf("Error on updating RLEs for label %d: %v\n", label, err) 597 } 598 return nil 599 }