github.com/janelia-flyem/dvid@v1.0.0/datatype/annotation/denormalizations.go (about) 1 package annotation 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "reflect" 7 "sort" 8 "sync" 9 "sync/atomic" 10 11 "github.com/janelia-flyem/dvid/datastore" 12 "github.com/janelia-flyem/dvid/dvid" 13 "github.com/janelia-flyem/dvid/storage" 14 ) 15 16 // RecreateDenormalizations will recreate label and tag denormalizations from 17 // the block-based elements. 18 func (d *Data) RecreateDenormalizations(ctx *datastore.VersionedCtx, inMemory, check bool) { 19 if inMemory { 20 go d.resyncInMemory(ctx, check) 21 } else { 22 go d.resyncLowMemory(ctx) 23 } 24 } 25 26 func (d *Data) storeTags(batcher storage.KeyValueBatcher, ctx *datastore.VersionedCtx, tagE map[Tag]Elements) error { 27 batch := batcher.NewBatch(ctx) 28 if err := d.storeTagElements(ctx, batch, tagE); err != nil { 29 return err 30 } 31 if err := batch.Commit(); err != nil { 32 return fmt.Errorf("bad batch commit in reload for data %q: %v", d.DataName(), err) 33 } 34 return nil 35 } 36 37 func (d *Data) storeLabels(batcher storage.KeyValueBatcher, ctx *datastore.VersionedCtx, blockE Elements) error { 38 batch := batcher.NewBatch(ctx) 39 if err := d.storeLabelElements(ctx, batch, blockE); err != nil { 40 return err 41 } 42 if err := batch.Commit(); err != nil { 43 return fmt.Errorf("bad batch commit in reload for data %q: %v", d.DataName(), err) 44 } 45 return nil 46 } 47 48 func (d *Data) deleteDenormalizations(ctx *datastore.VersionedCtx) error { 49 store, err := datastore.GetOrderedKeyValueDB(d) 50 if err != nil { 51 return fmt.Errorf("annotation %q had error initializing store: %v", d.DataName(), err) 52 } 53 54 timedLog := dvid.NewTimeLog() 55 dvid.Infof("Deleting label kv denormalizations for annotation %q...\n", d.DataName()) 56 minLabelTKey := storage.MinTKey(keyLabel) 57 maxLabelTKey := storage.MaxTKey(keyLabel) 58 if err := store.DeleteRange(ctx, minLabelTKey, maxLabelTKey); err != nil { 59 return fmt.Errorf("unable to delete label denormalization for annotations %q: %v", d.DataName(), err) 60 } 61 timedLog.Infof("Finished deletion of label kv denormalizations for annotation %q", d.DataName()) 62 63 timedLog = dvid.NewTimeLog() 64 dvid.Infof("Deleting tag kv denormalizations for annotation %q...\n", d.DataName()) 65 minTagTKey := storage.MinTKey(keyTag) 66 maxTagTKey := storage.MaxTKey(keyTag) 67 if err := store.DeleteRange(ctx, minTagTKey, maxTagTKey); err != nil { 68 return fmt.Errorf("unable to delete tag denormalization for annotations %q: %v", d.DataName(), err) 69 } 70 timedLog.Infof("Finished deletion of tag kv denormalizations for annotation %q", d.DataName()) 71 return nil 72 } 73 74 type denormElems struct { 75 tk storage.TKey 76 elems ElementsNR 77 } 78 79 // Do in-memory resync of all keyBlock kv pairs, forcing the label and tag denormalizations. 80 // If check is true, checks denormalizations, logging any issues, and only replaces denormalizations 81 // when they are incorrect. 82 func (d *Data) resyncInMemory(ctx *datastore.VersionedCtx, check bool) { 83 d.Lock() 84 d.denormOngoing = true 85 d.Unlock() 86 defer func() { 87 d.Lock() 88 d.denormOngoing = false 89 d.Unlock() 90 }() 91 92 store, err := datastore.GetOrderedKeyValueDB(d) 93 if err != nil { 94 dvid.Errorf("Annotation %q had error initializing store: %v\n", d.DataName(), err) 95 return 96 } 97 if !check { 98 if err := d.deleteDenormalizations(ctx); err != nil { 99 dvid.Errorf("Can't delete denormalizations: %v\n", err) 100 return 101 } 102 } 103 104 var totBlocks, totElemErrs, totLabelE, totTagE int 105 106 labelE := LabelElements{} 107 tagE := make(map[Tag]ElementsNR) 108 109 minTKey := storage.MinTKey(keyBlock) 110 maxTKey := storage.MaxTKey(keyBlock) 111 112 timedLog := dvid.NewTimeLog() 113 err = store.ProcessRange(ctx, minTKey, maxTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error { 114 if c == nil { 115 return fmt.Errorf("received nil chunk in reload for data %q", d.DataName()) 116 } 117 if c.V == nil { 118 return nil 119 } 120 chunkPt, err := DecodeBlockTKey(c.K) 121 if err != nil { 122 return fmt.Errorf("couldn't decode chunk key %v for data %q", c.K, d.DataName()) 123 } 124 totBlocks++ 125 var elems Elements 126 if err := json.Unmarshal(c.V, &elems); err != nil { 127 return fmt.Errorf("couldn't unmarshal elements for data %q", d.DataName()) 128 } 129 if len(elems) == 0 { 130 return nil 131 } 132 133 blockSize := d.blockSize() 134 for _, elem := range elems { 135 // Check element is in correct block 136 elemChunkPt := elem.Pos.Chunk(blockSize).(dvid.ChunkPoint3d) 137 if !chunkPt.Equals(elemChunkPt) { 138 var keyBlockSize [3]int32 139 for i := uint8(0); i < 3; i++ { 140 keyIndex := chunkPt.Value(i) 141 if keyIndex != 0 { 142 keyBlockSize[i] = elem.Pos.Value(i) / keyIndex 143 } 144 145 } 146 dvid.Errorf("Element at %s found in incorrect block %s (using block size %s) instead of block key of %s (requires block size %d x %d x %d): %v\n", elem.Pos, elemChunkPt, blockSize, chunkPt, keyBlockSize[0], keyBlockSize[1], keyBlockSize[2], elem) 147 totElemErrs++ 148 } 149 // Add to Tag elements 150 if len(elem.Tags) > 0 { 151 for _, tag := range elem.Tags { 152 te := tagE[tag] 153 te = append(te, elem.ElementNR) 154 totTagE++ 155 tagE[tag] = te 156 } 157 } 158 } 159 elemsAdded, err := d.addLabelElements(ctx.VersionID(), labelE, chunkPt, elems) 160 if err != nil { 161 return err 162 } 163 totLabelE += elemsAdded 164 165 if totBlocks%1000 == 0 { 166 timedLog.Infof("Loaded %d blocks of annotations (%d elements in %d labels / %d elements in %d tags), errors %d", totBlocks, totLabelE, len(labelE), totTagE, len(tagE), totElemErrs) 167 } 168 return nil 169 }) 170 if err != nil { 171 dvid.Errorf("Error in reload of data %q: %v\n", d.DataName(), err) 172 } 173 timedLog.Infof("Completed loading %d blocks of annotations (%d elements in %d labels / %d elements in %d tags), errors %d", totBlocks, totLabelE, len(labelE), totTagE, len(tagE), totElemErrs) 174 175 // Get a sorted list of the labels so we can sequentially write them. 176 labels := make([]uint64, len(labelE)) 177 i := 0 178 for label := range labelE { 179 labels[i] = label 180 i++ 181 } 182 sort.Slice(labels, func(i, j int) bool { return labels[i] < labels[j] }) 183 184 if check { 185 d.write_denorms_with_check(ctx, store, labelE, tagE, labels) 186 } else { 187 d.write_denorms(ctx, store, labelE, tagE, labels) 188 } 189 190 } 191 192 func (d *Data) write_denorms_with_check(ctx *datastore.VersionedCtx, store storage.OrderedKeyValueDB, 193 labelE LabelElements, tagE map[Tag]ElementsNR, labels []uint64) { 194 195 timedLog := dvid.NewTimeLog() 196 197 // Write denormalizations 198 var wg sync.WaitGroup 199 var numErrs, numProcessed, numChanged int64 200 numTags := int64(len(tagE)) 201 ch := make(chan denormElems, 1000) 202 for i := 0; i < 100; i++ { 203 wg.Add(1) 204 go func() { 205 for de := range ch { 206 changed := true 207 correctNormalized := de.elems.Normalize() 208 old, err := getElementsNR(ctx, de.tk) 209 if err != nil { 210 atomic.AddInt64(&numErrs, 1) 211 continue 212 } 213 oldNormalized := old.Normalize() 214 if reflect.DeepEqual(correctNormalized, oldNormalized) { 215 changed = false 216 } 217 if changed { 218 atomic.AddInt64(&numChanged, 1) 219 val, err := json.Marshal(de.elems) 220 if err != nil { 221 atomic.AddInt64(&numErrs, 1) 222 continue 223 } 224 if err := store.Put(ctx, de.tk, val); err != nil { 225 atomic.AddInt64(&numErrs, 1) 226 } 227 } 228 atomic.AddInt64(&numProcessed, 1) 229 if numProcessed%100000 == 0 { 230 pct := float64(numProcessed) / float64(len(labels)) * 100.0 231 timedLog.Infof("Processed %6.3f%% of %d labels", pct, len(labels)) 232 } 233 } 234 wg.Done() 235 }() 236 } 237 238 dvid.Infof("Writing elements using checks for %d labels, %d tags ...\n", len(labels), numTags) 239 for _, label := range labels { 240 ch <- denormElems{tk: NewLabelTKey(label), elems: labelE[label]} 241 } 242 for tag, elems := range tagE { 243 tk, err := NewTagTKey(tag) 244 if err != nil { 245 dvid.Errorf("problem with tag key tkey for tag %q: %v\n", tag, err) 246 atomic.AddInt64(&numErrs, 1) 247 continue 248 } 249 ch <- denormElems{tk: tk, elems: elems} 250 } 251 close(ch) 252 wg.Wait() 253 timedLog.Infof("Finished checked denormalization of %d kvs, %d changed (%d errors)", numProcessed, numChanged, numErrs) 254 } 255 256 type denormJSON struct { 257 tk storage.TKey 258 elemsJSON []byte 259 } 260 261 func (d *Data) write_denorms(ctx *datastore.VersionedCtx, store storage.OrderedKeyValueDB, 262 labelE LabelElements, tagE map[Tag]ElementsNR, labels []uint64) { 263 264 timedLog := dvid.NewTimeLog() 265 266 // Write denormalizations 267 var wg sync.WaitGroup 268 var numErrs, numProcessed int64 269 numTags := int64(len(tagE)) 270 ch := make(chan denormJSON, 1000) 271 for i := 0; i < 100; i++ { 272 wg.Add(1) 273 go func() { 274 for de := range ch { 275 if err := store.Put(ctx, de.tk, de.elemsJSON); err != nil { 276 atomic.AddInt64(&numErrs, 1) 277 } 278 atomic.AddInt64(&numProcessed, 1) 279 if numProcessed%100000 == 0 { 280 pct := float64(numProcessed) / float64(len(labels)) * 100.0 281 timedLog.Infof("Processed %6.3f%% of %d labels", pct, len(labels)) 282 } 283 } 284 wg.Done() 285 }() 286 } 287 288 dvid.Infof("Writing elements for %d labels, %d tags ...\n", len(labels), numTags) 289 for _, label := range labels { 290 val, err := json.Marshal(labelE[label]) 291 delete(labelE, label) // once copied to JSON, we don't need the original 292 if err != nil { 293 atomic.AddInt64(&numErrs, 1) 294 continue 295 } 296 ch <- denormJSON{tk: NewLabelTKey(label), elemsJSON: val} 297 } 298 for tag, elems := range tagE { 299 tk, err := NewTagTKey(tag) 300 if err != nil { 301 dvid.Errorf("problem with tag key tkey for tag %q: %v\n", tag, err) 302 atomic.AddInt64(&numErrs, 1) 303 continue 304 } 305 val, err := json.Marshal(elems) 306 if err != nil { 307 atomic.AddInt64(&numErrs, 1) 308 continue 309 } 310 ch <- denormJSON{tk: tk, elemsJSON: val} 311 } 312 close(ch) 313 wg.Wait() 314 timedLog.Infof("Finished denormalization of %d kvs (%d errors)", numProcessed, numErrs) 315 } 316 317 // Get all keyBlock kv pairs, forcing the label and tag denormalizations. 318 func (d *Data) resyncLowMemory(ctx *datastore.VersionedCtx) { 319 d.Lock() 320 d.denormOngoing = true 321 d.Unlock() 322 defer func() { 323 d.Lock() 324 d.denormOngoing = false 325 d.Unlock() 326 }() 327 328 timedLog := dvid.NewTimeLog() 329 330 store, err := datastore.GetOrderedKeyValueDB(d) 331 if err != nil { 332 dvid.Errorf("Annotation %q had error initializing store: %v\n", d.DataName(), err) 333 return 334 } 335 batcher, ok := store.(storage.KeyValueBatcher) 336 if !ok { 337 dvid.Errorf("Data type annotation requires batch-enabled store, which %q is not\n", store) 338 return 339 } 340 341 if err := d.deleteDenormalizations(ctx); err != nil { 342 dvid.Errorf("Can't delete denormalizations: %v\n", err) 343 return 344 } 345 346 var numBlocks, numBlockE, numTagE int 347 var totMoved, totBlockE, totTagE int 348 349 var blockE Elements 350 tagE := make(map[Tag]Elements) 351 352 minTKey := storage.MinTKey(keyBlock) 353 maxTKey := storage.MaxTKey(keyBlock) 354 355 err = store.ProcessRange(ctx, minTKey, maxTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error { 356 if c == nil { 357 return fmt.Errorf("received nil chunk in reload for data %q", d.DataName()) 358 } 359 if c.V == nil { 360 return nil 361 } 362 chunkPt, err := DecodeBlockTKey(c.K) 363 if err != nil { 364 return fmt.Errorf("couldn't decode chunk key %v for data %q", c.K, d.DataName()) 365 } 366 367 var elems Elements 368 if err := json.Unmarshal(c.V, &elems); err != nil { 369 return fmt.Errorf("couldn't unmarshal elements for data %q", d.DataName()) 370 } 371 if len(elems) == 0 { 372 return nil 373 } 374 numBlocks++ 375 376 // Iterate through elements, organizing them into blocks and tags. 377 // Note: we do not check for redundancy and guarantee uniqueness at this stage. 378 blockFixBatch := batcher.NewBatch(ctx) 379 deleteElems := make(map[int]struct{}) 380 for i, elem := range elems { 381 // Check element is in correct block 382 elemChunkPt := elem.Pos.Chunk(d.blockSize()).(dvid.ChunkPoint3d) 383 if !chunkPt.Equals(elemChunkPt) { 384 dvid.Criticalf("Bad element at %s found in block %s: %v\n", elem.Pos, elemChunkPt, elem) 385 deleteElems[i] = struct{}{} 386 } 387 // Append to tags if present 388 if len(elem.Tags) > 0 { 389 for _, tag := range elem.Tags { 390 te := tagE[tag] 391 te = append(te, elem) 392 numTagE++ 393 tagE[tag] = te 394 } 395 } 396 } 397 if len(deleteElems) > 0 { 398 fixed := elems[:0] 399 for i, elem := range elems { 400 if _, found := deleteElems[i]; !found { 401 fixed = append(fixed, elem) 402 } 403 } 404 if err := putBatchElements(blockFixBatch, c.K, fixed); err != nil { 405 return err 406 } 407 if err := blockFixBatch.Commit(); err != nil { 408 return fmt.Errorf("bad batch commit in fixing block keyvalues for data %q: %v", d.DataName(), err) 409 } 410 elems = fixed 411 totMoved += len(deleteElems) 412 } 413 blockE = append(blockE, elems...) 414 numBlockE += len(elems) 415 416 if numTagE > 1000 { 417 if err := d.storeTags(batcher, ctx, tagE); err != nil { 418 return err 419 } 420 totTagE += numTagE 421 numTagE = 0 422 tagE = make(map[Tag]Elements) 423 } 424 if numBlockE > 1000 { 425 if err := d.storeLabels(batcher, ctx, blockE); err != nil { 426 return err 427 } 428 totBlockE += numBlockE 429 numBlockE = 0 430 blockE = Elements{} 431 timedLog.Infof("Loaded %d blocks of annotations (%d elements), moved %d", numBlocks, totBlockE, totMoved) 432 } 433 434 return nil 435 }) 436 if err != nil { 437 dvid.Errorf("Error in reload of data %q: %v\n", d.DataName(), err) 438 } 439 if numTagE > 0 { 440 totTagE += numTagE 441 if err := d.storeTags(batcher, ctx, tagE); err != nil { 442 dvid.Errorf("Error writing final set of tags of data %q: %v", d.DataName(), err) 443 } 444 } 445 if numBlockE > 0 { 446 totBlockE += numBlockE 447 if err := d.storeLabels(batcher, ctx, blockE); err != nil { 448 dvid.Errorf("Error writing final set of label elements of data %q: %v", d.DataName(), err) 449 } 450 } 451 452 timedLog.Infof("Completed asynchronous annotation %q reload of %d block and %d tag elements.", d.DataName(), totBlockE, totTagE) 453 }