github.com/janelia-flyem/dvid@v1.0.0/datatype/annotation/denormalizations.go (about)

     1  package annotation
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"reflect"
     7  	"sort"
     8  	"sync"
     9  	"sync/atomic"
    10  
    11  	"github.com/janelia-flyem/dvid/datastore"
    12  	"github.com/janelia-flyem/dvid/dvid"
    13  	"github.com/janelia-flyem/dvid/storage"
    14  )
    15  
    16  // RecreateDenormalizations will recreate label and tag denormalizations from
    17  // the block-based elements.
    18  func (d *Data) RecreateDenormalizations(ctx *datastore.VersionedCtx, inMemory, check bool) {
    19  	if inMemory {
    20  		go d.resyncInMemory(ctx, check)
    21  	} else {
    22  		go d.resyncLowMemory(ctx)
    23  	}
    24  }
    25  
    26  func (d *Data) storeTags(batcher storage.KeyValueBatcher, ctx *datastore.VersionedCtx, tagE map[Tag]Elements) error {
    27  	batch := batcher.NewBatch(ctx)
    28  	if err := d.storeTagElements(ctx, batch, tagE); err != nil {
    29  		return err
    30  	}
    31  	if err := batch.Commit(); err != nil {
    32  		return fmt.Errorf("bad batch commit in reload for data %q: %v", d.DataName(), err)
    33  	}
    34  	return nil
    35  }
    36  
    37  func (d *Data) storeLabels(batcher storage.KeyValueBatcher, ctx *datastore.VersionedCtx, blockE Elements) error {
    38  	batch := batcher.NewBatch(ctx)
    39  	if err := d.storeLabelElements(ctx, batch, blockE); err != nil {
    40  		return err
    41  	}
    42  	if err := batch.Commit(); err != nil {
    43  		return fmt.Errorf("bad batch commit in reload for data %q: %v", d.DataName(), err)
    44  	}
    45  	return nil
    46  }
    47  
    48  func (d *Data) deleteDenormalizations(ctx *datastore.VersionedCtx) error {
    49  	store, err := datastore.GetOrderedKeyValueDB(d)
    50  	if err != nil {
    51  		return fmt.Errorf("annotation %q had error initializing store: %v", d.DataName(), err)
    52  	}
    53  
    54  	timedLog := dvid.NewTimeLog()
    55  	dvid.Infof("Deleting label kv denormalizations for annotation %q...\n", d.DataName())
    56  	minLabelTKey := storage.MinTKey(keyLabel)
    57  	maxLabelTKey := storage.MaxTKey(keyLabel)
    58  	if err := store.DeleteRange(ctx, minLabelTKey, maxLabelTKey); err != nil {
    59  		return fmt.Errorf("unable to delete label denormalization for annotations %q: %v", d.DataName(), err)
    60  	}
    61  	timedLog.Infof("Finished deletion of label kv denormalizations for annotation %q", d.DataName())
    62  
    63  	timedLog = dvid.NewTimeLog()
    64  	dvid.Infof("Deleting tag kv denormalizations for annotation %q...\n", d.DataName())
    65  	minTagTKey := storage.MinTKey(keyTag)
    66  	maxTagTKey := storage.MaxTKey(keyTag)
    67  	if err := store.DeleteRange(ctx, minTagTKey, maxTagTKey); err != nil {
    68  		return fmt.Errorf("unable to delete tag denormalization for annotations %q: %v", d.DataName(), err)
    69  	}
    70  	timedLog.Infof("Finished deletion of tag kv denormalizations for annotation %q", d.DataName())
    71  	return nil
    72  }
    73  
// denormElems pairs a denormalization key (a label or tag TKey) with the
// non-relational elements that should be stored under that key.
type denormElems struct {
	tk    storage.TKey // label or tag key for the denormalized kv
	elems ElementsNR   // elements expected under tk
}
    78  
// Do in-memory resync of all keyBlock kv pairs, forcing the label and tag denormalizations.
// If check is true, checks denormalizations, logging any issues, and only replaces denormalizations
// when they are incorrect.
// All elements are accumulated into in-memory maps (labelE, tagE) before being
// written, so peak memory use grows with the total number of annotation elements.
func (d *Data) resyncInMemory(ctx *datastore.VersionedCtx, check bool) {
	// Mark a denormalization as ongoing so other code paths can detect it.
	d.Lock()
	d.denormOngoing = true
	d.Unlock()
	defer func() {
		d.Lock()
		d.denormOngoing = false
		d.Unlock()
	}()

	store, err := datastore.GetOrderedKeyValueDB(d)
	if err != nil {
		dvid.Errorf("Annotation %q had error initializing store: %v\n", d.DataName(), err)
		return
	}
	// Without checking, old denormalizations are simply wiped and rebuilt from
	// scratch; the check path instead compares per-key and overwrites selectively.
	if !check {
		if err := d.deleteDenormalizations(ctx); err != nil {
			dvid.Errorf("Can't delete denormalizations: %v\n", err)
			return
		}
	}

	var totBlocks, totElemErrs, totLabelE, totTagE int

	labelE := LabelElements{}        // accumulates elements per label
	tagE := make(map[Tag]ElementsNR) // accumulates elements per tag

	minTKey := storage.MinTKey(keyBlock)
	maxTKey := storage.MaxTKey(keyBlock)

	timedLog := dvid.NewTimeLog()
	// Single sequential pass over all block kv pairs, filling labelE and tagE.
	err = store.ProcessRange(ctx, minTKey, maxTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error {
		if c == nil {
			return fmt.Errorf("received nil chunk in reload for data %q", d.DataName())
		}
		if c.V == nil {
			return nil
		}
		chunkPt, err := DecodeBlockTKey(c.K)
		if err != nil {
			return fmt.Errorf("couldn't decode chunk key %v for data %q", c.K, d.DataName())
		}
		totBlocks++
		var elems Elements
		if err := json.Unmarshal(c.V, &elems); err != nil {
			return fmt.Errorf("couldn't unmarshal elements for data %q", d.DataName())
		}
		if len(elems) == 0 {
			return nil
		}

		blockSize := d.blockSize()
		for _, elem := range elems {
			// Check element is in correct block
			elemChunkPt := elem.Pos.Chunk(blockSize).(dvid.ChunkPoint3d)
			if !chunkPt.Equals(elemChunkPt) {
				// Reverse-engineer the block size implied by the key to aid debugging.
				// Misplaced elements are logged but still included in denormalizations.
				var keyBlockSize [3]int32
				for i := uint8(0); i < 3; i++ {
					keyIndex := chunkPt.Value(i)
					if keyIndex != 0 {
						keyBlockSize[i] = elem.Pos.Value(i) / keyIndex
					}

				}
				dvid.Errorf("Element at %s found in incorrect block %s (using block size %s) instead of block key of %s (requires block size %d x %d x %d): %v\n", elem.Pos, elemChunkPt, blockSize, chunkPt, keyBlockSize[0], keyBlockSize[1], keyBlockSize[2], elem)
				totElemErrs++
			}
			// Add to Tag elements
			if len(elem.Tags) > 0 {
				for _, tag := range elem.Tags {
					te := tagE[tag]
					te = append(te, elem.ElementNR)
					totTagE++
					tagE[tag] = te
				}
			}
		}
		elemsAdded, err := d.addLabelElements(ctx.VersionID(), labelE, chunkPt, elems)
		if err != nil {
			return err
		}
		totLabelE += elemsAdded

		// Periodic progress logging; the callback is sequential so plain
		// counters are safe here.
		if totBlocks%1000 == 0 {
			timedLog.Infof("Loaded %d blocks of annotations (%d elements in %d labels / %d elements in %d tags), errors %d", totBlocks, totLabelE, len(labelE), totTagE, len(tagE), totElemErrs)
		}
		return nil
	})
	if err != nil {
		dvid.Errorf("Error in reload of data %q: %v\n", d.DataName(), err)
	}
	timedLog.Infof("Completed loading %d blocks of annotations (%d elements in %d labels / %d elements in %d tags), errors %d", totBlocks, totLabelE, len(labelE), totTagE, len(tagE), totElemErrs)

	// Get a sorted list of the labels so we can sequentially write them.
	labels := make([]uint64, len(labelE))
	i := 0
	for label := range labelE {
		labels[i] = label
		i++
	}
	sort.Slice(labels, func(i, j int) bool { return labels[i] < labels[j] })

	if check {
		d.write_denorms_with_check(ctx, store, labelE, tagE, labels)
	} else {
		d.write_denorms(ctx, store, labelE, tagE, labels)
	}

}
   191  
   192  func (d *Data) write_denorms_with_check(ctx *datastore.VersionedCtx, store storage.OrderedKeyValueDB,
   193  	labelE LabelElements, tagE map[Tag]ElementsNR, labels []uint64) {
   194  
   195  	timedLog := dvid.NewTimeLog()
   196  
   197  	// Write denormalizations
   198  	var wg sync.WaitGroup
   199  	var numErrs, numProcessed, numChanged int64
   200  	numTags := int64(len(tagE))
   201  	ch := make(chan denormElems, 1000)
   202  	for i := 0; i < 100; i++ {
   203  		wg.Add(1)
   204  		go func() {
   205  			for de := range ch {
   206  				changed := true
   207  				correctNormalized := de.elems.Normalize()
   208  				old, err := getElementsNR(ctx, de.tk)
   209  				if err != nil {
   210  					atomic.AddInt64(&numErrs, 1)
   211  					continue
   212  				}
   213  				oldNormalized := old.Normalize()
   214  				if reflect.DeepEqual(correctNormalized, oldNormalized) {
   215  					changed = false
   216  				}
   217  				if changed {
   218  					atomic.AddInt64(&numChanged, 1)
   219  					val, err := json.Marshal(de.elems)
   220  					if err != nil {
   221  						atomic.AddInt64(&numErrs, 1)
   222  						continue
   223  					}
   224  					if err := store.Put(ctx, de.tk, val); err != nil {
   225  						atomic.AddInt64(&numErrs, 1)
   226  					}
   227  				}
   228  				atomic.AddInt64(&numProcessed, 1)
   229  				if numProcessed%100000 == 0 {
   230  					pct := float64(numProcessed) / float64(len(labels)) * 100.0
   231  					timedLog.Infof("Processed %6.3f%% of %d labels", pct, len(labels))
   232  				}
   233  			}
   234  			wg.Done()
   235  		}()
   236  	}
   237  
   238  	dvid.Infof("Writing elements using checks for %d labels, %d tags ...\n", len(labels), numTags)
   239  	for _, label := range labels {
   240  		ch <- denormElems{tk: NewLabelTKey(label), elems: labelE[label]}
   241  	}
   242  	for tag, elems := range tagE {
   243  		tk, err := NewTagTKey(tag)
   244  		if err != nil {
   245  			dvid.Errorf("problem with tag key tkey for tag %q: %v\n", tag, err)
   246  			atomic.AddInt64(&numErrs, 1)
   247  			continue
   248  		}
   249  		ch <- denormElems{tk: tk, elems: elems}
   250  	}
   251  	close(ch)
   252  	wg.Wait()
   253  	timedLog.Infof("Finished checked denormalization of %d kvs, %d changed (%d errors)", numProcessed, numChanged, numErrs)
   254  }
   255  
// denormJSON pairs a denormalization key (a label or tag TKey) with the
// already-marshaled JSON value to be stored under that key.
type denormJSON struct {
	tk        storage.TKey // label or tag key for the denormalized kv
	elemsJSON []byte       // JSON-encoded elements for tk
}
   260  
   261  func (d *Data) write_denorms(ctx *datastore.VersionedCtx, store storage.OrderedKeyValueDB,
   262  	labelE LabelElements, tagE map[Tag]ElementsNR, labels []uint64) {
   263  
   264  	timedLog := dvid.NewTimeLog()
   265  
   266  	// Write denormalizations
   267  	var wg sync.WaitGroup
   268  	var numErrs, numProcessed int64
   269  	numTags := int64(len(tagE))
   270  	ch := make(chan denormJSON, 1000)
   271  	for i := 0; i < 100; i++ {
   272  		wg.Add(1)
   273  		go func() {
   274  			for de := range ch {
   275  				if err := store.Put(ctx, de.tk, de.elemsJSON); err != nil {
   276  					atomic.AddInt64(&numErrs, 1)
   277  				}
   278  				atomic.AddInt64(&numProcessed, 1)
   279  				if numProcessed%100000 == 0 {
   280  					pct := float64(numProcessed) / float64(len(labels)) * 100.0
   281  					timedLog.Infof("Processed %6.3f%% of %d labels", pct, len(labels))
   282  				}
   283  			}
   284  			wg.Done()
   285  		}()
   286  	}
   287  
   288  	dvid.Infof("Writing elements for %d labels, %d tags ...\n", len(labels), numTags)
   289  	for _, label := range labels {
   290  		val, err := json.Marshal(labelE[label])
   291  		delete(labelE, label) // once copied to JSON, we don't need the original
   292  		if err != nil {
   293  			atomic.AddInt64(&numErrs, 1)
   294  			continue
   295  		}
   296  		ch <- denormJSON{tk: NewLabelTKey(label), elemsJSON: val}
   297  	}
   298  	for tag, elems := range tagE {
   299  		tk, err := NewTagTKey(tag)
   300  		if err != nil {
   301  			dvid.Errorf("problem with tag key tkey for tag %q: %v\n", tag, err)
   302  			atomic.AddInt64(&numErrs, 1)
   303  			continue
   304  		}
   305  		val, err := json.Marshal(elems)
   306  		if err != nil {
   307  			atomic.AddInt64(&numErrs, 1)
   308  			continue
   309  		}
   310  		ch <- denormJSON{tk: tk, elemsJSON: val}
   311  	}
   312  	close(ch)
   313  	wg.Wait()
   314  	timedLog.Infof("Finished denormalization of %d kvs (%d errors)", numProcessed, numErrs)
   315  }
   316  
   317  // Get all keyBlock kv pairs, forcing the label and tag denormalizations.
   318  func (d *Data) resyncLowMemory(ctx *datastore.VersionedCtx) {
   319  	d.Lock()
   320  	d.denormOngoing = true
   321  	d.Unlock()
   322  	defer func() {
   323  		d.Lock()
   324  		d.denormOngoing = false
   325  		d.Unlock()
   326  	}()
   327  
   328  	timedLog := dvid.NewTimeLog()
   329  
   330  	store, err := datastore.GetOrderedKeyValueDB(d)
   331  	if err != nil {
   332  		dvid.Errorf("Annotation %q had error initializing store: %v\n", d.DataName(), err)
   333  		return
   334  	}
   335  	batcher, ok := store.(storage.KeyValueBatcher)
   336  	if !ok {
   337  		dvid.Errorf("Data type annotation requires batch-enabled store, which %q is not\n", store)
   338  		return
   339  	}
   340  
   341  	if err := d.deleteDenormalizations(ctx); err != nil {
   342  		dvid.Errorf("Can't delete denormalizations: %v\n", err)
   343  		return
   344  	}
   345  
   346  	var numBlocks, numBlockE, numTagE int
   347  	var totMoved, totBlockE, totTagE int
   348  
   349  	var blockE Elements
   350  	tagE := make(map[Tag]Elements)
   351  
   352  	minTKey := storage.MinTKey(keyBlock)
   353  	maxTKey := storage.MaxTKey(keyBlock)
   354  
   355  	err = store.ProcessRange(ctx, minTKey, maxTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error {
   356  		if c == nil {
   357  			return fmt.Errorf("received nil chunk in reload for data %q", d.DataName())
   358  		}
   359  		if c.V == nil {
   360  			return nil
   361  		}
   362  		chunkPt, err := DecodeBlockTKey(c.K)
   363  		if err != nil {
   364  			return fmt.Errorf("couldn't decode chunk key %v for data %q", c.K, d.DataName())
   365  		}
   366  
   367  		var elems Elements
   368  		if err := json.Unmarshal(c.V, &elems); err != nil {
   369  			return fmt.Errorf("couldn't unmarshal elements for data %q", d.DataName())
   370  		}
   371  		if len(elems) == 0 {
   372  			return nil
   373  		}
   374  		numBlocks++
   375  
   376  		// Iterate through elements, organizing them into blocks and tags.
   377  		// Note: we do not check for redundancy and guarantee uniqueness at this stage.
   378  		blockFixBatch := batcher.NewBatch(ctx)
   379  		deleteElems := make(map[int]struct{})
   380  		for i, elem := range elems {
   381  			// Check element is in correct block
   382  			elemChunkPt := elem.Pos.Chunk(d.blockSize()).(dvid.ChunkPoint3d)
   383  			if !chunkPt.Equals(elemChunkPt) {
   384  				dvid.Criticalf("Bad element at %s found in block %s: %v\n", elem.Pos, elemChunkPt, elem)
   385  				deleteElems[i] = struct{}{}
   386  			}
   387  			// Append to tags if present
   388  			if len(elem.Tags) > 0 {
   389  				for _, tag := range elem.Tags {
   390  					te := tagE[tag]
   391  					te = append(te, elem)
   392  					numTagE++
   393  					tagE[tag] = te
   394  				}
   395  			}
   396  		}
   397  		if len(deleteElems) > 0 {
   398  			fixed := elems[:0]
   399  			for i, elem := range elems {
   400  				if _, found := deleteElems[i]; !found {
   401  					fixed = append(fixed, elem)
   402  				}
   403  			}
   404  			if err := putBatchElements(blockFixBatch, c.K, fixed); err != nil {
   405  				return err
   406  			}
   407  			if err := blockFixBatch.Commit(); err != nil {
   408  				return fmt.Errorf("bad batch commit in fixing block keyvalues for data %q: %v", d.DataName(), err)
   409  			}
   410  			elems = fixed
   411  			totMoved += len(deleteElems)
   412  		}
   413  		blockE = append(blockE, elems...)
   414  		numBlockE += len(elems)
   415  
   416  		if numTagE > 1000 {
   417  			if err := d.storeTags(batcher, ctx, tagE); err != nil {
   418  				return err
   419  			}
   420  			totTagE += numTagE
   421  			numTagE = 0
   422  			tagE = make(map[Tag]Elements)
   423  		}
   424  		if numBlockE > 1000 {
   425  			if err := d.storeLabels(batcher, ctx, blockE); err != nil {
   426  				return err
   427  			}
   428  			totBlockE += numBlockE
   429  			numBlockE = 0
   430  			blockE = Elements{}
   431  			timedLog.Infof("Loaded %d blocks of annotations (%d elements), moved %d", numBlocks, totBlockE, totMoved)
   432  		}
   433  
   434  		return nil
   435  	})
   436  	if err != nil {
   437  		dvid.Errorf("Error in reload of data %q: %v\n", d.DataName(), err)
   438  	}
   439  	if numTagE > 0 {
   440  		totTagE += numTagE
   441  		if err := d.storeTags(batcher, ctx, tagE); err != nil {
   442  			dvid.Errorf("Error writing final set of tags of data %q: %v", d.DataName(), err)
   443  		}
   444  	}
   445  	if numBlockE > 0 {
   446  		totBlockE += numBlockE
   447  		if err := d.storeLabels(batcher, ctx, blockE); err != nil {
   448  			dvid.Errorf("Error writing final set of label elements of data %q: %v", d.DataName(), err)
   449  		}
   450  	}
   451  
   452  	timedLog.Infof("Completed asynchronous annotation %q reload of %d block and %d tag elements.", d.DataName(), totBlockE, totTagE)
   453  }