github.com/janelia-flyem/dvid@v1.0.0/datatype/labelmap/vcache.go (about)

     1  package labelmap
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/DmitriyVTitov/size"
    12  	"github.com/janelia-flyem/dvid/datastore"
    13  	"github.com/janelia-flyem/dvid/datatype/common/labels"
    14  	"github.com/janelia-flyem/dvid/datatype/common/proto"
    15  	"github.com/janelia-flyem/dvid/dvid"
    16  	"github.com/janelia-flyem/dvid/storage"
    17  	pb "google.golang.org/protobuf/proto"
    18  )
    19  
    20  // VCache is holds in-memory versioned data for (1) version-aware, sharded label map,
    21  // (2) distance of a version from DAG root, and (3) split ops.
    22  // The label map tries to be memory efficient and also improves concurrency by
    23  // using sharded maps hashed by label modulo.
    24  type VCache struct {
    25  	mu sync.RWMutex // mutex for all fields
    26  
    27  	// Sharded maps of label mappings.
    28  	numShards uint64
    29  	mapShards []*mapShard
    30  	mapUsed   bool // true if there's at least one mapping
    31  
    32  	// Cache of versions with mappings and their distance from root (necessary for quick versioned value retrieval)
    33  	// Read/used very frequently (same as fm above), written heavily on server startup
    34  	// but extremely limited writes (only once per new version) afterwards.
    35  	mappedVersions   map[dvid.VersionID]distFromRoot // mapped versions' distance from root
    36  	mappedVersionsMu sync.RWMutex
    37  
    38  	// Cache of split ops done across versions. Read and write infrequently.
    39  	splits   map[dvid.VersionID][]proto.SupervoxelSplitOp
    40  	splitsMu sync.RWMutex
    41  }
    42  
    43  // Mapping from a label to a vmap, which holds (version, label) tuples.
    44  // Read very frequently, written heavily on server startup and occasionally afterwards.
    45  type mapShard struct {
    46  	fm   map[uint64]vmap // forward map from orig label to agglomerated (body) id
    47  	fmMu sync.RWMutex
    48  }
    49  
    50  func newVCache(numMaps int) (vc *VCache) {
    51  	vc = new(VCache)
    52  	vc.numShards = uint64(numMaps)
    53  	vc.mapShards = make([]*mapShard, numMaps)
    54  	for i := 0; i < numMaps; i++ {
    55  		vc.mapShards[i] = &mapShard{
    56  			fm: make(map[uint64]vmap),
    57  		}
    58  	}
    59  	vc.mappedVersions = make(map[dvid.VersionID]distFromRoot)
    60  	vc.splits = make(map[dvid.VersionID][]proto.SupervoxelSplitOp)
    61  	return
    62  }
    63  
    64  // --- Low-level mapping functions that handle locking ----
    65  
    66  // check if label has been mapped
    67  func (vc *VCache) hasMapping(label uint64) bool {
    68  	shard := label % vc.numShards
    69  	lmap := vc.mapShards[shard]
    70  
    71  	lmap.fmMu.RLock()
    72  	_, found := lmap.fm[label]
    73  	lmap.fmMu.RUnlock()
    74  	return found
    75  }
    76  
    77  // get mapping that should pass in mappedVersions from a getMappedVersionsDist().
    78  func (vc *VCache) mapLabel(label uint64, mappedVersions distFromRoot) (uint64, bool) {
    79  	if len(mappedVersions) == 0 {
    80  		return label, false
    81  	}
    82  	shard := label % vc.numShards
    83  	lmap := vc.mapShards[shard]
    84  
    85  	lmap.fmMu.RLock()
    86  	defer lmap.fmMu.RUnlock()
    87  
    88  	vm, found := lmap.fm[label]
    89  	if !found {
    90  		return label, false
    91  	}
    92  	return vm.value(mappedVersions)
    93  }
    94  
    95  // set mapping with expectation that SVMap has been locked for write
    96  func (vc *VCache) setMapping(v dvid.VersionID, from, to uint64) {
    97  	vc.mapUsed = true
    98  	shard := from % vc.numShards
    99  	lmap := vc.mapShards[shard]
   100  
   101  	lmap.fmMu.Lock()
   102  	vm := lmap.fm[from]
   103  	lmap.fm[from] = vm.modify(v, to, true)
   104  	lmap.fmMu.Unlock()
   105  }
   106  
   107  // write all mappings by iterating through the shards, holding the read lock only
   108  // for in-memory operation and writing data outside the lock.
   109  func (vc *VCache) writeMappings(w io.Writer, v dvid.VersionID, binaryFormat bool) (numMappings uint64, err error) {
   110  	mappedVersions := vc.getMappedVersionsDist(v)
   111  
   112  	var outBuf bytes.Buffer
   113  	for _, lmap := range vc.mapShards {
   114  		lmap.fmMu.RLock()
   115  		for fromLabel, vm := range lmap.fm {
   116  			toLabel, present := vm.value(mappedVersions)
   117  			if present {
   118  				numMappings++
   119  				if fromLabel != toLabel {
   120  					if binaryFormat {
   121  						err = binary.Write(&outBuf, binary.LittleEndian, fromLabel)
   122  						if err == nil {
   123  							err = binary.Write(&outBuf, binary.LittleEndian, toLabel)
   124  						}
   125  					} else {
   126  						line := fmt.Sprintf("%d %d\n", fromLabel, toLabel)
   127  						_, err = outBuf.WriteString(line)
   128  					}
   129  
   130  					if err != nil {
   131  						lmap.fmMu.RUnlock()
   132  						return
   133  					}
   134  				}
   135  			}
   136  		}
   137  		lmap.fmMu.RUnlock()
   138  		if _, err = w.Write(outBuf.Bytes()); err != nil {
   139  			return
   140  		}
   141  		outBuf.Reset()
   142  	}
   143  	return
   144  }
   145  
   146  func (vc *VCache) mapStats() (entries, numBytes uint64) {
   147  	for _, lmap := range vc.mapShards {
   148  		lmap.fmMu.RLock()
   149  		numBytes += uint64(size.Of(lmap))
   150  		entries += uint64(len(lmap.fm))
   151  		lmap.fmMu.RUnlock()
   152  	}
   153  	return
   154  }
   155  
   156  // --------------------------------------------------------
   157  
   158  func (vc *VCache) getMappedVersionsDist(v dvid.VersionID) distFromRoot {
   159  	vc.mappedVersionsMu.RLock()
   160  	dist, found := vc.mappedVersions[v]
   161  	vc.mappedVersionsMu.RUnlock()
   162  
   163  	if !found { // We have an uncached version so cache the distFromRoot
   164  		ancestry, err := datastore.GetAncestry(v)
   165  		if err != nil {
   166  			dvid.Errorf("Error getting ancestry for version %d: %v\n", v, err)
   167  			return nil
   168  		}
   169  		vc.mappedVersionsMu.Lock()
   170  		dist = getDistFromRoot(ancestry)
   171  		vc.mappedVersions[v] = dist
   172  		vc.mappedVersionsMu.Unlock()
   173  	}
   174  	return dist
   175  }
   176  
   177  // goroutine-safe function for intializing the in-memory mapping with a version's mutations log
   178  // and caching the mapped versions with the distance from the root.
   179  func (vc *VCache) loadVersionMapping(ancestors []dvid.VersionID, dataname dvid.InstanceName, ch chan storage.LogMessage, wg *sync.WaitGroup) {
   180  	if len(ancestors) == 0 {
   181  		return
   182  	}
   183  	timedLog := dvid.NewTimeLog()
   184  
   185  	v := ancestors[0]
   186  	var splits []proto.SupervoxelSplitOp
   187  	numMsgs := map[string]uint64{
   188  		"Mapping":         0,
   189  		"Split":           0,
   190  		"SupervoxelSplit": 0,
   191  		"Cleave":          0,
   192  		"Renumber":        0,
   193  	}
   194  
   195  	for msg := range ch { // expects channel to be closed on completion
   196  		switch msg.EntryType {
   197  		case proto.MappingOpType:
   198  			numMsgs["Mapping"]++
   199  			var op proto.MappingOp
   200  			if err := pb.Unmarshal(msg.Data, &op); err != nil {
   201  				dvid.Errorf("unable to unmarshal mapping log message for version %d: %v\n", v, err)
   202  				continue
   203  			}
   204  			mapped := op.GetMapped()
   205  			for _, supervoxel := range op.GetOriginal() {
   206  				vc.setMapping(v, supervoxel, mapped)
   207  			}
   208  
   209  		case proto.SplitOpType:
   210  			numMsgs["Split"]++
   211  			var op proto.SplitOp
   212  			if err := pb.Unmarshal(msg.Data, &op); err != nil {
   213  				dvid.Errorf("unable to unmarshal split log message for version %d: %v\n", v, err)
   214  				continue
   215  			}
   216  			for supervoxel, svsplit := range op.GetSvsplits() {
   217  				rec := proto.SupervoxelSplitOp{
   218  					Mutid:       op.Mutid,
   219  					Supervoxel:  supervoxel,
   220  					Remainlabel: svsplit.Remainlabel,
   221  					Splitlabel:  svsplit.Splitlabel,
   222  				}
   223  				splits = append(splits, rec)
   224  				vc.setMapping(v, supervoxel, 0)
   225  			}
   226  
   227  		case proto.SupervoxelSplitType:
   228  			numMsgs["SupervoxelSplit"]++
   229  			var op proto.SupervoxelSplitOp
   230  			if err := pb.Unmarshal(msg.Data, &op); err != nil {
   231  				dvid.Errorf("unable to unmarshal split log message for version %d: %v\n", v, err)
   232  				continue
   233  			}
   234  			rec := proto.SupervoxelSplitOp{
   235  				Mutid:       op.Mutid,
   236  				Supervoxel:  op.Supervoxel,
   237  				Remainlabel: op.Remainlabel,
   238  				Splitlabel:  op.Splitlabel,
   239  			}
   240  			splits = append(splits, rec)
   241  			vc.setMapping(v, op.Supervoxel, 0)
   242  
   243  		case proto.CleaveOpType:
   244  			numMsgs["Cleave"]++
   245  			var op proto.CleaveOp
   246  			if err := pb.Unmarshal(msg.Data, &op); err != nil {
   247  				dvid.Errorf("unable to unmarshal cleave log message for version %d: %v\n", v, err)
   248  				continue
   249  			}
   250  			vc.setMapping(v, op.Cleavedlabel, 0)
   251  
   252  		case proto.RenumberOpType:
   253  			numMsgs["Renumber"]++
   254  			var op proto.RenumberOp
   255  			if err := pb.Unmarshal(msg.Data, &op); err != nil {
   256  				dvid.Errorf("unable to unmarshal renumber log message for version %d: %v\n", v, err)
   257  				continue
   258  			}
   259  			// We don't set op.Target to 0 because it could be the ID of a supervoxel.
   260  			vc.setMapping(v, op.Newlabel, 0)
   261  
   262  		default:
   263  		}
   264  	}
   265  
   266  	vc.splitsMu.Lock()
   267  	vc.splits[v] = splits
   268  	vc.splitsMu.Unlock()
   269  	timedLog.Infof("Loaded mappings for data %q, version %d", dataname, v)
   270  	dvid.Infof("Mutations for version %d for data %q: %v\n", v, dataname, numMsgs)
   271  	wg.Done()
   272  }
   273  
   274  // makes sure that current map has been initialized with all forward mappings up to
   275  // given version as well as split index.  Note that this function can be called
   276  // multiple times and it won't reload formerly visited ancestors because it initializes
   277  // the mapping from current version -> root.
   278  func (vc *VCache) initToVersion(d dvid.Data, v dvid.VersionID, loadMutations bool) error {
   279  	vc.mu.Lock()
   280  	defer vc.mu.Unlock()
   281  
   282  	ancestors, err := datastore.GetAncestry(v)
   283  	if err != nil {
   284  		return err
   285  	}
   286  	for pos, ancestor := range ancestors {
   287  		vc.mappedVersionsMu.Lock()
   288  		if _, found := vc.mappedVersions[ancestor]; found {
   289  			vc.mappedVersionsMu.Unlock()
   290  			return nil // we have already loaded this version and its ancestors
   291  		}
   292  		vc.mappedVersions[ancestor] = getDistFromRoot(ancestors[pos:])
   293  		vc.mappedVersionsMu.Unlock()
   294  
   295  		if loadMutations {
   296  			ch := make(chan storage.LogMessage, 1000)
   297  			wg := new(sync.WaitGroup)
   298  			wg.Add(1)
   299  			go vc.loadVersionMapping(ancestors[pos:], d.DataName(), ch, wg)
   300  
   301  			if err = labels.StreamLog(d, ancestor, ch); err != nil {
   302  				return fmt.Errorf("problem loading mapping logs for data %q, version %d: %v", d.DataName(), ancestor, err)
   303  			}
   304  			wg.Wait()
   305  		}
   306  	}
   307  	return nil
   308  }
   309  
   310  // SupervoxelSplitsJSON returns a JSON string giving all the supervoxel splits from
   311  // this version to the root.
   312  func (vc *VCache) SupervoxelSplitsJSON(v dvid.VersionID) (string, error) {
   313  	ancestors, err := datastore.GetAncestry(v)
   314  	if err != nil {
   315  		return "", err
   316  	}
   317  	var items []string
   318  	for _, ancestor := range ancestors {
   319  		splitops, found := vc.splits[ancestor]
   320  		if !found || len(splitops) == 0 {
   321  			continue
   322  		}
   323  		uuid, err := datastore.UUIDFromVersion(ancestor)
   324  		if err != nil {
   325  			return "", err
   326  		}
   327  		str := fmt.Sprintf(`"%s",`, uuid)
   328  		splitstrs := make([]string, len(splitops))
   329  		for i, splitop := range splitops {
   330  			splitstrs[i] = fmt.Sprintf("[%d,%d,%d,%d]", splitop.Mutid, splitop.Supervoxel, splitop.Remainlabel, splitop.Splitlabel)
   331  		}
   332  		str += "[" + strings.Join(splitstrs, ",") + "]"
   333  		items = append(items, str)
   334  	}
   335  	return "[" + strings.Join(items, ",") + "]", nil
   336  }
   337  
   338  // MappedLabel returns the mapped label and a boolean: true if
   339  // a mapping was found and false if none was found.  For faster mapping,
   340  // large scale transformations, e.g. block-level output, should not use this
   341  // routine but work directly with mapLabel() doing locking and ancestry lookup
   342  // outside loops.
   343  func (vc *VCache) MappedLabel(v dvid.VersionID, label uint64) (uint64, bool) {
   344  	if vc == nil || !vc.hasMapping(label) {
   345  		return label, false
   346  	}
   347  	vc.mappedVersionsMu.RLock()
   348  	mappedVersions := vc.mappedVersions[v]
   349  	vc.mappedVersionsMu.RUnlock()
   350  
   351  	return vc.mapLabel(label, mappedVersions)
   352  }
   353  
   354  // MappedLabels returns an array of mapped labels, which could be the same as the passed slice.
   355  func (vc *VCache) MappedLabels(v dvid.VersionID, supervoxels []uint64) (mapped []uint64, found []bool, err error) {
   356  	found = make([]bool, len(supervoxels))
   357  	mapped = make([]uint64, len(supervoxels))
   358  	copy(mapped, supervoxels)
   359  
   360  	if vc == nil || !vc.mapUsed {
   361  		return
   362  	}
   363  	dist := vc.getMappedVersionsDist(v)
   364  	for i, supervoxel := range supervoxels {
   365  		label, wasMapped := vc.mapLabel(supervoxel, dist)
   366  		if wasMapped {
   367  			mapped[i] = label
   368  			found[i] = wasMapped
   369  		}
   370  	}
   371  	return
   372  }
   373  
   374  // ApplyMappingToBlock applies label mapping (given an ancestry path) to the passed labels.Block.
   375  func (vc *VCache) ApplyMappingToBlock(mappedVersions distFromRoot, block *labels.Block) {
   376  	for i, label := range block.Labels {
   377  		mapped, found := vc.mapLabel(label, mappedVersions)
   378  		if found {
   379  			block.Labels[i] = mapped
   380  		}
   381  	}
   382  }
   383  
   384  // a cache of the index of each version in an ancestry
   385  // where the root is 1 and leaf is len(ancestry).
   386  type distFromRoot map[dvid.VersionID]uint32
   387  
   388  func getDistFromRoot(ancestry []dvid.VersionID) distFromRoot {
   389  	distMap := make(distFromRoot, len(ancestry))
   390  	for i, v := range ancestry {
   391  		distMap[v] = uint32(len(ancestry) - i)
   392  	}
   393  	return distMap
   394  }
   395  
   396  // Versioned map entries for a given supervoxel, corresponding to
   397  // varint encodings of (VersionID, uint64) pairs.
   398  type vmap []byte
   399  
   400  func createEncodedMapping(v dvid.VersionID, label uint64) []byte {
   401  	buf := make([]byte, binary.MaxVarintLen32+binary.MaxVarintLen64)
   402  	n := binary.PutUvarint(buf, uint64(v))
   403  	n += binary.PutUvarint(buf[n:], label)
   404  	return buf[:n]
   405  }
   406  
   407  func readEncodedMapping(buf []byte) (v dvid.VersionID, label uint64, nbytes int) {
   408  	vid, vlen := binary.Uvarint(buf)
   409  	v = dvid.VersionID(vid)
   410  	label, llen := binary.Uvarint(buf[vlen:])
   411  	nbytes = vlen + llen
   412  	return
   413  }
   414  
   415  func (vm vmap) decodeMappings() map[dvid.VersionID]uint64 {
   416  	fm := map[dvid.VersionID]uint64{}
   417  	n := 0
   418  	for n < len(vm) {
   419  		v, label, nbytes := readEncodedMapping(vm[n:])
   420  		fm[v] = label
   421  		n += nbytes
   422  	}
   423  	return fm
   424  }
   425  
   426  func (vm vmap) String() string {
   427  	fm := vm.decodeMappings()
   428  	var out string
   429  	for v, label := range fm {
   430  		out += fmt.Sprintf("%d:%d ", v, label)
   431  	}
   432  	return out
   433  }
   434  
   435  // Returns the mapping for a given version given its mappedVersions.
   436  // Typically there will be fewer mappings for a given label than the
   437  // number of versions in the mappedVersions, so we cache the priority of
   438  // versions in the mappedVersions and iterate over the decoded mappings.
   439  // While this is O(N), N = modified mappings and should be small.
   440  func (vm vmap) value(mappedVersions distFromRoot) (label uint64, present bool) {
   441  	sz := len(vm)
   442  	if sz == 0 || len(mappedVersions) == 0 {
   443  		return 0, false
   444  	}
   445  	mapping := vm.decodeMappings()
   446  	var farthest uint32
   447  	for v, curLabel := range mapping {
   448  		rootDist, found := mappedVersions[v]
   449  		if found && rootDist > farthest {
   450  			farthest = rootDist
   451  			label = curLabel
   452  			present = true
   453  		}
   454  	}
   455  	return
   456  }
   457  
   458  // Returns the vmap encoding with the given version excised.
   459  func (vm vmap) excludeVersion(v dvid.VersionID) (out vmap) {
   460  	var exciseStart, exciseStop int
   461  	n := 0
   462  	for n < len(vm) {
   463  		vid, _, nbytes := readEncodedMapping(vm[n:])
   464  		if v == vid {
   465  			exciseStart = n
   466  			exciseStop = n + nbytes
   467  			break
   468  		}
   469  		n += nbytes
   470  	}
   471  	if exciseStop == 0 {
   472  		return vm
   473  	}
   474  	if exciseStart == 0 {
   475  		return vm[exciseStop:]
   476  	}
   477  	if exciseStop == len(vm) {
   478  		return vm[0:exciseStart]
   479  	}
   480  	return append(vm[0:exciseStart], vm[exciseStop:]...)
   481  }
   482  
   483  // Adds a unique version id and mapped label. If replace is true,
   484  // the mappings are checked for the given version and updated.
   485  // If it is known that the mappings don't include a version, like
   486  // when ingesting during initialization, then replace should be set
   487  // to false.
   488  func (vm vmap) modify(v dvid.VersionID, label uint64, replace bool) (out vmap) {
   489  	if len(vm) == 0 {
   490  		out = createEncodedMapping(v, label)
   491  		return
   492  	}
   493  	if replace {
   494  		out = vm.excludeVersion(v)
   495  	} else {
   496  		out = vm
   497  	}
   498  	return append(out, createEncodedMapping(v, label)...)
   499  }