// Source: github.com/janelia-flyem/dvid@v1.0.0/datatype/labelmap/vcache.go

package labelmap

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"strings"
	"sync"

	"github.com/DmitriyVTitov/size"
	"github.com/janelia-flyem/dvid/datastore"
	"github.com/janelia-flyem/dvid/datatype/common/labels"
	"github.com/janelia-flyem/dvid/datatype/common/proto"
	"github.com/janelia-flyem/dvid/dvid"
	"github.com/janelia-flyem/dvid/storage"
	pb "google.golang.org/protobuf/proto"
)

// VCache holds in-memory versioned data for (1) a version-aware, sharded label map,
// (2) the distance of a version from the DAG root, and (3) split ops.
// The label map tries to be memory efficient and also improves concurrency by
// using sharded maps hashed by label modulo the number of shards.
type VCache struct {
	mu sync.RWMutex // mutex for all fields

	// Sharded maps of label mappings.
	numShards uint64
	mapShards []*mapShard
	mapUsed   bool // true if there's at least one mapping

	// Cache of versions with mappings and their distance from root (necessary for quick versioned value retrieval).
	// Read/used very frequently (same as the per-shard fm maps), written heavily on server startup
	// but extremely limited writes (only once per new version) afterwards.
	mappedVersions   map[dvid.VersionID]distFromRoot // mapped versions' distance from root
	mappedVersionsMu sync.RWMutex

	// Cache of split ops done across versions. Read and write infrequently.
	splits   map[dvid.VersionID][]proto.SupervoxelSplitOp
	splitsMu sync.RWMutex
}

// Mapping from a label to a vmap, which holds (version, label) tuples.
// Read very frequently, written heavily on server startup and occasionally afterwards.
45 type mapShard struct { 46 fm map[uint64]vmap // forward map from orig label to agglomerated (body) id 47 fmMu sync.RWMutex 48 } 49 50 func newVCache(numMaps int) (vc *VCache) { 51 vc = new(VCache) 52 vc.numShards = uint64(numMaps) 53 vc.mapShards = make([]*mapShard, numMaps) 54 for i := 0; i < numMaps; i++ { 55 vc.mapShards[i] = &mapShard{ 56 fm: make(map[uint64]vmap), 57 } 58 } 59 vc.mappedVersions = make(map[dvid.VersionID]distFromRoot) 60 vc.splits = make(map[dvid.VersionID][]proto.SupervoxelSplitOp) 61 return 62 } 63 64 // --- Low-level mapping functions that handle locking ---- 65 66 // check if label has been mapped 67 func (vc *VCache) hasMapping(label uint64) bool { 68 shard := label % vc.numShards 69 lmap := vc.mapShards[shard] 70 71 lmap.fmMu.RLock() 72 _, found := lmap.fm[label] 73 lmap.fmMu.RUnlock() 74 return found 75 } 76 77 // get mapping that should pass in mappedVersions from a getMappedVersionsDist(). 78 func (vc *VCache) mapLabel(label uint64, mappedVersions distFromRoot) (uint64, bool) { 79 if len(mappedVersions) == 0 { 80 return label, false 81 } 82 shard := label % vc.numShards 83 lmap := vc.mapShards[shard] 84 85 lmap.fmMu.RLock() 86 defer lmap.fmMu.RUnlock() 87 88 vm, found := lmap.fm[label] 89 if !found { 90 return label, false 91 } 92 return vm.value(mappedVersions) 93 } 94 95 // set mapping with expectation that SVMap has been locked for write 96 func (vc *VCache) setMapping(v dvid.VersionID, from, to uint64) { 97 vc.mapUsed = true 98 shard := from % vc.numShards 99 lmap := vc.mapShards[shard] 100 101 lmap.fmMu.Lock() 102 vm := lmap.fm[from] 103 lmap.fm[from] = vm.modify(v, to, true) 104 lmap.fmMu.Unlock() 105 } 106 107 // write all mappings by iterating through the shards, holding the read lock only 108 // for in-memory operation and writing data outside the lock. 
109 func (vc *VCache) writeMappings(w io.Writer, v dvid.VersionID, binaryFormat bool) (numMappings uint64, err error) { 110 mappedVersions := vc.getMappedVersionsDist(v) 111 112 var outBuf bytes.Buffer 113 for _, lmap := range vc.mapShards { 114 lmap.fmMu.RLock() 115 for fromLabel, vm := range lmap.fm { 116 toLabel, present := vm.value(mappedVersions) 117 if present { 118 numMappings++ 119 if fromLabel != toLabel { 120 if binaryFormat { 121 err = binary.Write(&outBuf, binary.LittleEndian, fromLabel) 122 if err == nil { 123 err = binary.Write(&outBuf, binary.LittleEndian, toLabel) 124 } 125 } else { 126 line := fmt.Sprintf("%d %d\n", fromLabel, toLabel) 127 _, err = outBuf.WriteString(line) 128 } 129 130 if err != nil { 131 lmap.fmMu.RUnlock() 132 return 133 } 134 } 135 } 136 } 137 lmap.fmMu.RUnlock() 138 if _, err = w.Write(outBuf.Bytes()); err != nil { 139 return 140 } 141 outBuf.Reset() 142 } 143 return 144 } 145 146 func (vc *VCache) mapStats() (entries, numBytes uint64) { 147 for _, lmap := range vc.mapShards { 148 lmap.fmMu.RLock() 149 numBytes += uint64(size.Of(lmap)) 150 entries += uint64(len(lmap.fm)) 151 lmap.fmMu.RUnlock() 152 } 153 return 154 } 155 156 // -------------------------------------------------------- 157 158 func (vc *VCache) getMappedVersionsDist(v dvid.VersionID) distFromRoot { 159 vc.mappedVersionsMu.RLock() 160 dist, found := vc.mappedVersions[v] 161 vc.mappedVersionsMu.RUnlock() 162 163 if !found { // We have an uncached version so cache the distFromRoot 164 ancestry, err := datastore.GetAncestry(v) 165 if err != nil { 166 dvid.Errorf("Error getting ancestry for version %d: %v\n", v, err) 167 return nil 168 } 169 vc.mappedVersionsMu.Lock() 170 dist = getDistFromRoot(ancestry) 171 vc.mappedVersions[v] = dist 172 vc.mappedVersionsMu.Unlock() 173 } 174 return dist 175 } 176 177 // goroutine-safe function for intializing the in-memory mapping with a version's mutations log 178 // and caching the mapped versions with the distance from the 
root. 179 func (vc *VCache) loadVersionMapping(ancestors []dvid.VersionID, dataname dvid.InstanceName, ch chan storage.LogMessage, wg *sync.WaitGroup) { 180 if len(ancestors) == 0 { 181 return 182 } 183 timedLog := dvid.NewTimeLog() 184 185 v := ancestors[0] 186 var splits []proto.SupervoxelSplitOp 187 numMsgs := map[string]uint64{ 188 "Mapping": 0, 189 "Split": 0, 190 "SupervoxelSplit": 0, 191 "Cleave": 0, 192 "Renumber": 0, 193 } 194 195 for msg := range ch { // expects channel to be closed on completion 196 switch msg.EntryType { 197 case proto.MappingOpType: 198 numMsgs["Mapping"]++ 199 var op proto.MappingOp 200 if err := pb.Unmarshal(msg.Data, &op); err != nil { 201 dvid.Errorf("unable to unmarshal mapping log message for version %d: %v\n", v, err) 202 continue 203 } 204 mapped := op.GetMapped() 205 for _, supervoxel := range op.GetOriginal() { 206 vc.setMapping(v, supervoxel, mapped) 207 } 208 209 case proto.SplitOpType: 210 numMsgs["Split"]++ 211 var op proto.SplitOp 212 if err := pb.Unmarshal(msg.Data, &op); err != nil { 213 dvid.Errorf("unable to unmarshal split log message for version %d: %v\n", v, err) 214 continue 215 } 216 for supervoxel, svsplit := range op.GetSvsplits() { 217 rec := proto.SupervoxelSplitOp{ 218 Mutid: op.Mutid, 219 Supervoxel: supervoxel, 220 Remainlabel: svsplit.Remainlabel, 221 Splitlabel: svsplit.Splitlabel, 222 } 223 splits = append(splits, rec) 224 vc.setMapping(v, supervoxel, 0) 225 } 226 227 case proto.SupervoxelSplitType: 228 numMsgs["SupervoxelSplit"]++ 229 var op proto.SupervoxelSplitOp 230 if err := pb.Unmarshal(msg.Data, &op); err != nil { 231 dvid.Errorf("unable to unmarshal split log message for version %d: %v\n", v, err) 232 continue 233 } 234 rec := proto.SupervoxelSplitOp{ 235 Mutid: op.Mutid, 236 Supervoxel: op.Supervoxel, 237 Remainlabel: op.Remainlabel, 238 Splitlabel: op.Splitlabel, 239 } 240 splits = append(splits, rec) 241 vc.setMapping(v, op.Supervoxel, 0) 242 243 case proto.CleaveOpType: 244 
numMsgs["Cleave"]++ 245 var op proto.CleaveOp 246 if err := pb.Unmarshal(msg.Data, &op); err != nil { 247 dvid.Errorf("unable to unmarshal cleave log message for version %d: %v\n", v, err) 248 continue 249 } 250 vc.setMapping(v, op.Cleavedlabel, 0) 251 252 case proto.RenumberOpType: 253 numMsgs["Renumber"]++ 254 var op proto.RenumberOp 255 if err := pb.Unmarshal(msg.Data, &op); err != nil { 256 dvid.Errorf("unable to unmarshal renumber log message for version %d: %v\n", v, err) 257 continue 258 } 259 // We don't set op.Target to 0 because it could be the ID of a supervoxel. 260 vc.setMapping(v, op.Newlabel, 0) 261 262 default: 263 } 264 } 265 266 vc.splitsMu.Lock() 267 vc.splits[v] = splits 268 vc.splitsMu.Unlock() 269 timedLog.Infof("Loaded mappings for data %q, version %d", dataname, v) 270 dvid.Infof("Mutations for version %d for data %q: %v\n", v, dataname, numMsgs) 271 wg.Done() 272 } 273 274 // makes sure that current map has been initialized with all forward mappings up to 275 // given version as well as split index. Note that this function can be called 276 // multiple times and it won't reload formerly visited ancestors because it initializes 277 // the mapping from current version -> root. 
278 func (vc *VCache) initToVersion(d dvid.Data, v dvid.VersionID, loadMutations bool) error { 279 vc.mu.Lock() 280 defer vc.mu.Unlock() 281 282 ancestors, err := datastore.GetAncestry(v) 283 if err != nil { 284 return err 285 } 286 for pos, ancestor := range ancestors { 287 vc.mappedVersionsMu.Lock() 288 if _, found := vc.mappedVersions[ancestor]; found { 289 vc.mappedVersionsMu.Unlock() 290 return nil // we have already loaded this version and its ancestors 291 } 292 vc.mappedVersions[ancestor] = getDistFromRoot(ancestors[pos:]) 293 vc.mappedVersionsMu.Unlock() 294 295 if loadMutations { 296 ch := make(chan storage.LogMessage, 1000) 297 wg := new(sync.WaitGroup) 298 wg.Add(1) 299 go vc.loadVersionMapping(ancestors[pos:], d.DataName(), ch, wg) 300 301 if err = labels.StreamLog(d, ancestor, ch); err != nil { 302 return fmt.Errorf("problem loading mapping logs for data %q, version %d: %v", d.DataName(), ancestor, err) 303 } 304 wg.Wait() 305 } 306 } 307 return nil 308 } 309 310 // SupervoxelSplitsJSON returns a JSON string giving all the supervoxel splits from 311 // this version to the root. 
312 func (vc *VCache) SupervoxelSplitsJSON(v dvid.VersionID) (string, error) { 313 ancestors, err := datastore.GetAncestry(v) 314 if err != nil { 315 return "", err 316 } 317 var items []string 318 for _, ancestor := range ancestors { 319 splitops, found := vc.splits[ancestor] 320 if !found || len(splitops) == 0 { 321 continue 322 } 323 uuid, err := datastore.UUIDFromVersion(ancestor) 324 if err != nil { 325 return "", err 326 } 327 str := fmt.Sprintf(`"%s",`, uuid) 328 splitstrs := make([]string, len(splitops)) 329 for i, splitop := range splitops { 330 splitstrs[i] = fmt.Sprintf("[%d,%d,%d,%d]", splitop.Mutid, splitop.Supervoxel, splitop.Remainlabel, splitop.Splitlabel) 331 } 332 str += "[" + strings.Join(splitstrs, ",") + "]" 333 items = append(items, str) 334 } 335 return "[" + strings.Join(items, ",") + "]", nil 336 } 337 338 // MappedLabel returns the mapped label and a boolean: true if 339 // a mapping was found and false if none was found. For faster mapping, 340 // large scale transformations, e.g. block-level output, should not use this 341 // routine but work directly with mapLabel() doing locking and ancestry lookup 342 // outside loops. 343 func (vc *VCache) MappedLabel(v dvid.VersionID, label uint64) (uint64, bool) { 344 if vc == nil || !vc.hasMapping(label) { 345 return label, false 346 } 347 vc.mappedVersionsMu.RLock() 348 mappedVersions := vc.mappedVersions[v] 349 vc.mappedVersionsMu.RUnlock() 350 351 return vc.mapLabel(label, mappedVersions) 352 } 353 354 // MappedLabels returns an array of mapped labels, which could be the same as the passed slice. 
355 func (vc *VCache) MappedLabels(v dvid.VersionID, supervoxels []uint64) (mapped []uint64, found []bool, err error) { 356 found = make([]bool, len(supervoxels)) 357 mapped = make([]uint64, len(supervoxels)) 358 copy(mapped, supervoxels) 359 360 if vc == nil || !vc.mapUsed { 361 return 362 } 363 dist := vc.getMappedVersionsDist(v) 364 for i, supervoxel := range supervoxels { 365 label, wasMapped := vc.mapLabel(supervoxel, dist) 366 if wasMapped { 367 mapped[i] = label 368 found[i] = wasMapped 369 } 370 } 371 return 372 } 373 374 // ApplyMappingToBlock applies label mapping (given an ancestry path) to the passed labels.Block. 375 func (vc *VCache) ApplyMappingToBlock(mappedVersions distFromRoot, block *labels.Block) { 376 for i, label := range block.Labels { 377 mapped, found := vc.mapLabel(label, mappedVersions) 378 if found { 379 block.Labels[i] = mapped 380 } 381 } 382 } 383 384 // a cache of the index of each version in an ancestry 385 // where the root is 1 and leaf is len(ancestry). 386 type distFromRoot map[dvid.VersionID]uint32 387 388 func getDistFromRoot(ancestry []dvid.VersionID) distFromRoot { 389 distMap := make(distFromRoot, len(ancestry)) 390 for i, v := range ancestry { 391 distMap[v] = uint32(len(ancestry) - i) 392 } 393 return distMap 394 } 395 396 // Versioned map entries for a given supervoxel, corresponding to 397 // varint encodings of (VersionID, uint64) pairs. 
398 type vmap []byte 399 400 func createEncodedMapping(v dvid.VersionID, label uint64) []byte { 401 buf := make([]byte, binary.MaxVarintLen32+binary.MaxVarintLen64) 402 n := binary.PutUvarint(buf, uint64(v)) 403 n += binary.PutUvarint(buf[n:], label) 404 return buf[:n] 405 } 406 407 func readEncodedMapping(buf []byte) (v dvid.VersionID, label uint64, nbytes int) { 408 vid, vlen := binary.Uvarint(buf) 409 v = dvid.VersionID(vid) 410 label, llen := binary.Uvarint(buf[vlen:]) 411 nbytes = vlen + llen 412 return 413 } 414 415 func (vm vmap) decodeMappings() map[dvid.VersionID]uint64 { 416 fm := map[dvid.VersionID]uint64{} 417 n := 0 418 for n < len(vm) { 419 v, label, nbytes := readEncodedMapping(vm[n:]) 420 fm[v] = label 421 n += nbytes 422 } 423 return fm 424 } 425 426 func (vm vmap) String() string { 427 fm := vm.decodeMappings() 428 var out string 429 for v, label := range fm { 430 out += fmt.Sprintf("%d:%d ", v, label) 431 } 432 return out 433 } 434 435 // Returns the mapping for a given version given its mappedVersions. 436 // Typically there will be fewer mappings for a given label than the 437 // number of versions in the mappedVersions, so we cache the priority of 438 // versions in the mappedVersions and iterate over the decoded mappings. 439 // While this is O(N), N = modified mappings and should be small. 440 func (vm vmap) value(mappedVersions distFromRoot) (label uint64, present bool) { 441 sz := len(vm) 442 if sz == 0 || len(mappedVersions) == 0 { 443 return 0, false 444 } 445 mapping := vm.decodeMappings() 446 var farthest uint32 447 for v, curLabel := range mapping { 448 rootDist, found := mappedVersions[v] 449 if found && rootDist > farthest { 450 farthest = rootDist 451 label = curLabel 452 present = true 453 } 454 } 455 return 456 } 457 458 // Returns the vmap encoding with the given version excised. 
459 func (vm vmap) excludeVersion(v dvid.VersionID) (out vmap) { 460 var exciseStart, exciseStop int 461 n := 0 462 for n < len(vm) { 463 vid, _, nbytes := readEncodedMapping(vm[n:]) 464 if v == vid { 465 exciseStart = n 466 exciseStop = n + nbytes 467 break 468 } 469 n += nbytes 470 } 471 if exciseStop == 0 { 472 return vm 473 } 474 if exciseStart == 0 { 475 return vm[exciseStop:] 476 } 477 if exciseStop == len(vm) { 478 return vm[0:exciseStart] 479 } 480 return append(vm[0:exciseStart], vm[exciseStop:]...) 481 } 482 483 // Adds a unique version id and mapped label. If replace is true, 484 // the mappings are checked for the given version and updated. 485 // If it is known that the mappings don't include a version, like 486 // when ingesting during initialization, then replace should be set 487 // to false. 488 func (vm vmap) modify(v dvid.VersionID, label uint64, replace bool) (out vmap) { 489 if len(vm) == 0 { 490 out = createEncodedMapping(v, label) 491 return 492 } 493 if replace { 494 out = vm.excludeVersion(v) 495 } else { 496 out = vm 497 } 498 return append(out, createEncodedMapping(v, label)...) 499 }