github.com/janelia-flyem/dvid@v1.0.0/datatype/neuronjson/memstore.go (about) 1 package neuronjson 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "sort" 7 "strconv" 8 "strings" 9 "sync" 10 11 "github.com/janelia-flyem/dvid/datastore" 12 "github.com/janelia-flyem/dvid/datatype/keyvalue" 13 "github.com/janelia-flyem/dvid/dvid" 14 "github.com/janelia-flyem/dvid/storage" 15 ) 16 17 // memdbs is a map of all in-memory key-value stores for a given data instance. 18 // This data structure has to handle both branch HEAD dbs that will allow 19 // mutations and track the HEAD of the branch, as well as UUID dbs that are 20 // read-only. 21 type memdbs struct { 22 static map[dvid.UUID]*memdb 23 head map[string]*memdb 24 mu sync.RWMutex 25 } 26 27 func (d *Data) getMemDBbyVersion(v dvid.VersionID) (db *memdb, found bool) { 28 if d.dbs == nil { 29 return 30 } 31 d.dbs.mu.RLock() 32 defer d.dbs.mu.RUnlock() 33 uuid, err := datastore.UUIDFromVersion(v) 34 if err != nil { 35 return 36 } 37 38 db, found = d.dbs.static[uuid] 39 if found { 40 dvid.Infof("Found static memdb for version %d, uuid %s\n", v, uuid) 41 return 42 } 43 for branch := range d.dbs.head { 44 _, branchV, err := datastore.GetBranchHead(uuid, branch) 45 if err == nil && branchV == v { 46 dvid.Infof("Found head memdb for branch %s, version %d, uuid %s\n", branch, v, uuid) 47 return d.dbs.head[branch], true 48 } 49 dvid.Infof("Didn't find memdb for branch %s, version %d, uuid %s, found branch %d %t\n", branch, v, uuid, branchV, found) 50 } 51 return 52 } 53 54 // in-memory neuron annotations with sorted body id list for optional sorted iteration. 55 type memdb struct { 56 data map[uint64]NeuronJSON 57 ids []uint64 // sorted list of body ids 58 fields map[string]struct{} // list of all fields among the annotations 59 mu sync.RWMutex 60 } 61 62 // initializes the in-memory dbs for the given list of UUIDs + branch names in 63 // addition to the default HEAD of main/master branch. 64 func (d *Data) initMemoryDB(versions []string) error { 65 dbs := &memdbs{ 66 static: make(map[dvid.UUID]*memdb), 67 head: make(map[string]*memdb), 68 } 69 versions = append(versions, ":master") 70 dvid.Infof("Initializing in-memory dbs for neuronjson %q with versions %v\n", d.DataName(), versions) 71 for _, versionSpec := range versions { 72 mdb := &memdb{ 73 data: make(map[uint64]NeuronJSON), 74 fields: make(map[string]struct{}), 75 ids: []uint64{}, 76 } 77 if strings.HasPrefix(versionSpec, ":") { 78 branch := strings.TrimPrefix(versionSpec, ":") 79 dbs.head[branch] = mdb 80 _, v, err := datastore.GetBranchHead(d.RootUUID(), branch) 81 if err != nil { 82 dvid.Infof("could not find branch %q specified for neuronjson %q in-memory db: %v", 83 branch, d.DataName(), err) 84 } else if err := d.loadMemDB(v, mdb); err != nil { 85 return err 86 } 87 } else { 88 uuid, v, err := datastore.MatchingUUID(versionSpec) 89 if err != nil { 90 return err 91 } 92 if err := d.loadMemDB(v, mdb); err != nil { 93 return err 94 } 95 dbs.static[uuid] = mdb 96 } 97 } 98 d.dbsMu.Lock() 99 d.dbs = dbs 100 d.dbsMu.Unlock() 101 return nil 102 } 103 104 func (d *Data) loadMemDB(v dvid.VersionID, mdb *memdb) error { 105 ctx := datastore.NewVersionedCtx(d, v) 106 db, err := datastore.GetOrderedKeyValueDB(d) 107 if err != nil { 108 return fmt.Errorf("can't setup ordered keyvalue db for neuronjson %q: %v", d.DataName(), err) 109 } 110 111 tlog := dvid.NewTimeLog() 112 numLoaded := 0 113 err = db.ProcessRange(ctx, MinAnnotationTKey, MaxAnnotationTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error { 114 if c == nil || c.TKeyValue == nil { 115 return nil 116 } 117 kv := c.TKeyValue 118 if kv.V == nil { 119 return nil 120 } 121 key, err := DecodeTKey(kv.K) 122 if err != nil { 123 return err 124 } 125 126 bodyid, err := strconv.ParseUint(key, 10, 64) 127 if err != nil { 128 return fmt.Errorf("received non-integer key %q during neuronjson load from database: %v", key, err) 129 } 130 131 var annotation NeuronJSON 132 if err := json.Unmarshal(kv.V, &annotation); err != nil { 133 return fmt.Errorf("unable to decode annotation for bodyid %d, skipping: %v", bodyid, err) 134 } 135 mdb.addAnnotation(bodyid, annotation) 136 137 numLoaded++ 138 if numLoaded%1000 == 0 { 139 tlog.Infof("Loaded %d annotations into neuronjson instance %q, version id %d", 140 numLoaded, d.DataName(), v) 141 } 142 return nil 143 }) 144 if err != nil { 145 return fmt.Errorf("error loading neuron annotations into in-memory db for neuronjson %q, version id %d: %v", 146 d.DataName(), v, err) 147 } 148 sort.Slice(mdb.ids, func(i, j int) bool { return mdb.ids[i] < mdb.ids[j] }) 149 tlog.Infof("Completed loading of %d annotations into neuronjson instance %q version %d in-memory db", 150 numLoaded, d.DataName(), v) 151 return nil 152 } 153 154 // add bodyid to sorted in-memory list of bodyids 155 func (mdb *memdb) addBodyID(bodyid uint64) { 156 i := sort.Search(len(mdb.ids), func(i int) bool { return mdb.ids[i] >= bodyid }) 157 if i < len(mdb.ids) && mdb.ids[i] == bodyid { 158 return 159 } 160 mdb.ids = append(mdb.ids, 0) 161 copy(mdb.ids[i+1:], mdb.ids[i:]) 162 mdb.ids[i] = bodyid 163 } 164 165 // delete bodyid from sorted in-memory list of bodyids 166 func (mdb *memdb) deleteBodyID(bodyid uint64) { 167 i := sort.Search(len(mdb.ids), func(i int) bool { return mdb.ids[i] == bodyid }) 168 if i == len(mdb.ids) { 169 return 170 } 171 mdb.ids = append(mdb.ids[:i], mdb.ids[i+1:]...) 172 } 173 174 // add an annotation to the in-memory DB in batch mode assuming ids are sorted later 175 func (mdb *memdb) addAnnotation(bodyid uint64, annotation NeuronJSON) { 176 mdb.data[bodyid] = annotation 177 mdb.ids = append(mdb.ids, bodyid) 178 for field := range annotation { 179 mdb.fields[field] = struct{}{} 180 } 181 } 182 183 // importKV imports a keyvalue instance into the neuronjson instance. 184 func (d *Data) importKV(request datastore.Request, reply *datastore.Response) error { 185 if len(request.Command) < 5 { 186 return fmt.Errorf("keyvalue instance name must be specified after importKV") 187 } 188 var uuidStr, dataName, cmdStr, kvName string 189 request.CommandArgs(1, &uuidStr, &dataName, &cmdStr, &kvName) 190 191 uuid, versionID, err := datastore.MatchingUUID(uuidStr) 192 if err != nil { 193 return err 194 } 195 196 sourceKV, err := keyvalue.GetByUUIDName(uuid, dvid.InstanceName(kvName)) 197 if err != nil { 198 return err 199 } 200 go d.loadFromKV(versionID, sourceKV) 201 202 reply.Output = []byte(fmt.Sprintf("Started loading from keyvalue instance %q into neuronjson instance %q, uuid %s\n", 203 kvName, d.DataName(), uuidStr)) 204 return nil 205 } 206 207 // kvType is an interface for keyvalue instances we wish to migrate to neuronjson. 208 type kvType interface { 209 DataName() dvid.InstanceName 210 StreamKV(v dvid.VersionID) (chan storage.KeyValue, error) 211 } 212 213 // goroutine-friendly ingest from a keyvalue instance into main HEAD of neuronjson. 214 func (d *Data) loadFromKV(v dvid.VersionID, kvData kvType) { 215 tlog := dvid.NewTimeLog() 216 217 db, err := datastore.GetKeyValueDB(d) 218 if err != nil { 219 dvid.Criticalf("unable to get keyvalue database: %v", err) 220 return 221 } 222 mdb, found := d.getMemDBbyVersion(v) 223 if !found { 224 dvid.Criticalf("unable to get in-memory database for neuronjson %q, version %d", d.DataName(), v) 225 return 226 } 227 228 ch, err := kvData.StreamKV(v) 229 if err != nil { 230 dvid.Errorf("Error in getting stream of data from keyvalue instance %q: %v\n", kvData.DataName(), err) 231 return 232 } 233 ctx := datastore.NewVersionedCtx(d, v) 234 numLoaded := 0 235 numFromKV := 0 236 for kv := range ch { 237 key := string(kv.K) 238 numFromKV++ 239 240 // Handle metadata string keys 241 switch key { 242 case JSONSchema.String(): 243 dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q", 244 key, kvData.DataName(), d.DataName()) 245 if err := d.putMetadata(ctx, kv.V, JSONSchema); err != nil { 246 dvid.Errorf("Unable to handle JSON schema metadata transfer, skipping: %v\n", err) 247 } 248 continue 249 case NeuSchema.String(): 250 dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q", 251 key, kvData.DataName(), d.DataName()) 252 if err := d.putMetadata(ctx, kv.V, NeuSchema); err != nil { 253 dvid.Errorf("Unable to handle neutu/neu3 schema metadata transfer, skipping: %v\n", err) 254 } 255 continue 256 case NeuSchemaBatch.String(): 257 dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q", 258 key, kvData.DataName(), d.DataName()) 259 if err := d.putMetadata(ctx, kv.V, NeuSchemaBatch); err != nil { 260 dvid.Errorf("Unable to handle neutu/neu3 batch schema metadata transfer, skipping: %v\n", err) 261 } 262 continue 263 } 264 265 // Handle numeric keys for neuron annotations 266 bodyid, err := strconv.ParseUint(key, 10, 64) 267 if err != nil { 268 dvid.Errorf("Received non-integer key %q during neuronjson load from keyvalue: ignored\n", key) 269 continue 270 } 271 272 // a) Persist to storage first 273 tk, err := NewTKey(key) 274 if err != nil { 275 dvid.Errorf("unable to encode neuronjson %q key %q, skipping: %v\n", d.DataName(), key, err) 276 continue 277 } 278 if err := db.Put(ctx, tk, kv.V); err != nil { 279 dvid.Errorf("unable to persist neuronjson %q key %s annotation, skipping: %v\n", d.DataName(), key, err) 280 continue 281 } 282 283 // b) Add to in-memory annotations db 284 var annotation NeuronJSON 285 if err := json.Unmarshal(kv.V, &annotation); err != nil { 286 dvid.Errorf("Unable to decode annotation for bodyid %d, skipping: %v\n", bodyid, err) 287 continue 288 } 289 mdb.addAnnotation(bodyid, annotation) 290 291 numLoaded++ 292 if numLoaded%1000 == 0 { 293 tlog.Infof("Loaded %d annotations into neuronjson instance %q", numLoaded, d.DataName()) 294 } 295 } 296 sort.Slice(mdb.ids, func(i, j int) bool { return mdb.ids[i] < mdb.ids[j] }) 297 errored := numFromKV - numLoaded 298 tlog.Infof("Completed loading of %d annotations into neuronjson instance %q (%d skipped)", 299 numLoaded, d.DataName(), errored) 300 }