github.com/janelia-flyem/dvid@v1.0.0/datatype/neuronjson/memstore.go (about)

     1  package neuronjson
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"sort"
     7  	"strconv"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/janelia-flyem/dvid/datastore"
    12  	"github.com/janelia-flyem/dvid/datatype/keyvalue"
    13  	"github.com/janelia-flyem/dvid/dvid"
    14  	"github.com/janelia-flyem/dvid/storage"
    15  )
    16  
    17  // memdbs is a map of all in-memory key-value stores for a given data instance.
    18  // This data structure has to handle both branch HEAD dbs that will allow
    19  // mutations and track the HEAD of the branch, as well as UUID dbs that are
    20  // read-only.
    21  type memdbs struct {
    22  	static map[dvid.UUID]*memdb
    23  	head   map[string]*memdb
    24  	mu     sync.RWMutex
    25  }
    26  
    27  func (d *Data) getMemDBbyVersion(v dvid.VersionID) (db *memdb, found bool) {
    28  	if d.dbs == nil {
    29  		return
    30  	}
    31  	d.dbs.mu.RLock()
    32  	defer d.dbs.mu.RUnlock()
    33  	uuid, err := datastore.UUIDFromVersion(v)
    34  	if err != nil {
    35  		return
    36  	}
    37  
    38  	db, found = d.dbs.static[uuid]
    39  	if found {
    40  		dvid.Infof("Found static memdb for version %d, uuid %s\n", v, uuid)
    41  		return
    42  	}
    43  	for branch := range d.dbs.head {
    44  		_, branchV, err := datastore.GetBranchHead(uuid, branch)
    45  		if err == nil && branchV == v {
    46  			dvid.Infof("Found head memdb for branch %s, version %d, uuid %s\n", branch, v, uuid)
    47  			return d.dbs.head[branch], true
    48  		}
    49  		dvid.Infof("Didn't find memdb for branch %s, version %d, uuid %s, found branch %d %t\n", branch, v, uuid, branchV, found)
    50  	}
    51  	return
    52  }
    53  
    54  // in-memory neuron annotations with sorted body id list for optional sorted iteration.
    55  type memdb struct {
    56  	data   map[uint64]NeuronJSON
    57  	ids    []uint64            // sorted list of body ids
    58  	fields map[string]struct{} // list of all fields among the annotations
    59  	mu     sync.RWMutex
    60  }
    61  
    62  // initializes the in-memory dbs for the given list of UUIDs + branch names in
    63  // addition to the default HEAD of main/master branch.
    64  func (d *Data) initMemoryDB(versions []string) error {
    65  	dbs := &memdbs{
    66  		static: make(map[dvid.UUID]*memdb),
    67  		head:   make(map[string]*memdb),
    68  	}
    69  	versions = append(versions, ":master")
    70  	dvid.Infof("Initializing in-memory dbs for neuronjson %q with versions %v\n", d.DataName(), versions)
    71  	for _, versionSpec := range versions {
    72  		mdb := &memdb{
    73  			data:   make(map[uint64]NeuronJSON),
    74  			fields: make(map[string]struct{}),
    75  			ids:    []uint64{},
    76  		}
    77  		if strings.HasPrefix(versionSpec, ":") {
    78  			branch := strings.TrimPrefix(versionSpec, ":")
    79  			dbs.head[branch] = mdb
    80  			_, v, err := datastore.GetBranchHead(d.RootUUID(), branch)
    81  			if err != nil {
    82  				dvid.Infof("could not find branch %q specified for neuronjson %q in-memory db: %v",
    83  					branch, d.DataName(), err)
    84  			} else if err := d.loadMemDB(v, mdb); err != nil {
    85  				return err
    86  			}
    87  		} else {
    88  			uuid, v, err := datastore.MatchingUUID(versionSpec)
    89  			if err != nil {
    90  				return err
    91  			}
    92  			if err := d.loadMemDB(v, mdb); err != nil {
    93  				return err
    94  			}
    95  			dbs.static[uuid] = mdb
    96  		}
    97  	}
    98  	d.dbsMu.Lock()
    99  	d.dbs = dbs
   100  	d.dbsMu.Unlock()
   101  	return nil
   102  }
   103  
   104  func (d *Data) loadMemDB(v dvid.VersionID, mdb *memdb) error {
   105  	ctx := datastore.NewVersionedCtx(d, v)
   106  	db, err := datastore.GetOrderedKeyValueDB(d)
   107  	if err != nil {
   108  		return fmt.Errorf("can't setup ordered keyvalue db for neuronjson %q: %v", d.DataName(), err)
   109  	}
   110  
   111  	tlog := dvid.NewTimeLog()
   112  	numLoaded := 0
   113  	err = db.ProcessRange(ctx, MinAnnotationTKey, MaxAnnotationTKey, &storage.ChunkOp{}, func(c *storage.Chunk) error {
   114  		if c == nil || c.TKeyValue == nil {
   115  			return nil
   116  		}
   117  		kv := c.TKeyValue
   118  		if kv.V == nil {
   119  			return nil
   120  		}
   121  		key, err := DecodeTKey(kv.K)
   122  		if err != nil {
   123  			return err
   124  		}
   125  
   126  		bodyid, err := strconv.ParseUint(key, 10, 64)
   127  		if err != nil {
   128  			return fmt.Errorf("received non-integer key %q during neuronjson load from database: %v", key, err)
   129  		}
   130  
   131  		var annotation NeuronJSON
   132  		if err := json.Unmarshal(kv.V, &annotation); err != nil {
   133  			return fmt.Errorf("unable to decode annotation for bodyid %d, skipping: %v", bodyid, err)
   134  		}
   135  		mdb.addAnnotation(bodyid, annotation)
   136  
   137  		numLoaded++
   138  		if numLoaded%1000 == 0 {
   139  			tlog.Infof("Loaded %d annotations into neuronjson instance %q, version id %d",
   140  				numLoaded, d.DataName(), v)
   141  		}
   142  		return nil
   143  	})
   144  	if err != nil {
   145  		return fmt.Errorf("error loading neuron annotations into in-memory db for neuronjson %q, version id %d: %v",
   146  			d.DataName(), v, err)
   147  	}
   148  	sort.Slice(mdb.ids, func(i, j int) bool { return mdb.ids[i] < mdb.ids[j] })
   149  	tlog.Infof("Completed loading of %d annotations into neuronjson instance %q version %d in-memory db",
   150  		numLoaded, d.DataName(), v)
   151  	return nil
   152  }
   153  
   154  // add bodyid to sorted in-memory list of bodyids
   155  func (mdb *memdb) addBodyID(bodyid uint64) {
   156  	i := sort.Search(len(mdb.ids), func(i int) bool { return mdb.ids[i] >= bodyid })
   157  	if i < len(mdb.ids) && mdb.ids[i] == bodyid {
   158  		return
   159  	}
   160  	mdb.ids = append(mdb.ids, 0)
   161  	copy(mdb.ids[i+1:], mdb.ids[i:])
   162  	mdb.ids[i] = bodyid
   163  }
   164  
   165  // delete bodyid from sorted in-memory list of bodyids
   166  func (mdb *memdb) deleteBodyID(bodyid uint64) {
   167  	i := sort.Search(len(mdb.ids), func(i int) bool { return mdb.ids[i] == bodyid })
   168  	if i == len(mdb.ids) {
   169  		return
   170  	}
   171  	mdb.ids = append(mdb.ids[:i], mdb.ids[i+1:]...)
   172  }
   173  
   174  // add an annotation to the in-memory DB in batch mode assuming ids are sorted later
   175  func (mdb *memdb) addAnnotation(bodyid uint64, annotation NeuronJSON) {
   176  	mdb.data[bodyid] = annotation
   177  	mdb.ids = append(mdb.ids, bodyid)
   178  	for field := range annotation {
   179  		mdb.fields[field] = struct{}{}
   180  	}
   181  }
   182  
   183  // importKV imports a keyvalue instance into the neuronjson instance.
   184  func (d *Data) importKV(request datastore.Request, reply *datastore.Response) error {
   185  	if len(request.Command) < 5 {
   186  		return fmt.Errorf("keyvalue instance name must be specified after importKV")
   187  	}
   188  	var uuidStr, dataName, cmdStr, kvName string
   189  	request.CommandArgs(1, &uuidStr, &dataName, &cmdStr, &kvName)
   190  
   191  	uuid, versionID, err := datastore.MatchingUUID(uuidStr)
   192  	if err != nil {
   193  		return err
   194  	}
   195  
   196  	sourceKV, err := keyvalue.GetByUUIDName(uuid, dvid.InstanceName(kvName))
   197  	if err != nil {
   198  		return err
   199  	}
   200  	go d.loadFromKV(versionID, sourceKV)
   201  
   202  	reply.Output = []byte(fmt.Sprintf("Started loading from keyvalue instance %q into neuronjson instance %q, uuid %s\n",
   203  		kvName, d.DataName(), uuidStr))
   204  	return nil
   205  }
   206  
   207  // kvType is an interface for keyvalue instances we wish to migrate to neuronjson.
   208  type kvType interface {
   209  	DataName() dvid.InstanceName
   210  	StreamKV(v dvid.VersionID) (chan storage.KeyValue, error)
   211  }
   212  
   213  // goroutine-friendly ingest from a keyvalue instance into main HEAD of neuronjson.
   214  func (d *Data) loadFromKV(v dvid.VersionID, kvData kvType) {
   215  	tlog := dvid.NewTimeLog()
   216  
   217  	db, err := datastore.GetKeyValueDB(d)
   218  	if err != nil {
   219  		dvid.Criticalf("unable to get keyvalue database: %v", err)
   220  		return
   221  	}
   222  	mdb, found := d.getMemDBbyVersion(v)
   223  	if !found {
   224  		dvid.Criticalf("unable to get in-memory database for neuronjson %q, version %d", d.DataName(), v)
   225  		return
   226  	}
   227  
   228  	ch, err := kvData.StreamKV(v)
   229  	if err != nil {
   230  		dvid.Errorf("Error in getting stream of data from keyvalue instance %q: %v\n", kvData.DataName(), err)
   231  		return
   232  	}
   233  	ctx := datastore.NewVersionedCtx(d, v)
   234  	numLoaded := 0
   235  	numFromKV := 0
   236  	for kv := range ch {
   237  		key := string(kv.K)
   238  		numFromKV++
   239  
   240  		// Handle metadata string keys
   241  		switch key {
   242  		case JSONSchema.String():
   243  			dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q",
   244  				key, kvData.DataName(), d.DataName())
   245  			if err := d.putMetadata(ctx, kv.V, JSONSchema); err != nil {
   246  				dvid.Errorf("Unable to handle JSON schema metadata transfer, skipping: %v\n", err)
   247  			}
   248  			continue
   249  		case NeuSchema.String():
   250  			dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q",
   251  				key, kvData.DataName(), d.DataName())
   252  			if err := d.putMetadata(ctx, kv.V, NeuSchema); err != nil {
   253  				dvid.Errorf("Unable to handle neutu/neu3 schema metadata transfer, skipping: %v\n", err)
   254  			}
   255  			continue
   256  		case NeuSchemaBatch.String():
   257  			dvid.Infof("Transferring metadata %q from keyvalue instance %q to neuronjson instance %q",
   258  				key, kvData.DataName(), d.DataName())
   259  			if err := d.putMetadata(ctx, kv.V, NeuSchemaBatch); err != nil {
   260  				dvid.Errorf("Unable to handle neutu/neu3 batch schema metadata transfer, skipping: %v\n", err)
   261  			}
   262  			continue
   263  		}
   264  
   265  		// Handle numeric keys for neuron annotations
   266  		bodyid, err := strconv.ParseUint(key, 10, 64)
   267  		if err != nil {
   268  			dvid.Errorf("Received non-integer key %q during neuronjson load from keyvalue: ignored\n", key)
   269  			continue
   270  		}
   271  
   272  		// a) Persist to storage first
   273  		tk, err := NewTKey(key)
   274  		if err != nil {
   275  			dvid.Errorf("unable to encode neuronjson %q key %q, skipping: %v\n", d.DataName(), key, err)
   276  			continue
   277  		}
   278  		if err := db.Put(ctx, tk, kv.V); err != nil {
   279  			dvid.Errorf("unable to persist neuronjson %q key %s annotation, skipping: %v\n", d.DataName(), key, err)
   280  			continue
   281  		}
   282  
   283  		// b) Add to in-memory annotations db
   284  		var annotation NeuronJSON
   285  		if err := json.Unmarshal(kv.V, &annotation); err != nil {
   286  			dvid.Errorf("Unable to decode annotation for bodyid %d, skipping: %v\n", bodyid, err)
   287  			continue
   288  		}
   289  		mdb.addAnnotation(bodyid, annotation)
   290  
   291  		numLoaded++
   292  		if numLoaded%1000 == 0 {
   293  			tlog.Infof("Loaded %d annotations into neuronjson instance %q", numLoaded, d.DataName())
   294  		}
   295  	}
   296  	sort.Slice(mdb.ids, func(i, j int) bool { return mdb.ids[i] < mdb.ids[j] })
   297  	errored := numFromKV - numLoaded
   298  	tlog.Infof("Completed loading of %d annotations into neuronjson instance %q (%d skipped)",
   299  		numLoaded, d.DataName(), errored)
   300  }