github.com/janelia-flyem/dvid@v1.0.0/datatype/neuronjson/version_files.go (about)

     1  package neuronjson
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"sync"
     8  
     9  	"github.com/janelia-flyem/dvid/datastore"
    10  	"github.com/janelia-flyem/dvid/dvid"
    11  	"github.com/janelia-flyem/dvid/storage"
    12  )
    13  
    14  // versionChanges writes JSON file for all changes by versions, including tombstones.
    15  func (d *Data) versionChanges(request datastore.Request, reply *datastore.Response) error {
    16  	if len(request.Command) < 5 {
    17  		return fmt.Errorf("path to output file must be specified after 'versionchanges'")
    18  	}
    19  	var uuidStr, dataName, cmdStr, filePath string
    20  	request.CommandArgs(1, &uuidStr, &dataName, &cmdStr, &filePath)
    21  
    22  	go d.writeVersions(filePath)
    23  
    24  	reply.Output = []byte(fmt.Sprintf("Started writing version changes of neuronjson instance %q into %s ...\n",
    25  		d.DataName(), filePath))
    26  	return nil
    27  }
    28  
    29  type versionFiles struct {
    30  	fmap map[dvid.UUID]*os.File
    31  	path string
    32  }
    33  
    34  func initVersionFiles(path string) (vf *versionFiles, err error) {
    35  	if _, err = os.Stat(path); os.IsNotExist(err) {
    36  		dvid.Infof("creating path for version files: %s\n", path)
    37  		if err = os.MkdirAll(path, 0744); err != nil {
    38  			err = fmt.Errorf("can't make directory at %s: %v", path, err)
    39  			return
    40  		}
    41  	} else if err != nil {
    42  		err = fmt.Errorf("error initializing version files directory: %v", err)
    43  		return
    44  	}
    45  	vf = &versionFiles{
    46  		fmap: make(map[dvid.UUID]*os.File),
    47  		path: path,
    48  	}
    49  	return
    50  }
    51  
    52  func (vf *versionFiles) write(uuid dvid.UUID, data string) (err error) {
    53  	f, found := vf.fmap[uuid]
    54  	if !found {
    55  		path := filepath.Join(vf.path, string(uuid)+".json")
    56  		f, err = os.OpenFile(path, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0755)
    57  		if err != nil {
    58  			return
    59  		}
    60  		vf.fmap[uuid] = f
    61  		data = "[" + data
    62  	} else {
    63  		data = "," + data
    64  	}
    65  	_, err = f.Write([]byte(data))
    66  	return
    67  }
    68  
    69  func (vf *versionFiles) close() {
    70  	for uuid, f := range vf.fmap {
    71  		if _, err := f.Write([]byte("]")); err != nil {
    72  			dvid.Errorf("unable to close list for uuid %s version file: %v\n", uuid, err)
    73  		}
    74  		f.Close()
    75  	}
    76  }
    77  
    78  // writeVersions creates a file per version with all changes, including tombstones, for that version.
    79  // Because the data is streamed to appropriate files during full database scan, very little has to
    80  // be kept in memory.
    81  func (d *Data) writeVersions(filePath string) error {
    82  	timedLog := dvid.NewTimeLog()
    83  	db, err := datastore.GetOrderedKeyValueDB(d)
    84  	if err != nil {
    85  		return err
    86  	}
    87  
    88  	vf, err := initVersionFiles(filePath)
    89  	if err != nil {
    90  		return err
    91  	}
    92  	wg := new(sync.WaitGroup)
    93  	wg.Add(1)
    94  	ch := make(chan *storage.KeyValue, 100)
    95  
    96  	var numAnnotations, numTombstones uint64
    97  	go func(wg *sync.WaitGroup, ch chan *storage.KeyValue) {
    98  		for {
    99  			kv := <-ch
   100  			if kv == nil {
   101  				wg.Done()
   102  				break
   103  			}
   104  
   105  			_, versionID, _, err := storage.DataKeyToLocalIDs(kv.K)
   106  			if err != nil {
   107  				dvid.Errorf("GetAllVersions error trying to parse data key %x: %v\n", kv.K, err)
   108  				continue
   109  			}
   110  			uuid, err := datastore.UUIDFromVersion(versionID)
   111  			if err != nil {
   112  				dvid.Errorf("GetAllVersions error trying to get UUID from version %d: %v", versionID, err)
   113  				continue
   114  			}
   115  
   116  			// append to the appropriate uuid in map of annotations by version
   117  			if kv.K.IsTombstone() {
   118  				numTombstones++
   119  				tk, err := storage.TKeyFromKey(kv.K)
   120  				if err != nil {
   121  					dvid.Errorf("GetAllVersions error trying to parse tombstone key %x: %v\n", kv.K, err)
   122  					continue
   123  				}
   124  				bodyid, err := DecodeTKey(tk)
   125  				if err != nil {
   126  					dvid.Errorf("GetAllVersions error trying to decode tombstone key %x: %v\n", kv.K, err)
   127  					continue
   128  				}
   129  				vf.write(uuid, fmt.Sprintf(`{"bodyid":%s, "tombstone":true}`, bodyid))
   130  			} else {
   131  				numAnnotations++
   132  				vf.write(uuid, string(kv.V))
   133  			}
   134  
   135  			if (numTombstones+numAnnotations)%10000 == 0 {
   136  				timedLog.Infof("Getting all neuronjson versions, instance %q, %d annotations, %d tombstones across %d versions",
   137  					d.DataName(), numAnnotations, numTombstones, len(vf.fmap))
   138  			}
   139  		}
   140  		vf.close()
   141  	}(wg, ch)
   142  
   143  	ctx := storage.NewDataContext(d, 0)
   144  	begKey := ctx.ConstructKeyVersion(MinAnnotationTKey, 0)
   145  	endKey := ctx.ConstructKeyVersion(MaxAnnotationTKey, 0) // version doesn't matter due to max prefix
   146  	if err := db.RawRangeQuery(begKey, endKey, false, ch, nil); err != nil {
   147  		return err
   148  	}
   149  	wg.Wait()
   150  
   151  	timedLog.Infof("Finished GetAllVersions for neuronjson %q, %d annotations, %d tombstones across %d versions",
   152  		d.DataName(), numAnnotations, numTombstones, len(vf.fmap))
   153  	return nil
   154  }