github.com/janelia-flyem/dvid@v1.0.0/datatype/labelarray/ingest.go (about)

     1  /*
     2  	Functions that support ingestion of data into persistent label blocks.
     3  
     4  	TODO: DRY this up compared to imageblk ingest once that gets converted to more general nD.
     5  */
     6  
     7  package labelarray
     8  
     9  import (
    10  	"fmt"
    11  	"sync"
    12  
    13  	"github.com/janelia-flyem/dvid/datastore"
    14  	"github.com/janelia-flyem/dvid/datatype/imageblk"
    15  	"github.com/janelia-flyem/dvid/dvid"
    16  	"github.com/janelia-flyem/dvid/server"
    17  	"github.com/janelia-flyem/dvid/storage"
    18  )
    19  
    20  // LoadImages bulk loads images using different techniques if it is a multidimensional
    21  // file like HDF5 or a sequence of PNG/JPG/TIF images.
    22  func (d *Data) LoadImages(v dvid.VersionID, offset dvid.Point, filenames []string) error {
    23  	if len(filenames) == 0 {
    24  		return nil
    25  	}
    26  	timedLog := dvid.NewTimeLog()
    27  
    28  	// We only want one PUT on given version for given data to prevent interleaved
    29  	// chunk PUTs that could potentially overwrite slice modifications.
    30  	ctx := storage.NewDataContext(d, v)
    31  	loadMutex := ctx.Mutex()
    32  	loadMutex.Lock()
    33  	vctx := datastore.NewVersionedCtx(d, v)
    34  
    35  	// Handle cleanup given multiple goroutines still writing data.
    36  	load := &bulkLoadInfo{filenames: filenames, versionID: v, offset: offset}
    37  	defer func() {
    38  		loadMutex.Unlock()
    39  
    40  		if load.extentChanged.Value() {
    41  			d.PostExtents(vctx, d.Extents().StartPoint(), d.Extents().EndPoint())
    42  			err := datastore.SaveDataByVersion(v, d)
    43  			if err != nil {
    44  				dvid.Errorf("Error in trying to save repo for voxel extent change: %v\n", err)
    45  			}
    46  		}
    47  	}()
    48  
    49  	// Use different loading techniques if we have a potentially multidimensional HDF5 file
    50  	// or many 2d images.
    51  	var err error
    52  	if dvid.Filename(filenames[0]).HasExtensionPrefix("hdf", "h5") {
    53  		err = d.loadHDF(load)
    54  	} else {
    55  		err = d.loadXYImages(load)
    56  	}
    57  	if err != nil {
    58  		timedLog.Infof("RPC load of %d files had error: %v\n", err)
    59  	} else {
    60  		timedLog.Infof("RPC load of %d files completed.\n", len(filenames))
    61  	}
    62  	return err
    63  }
    64  
    65  // Optimized bulk loading of XY images by loading all slices for a block before processing.
    66  // Trades off memory for speed.
    67  func (d *Data) loadXYImages(load *bulkLoadInfo) error {
    68  	// Load first slice, get dimensions, allocate blocks for whole slice.
    69  	// Note: We don't need to lock the block slices because goroutines do NOT
    70  	// access the same elements of a slice.
    71  	const numLayers = 2
    72  	var numBlocks int
    73  	var blocks [numLayers]storage.TKeyValues
    74  	var layerTransferred, layerWritten [numLayers]sync.WaitGroup
    75  	var waitForWrites sync.WaitGroup
    76  
    77  	curBlocks := 0
    78  	blockSize := d.BlockSize()
    79  	blockBytes := blockSize.Prod() * int64(d.Values.BytesPerElement())
    80  
    81  	// Iterate through XY slices batched into the Z length of blocks.
    82  	fileNum := 1
    83  	errs := make(chan error, 10) // keep track of async errors.
    84  	for _, filename := range load.filenames {
    85  		server.BlockOnInteractiveRequests("imageblk.loadXYImages")
    86  
    87  		timedLog := dvid.NewTimeLog()
    88  
    89  		zInBlock := load.offset.Value(2) % blockSize.Value(2)
    90  		firstSlice := fileNum == 1
    91  		lastSlice := fileNum == len(load.filenames)
    92  		firstSliceInBlock := firstSlice || zInBlock == 0
    93  		lastSliceInBlock := lastSlice || zInBlock == blockSize.Value(2)-1
    94  		lastBlocks := fileNum+int(blockSize.Value(2)) > len(load.filenames)
    95  
    96  		// Load images synchronously
    97  		vox, err := d.loadXYImage(filename, load.offset)
    98  		if err != nil {
    99  			return err
   100  		}
   101  
   102  		// Allocate blocks and/or load old block data if first/last XY blocks.
   103  		// Note: Slices are only zeroed out on first and last slice with assumption
   104  		// that ExtData is packed in XY footprint (values cover full extent).
   105  		// If that is NOT the case, we need to zero out blocks for each block layer.
   106  		if fileNum == 1 || (lastBlocks && firstSliceInBlock) {
   107  			numBlocks = dvid.GetNumBlocks(vox, blockSize)
   108  			if fileNum == 1 {
   109  				for layer := 0; layer < numLayers; layer++ {
   110  					blocks[layer] = make(storage.TKeyValues, numBlocks, numBlocks)
   111  					for b := 0; b < numBlocks; b++ {
   112  						blocks[layer][b].V = d.BackgroundBlock()
   113  					}
   114  				}
   115  				var bufSize uint64 = uint64(blockBytes) * uint64(numBlocks) * uint64(numLayers) / 1000000
   116  				dvid.Debugf("Allocated %d MB for buffers.\n", bufSize)
   117  			} else {
   118  				blocks[curBlocks] = make(storage.TKeyValues, numBlocks, numBlocks)
   119  				for b := 0; b < numBlocks; b++ {
   120  					blocks[curBlocks][b].V = d.BackgroundBlock()
   121  				}
   122  			}
   123  			err = d.LoadOldBlocks(load.versionID, vox, blocks[curBlocks])
   124  			if err != nil {
   125  				return err
   126  			}
   127  		}
   128  
   129  		// Transfer data between external<->internal blocks asynchronously
   130  		layerTransferred[curBlocks].Add(1)
   131  		go func(vox *imageblk.Voxels, curBlocks int) {
   132  			// Track point extents
   133  			if d.Extents().AdjustPoints(vox.StartPoint(), vox.EndPoint()) {
   134  				load.extentChanged.SetTrue()
   135  			}
   136  
   137  			// Process an XY image (slice).
   138  			changed, err := d.writeXYImage(load.versionID, vox, blocks[curBlocks])
   139  			if err != nil {
   140  				err = fmt.Errorf("Error writing XY image: %v\n", err)
   141  				if len(errs) < 10 {
   142  					errs <- err
   143  				}
   144  				return
   145  			}
   146  			if changed {
   147  				load.extentChanged.SetTrue()
   148  			}
   149  			layerTransferred[curBlocks].Done()
   150  		}(vox, curBlocks)
   151  
   152  		// If this is the end of a block (or filenames), wait until all goroutines complete,
   153  		// then asynchronously write blocks.
   154  		if lastSliceInBlock {
   155  			waitForWrites.Add(1)
   156  			layerWritten[curBlocks].Add(1)
   157  			go func(curBlocks int) {
   158  				layerTransferred[curBlocks].Wait()
   159  				dvid.Debugf("Writing block buffer %d using %s and %s...\n",
   160  					curBlocks, d.Compression(), d.Checksum())
   161  				err := d.writeBlocks(load.versionID, blocks[curBlocks], &layerWritten[curBlocks], &waitForWrites)
   162  				if err != nil {
   163  					err = fmt.Errorf("Error in async write of voxel blocks: %v", err)
   164  					if len(errs) < 10 {
   165  						errs <- err
   166  					}
   167  				}
   168  			}(curBlocks)
   169  			// We can't move to buffer X until all blocks from buffer X have already been written.
   170  			curBlocks = (curBlocks + 1) % numLayers
   171  			dvid.Debugf("Waiting for layer %d to be written before reusing layer %d blocks\n",
   172  				curBlocks, curBlocks)
   173  			layerWritten[curBlocks].Wait()
   174  			dvid.Debugf("Using layer %d...\n", curBlocks)
   175  		}
   176  
   177  		fileNum++
   178  		load.offset = load.offset.Add(dvid.Point3d{0, 0, 1})
   179  		timedLog.Infof("Loaded %s slice %s", d.DataName(), vox)
   180  	}
   181  	waitForWrites.Wait()
   182  	var firsterr error
   183  	if len(errs) > 0 {
   184  		dvid.Errorf("Had at least %d errors in image loading:\n", len(errs))
   185  		for err := range errs {
   186  			dvid.Errorf("  Error: %v\n", err)
   187  			if firsterr == nil {
   188  				firsterr = err
   189  			}
   190  		}
   191  	}
   192  	return firsterr
   193  }
   194  
   195  // Loads a XY oriented image at given offset, returning Voxels.
   196  func (d *Data) loadXYImage(filename string, offset dvid.Point) (*imageblk.Voxels, error) {
   197  	img, _, err := dvid.GoImageFromFile(filename)
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  	slice, err := dvid.NewOrthogSlice(dvid.XY, offset, dvid.RectSize(img.Bounds()))
   202  	if err != nil {
   203  		return nil, fmt.Errorf("Unable to determine slice: %v", err)
   204  	}
   205  	vox, err := d.NewVoxels(slice, img)
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	storage.FileBytesRead <- len(vox.Data())
   210  	return vox, nil
   211  }
   212  
   213  func (d *Data) loadHDF(load *bulkLoadInfo) error {
   214  	return fmt.Errorf("DVID currently does not support HDF5 image import.")
   215  }