github.com/janelia-flyem/dvid@v1.0.0/datatype/imageblk/ingest.go (about)

     1  /*
     2  	Functions that support ingestion of data into persistent image blocks.
     3  */
     4  
     5  package imageblk
     6  
     7  import (
     8  	"fmt"
     9  	"sync"
    10  
    11  	"github.com/janelia-flyem/dvid/datastore"
    12  	"github.com/janelia-flyem/dvid/dvid"
    13  	"github.com/janelia-flyem/dvid/server"
    14  	"github.com/janelia-flyem/dvid/storage"
    15  )
    16  
    17  // LoadImages bulk loads images using different techniques if it is a multidimensional
    18  // file like HDF5 or a sequence of PNG/JPG/TIF images.
    19  func (d *Data) LoadImages(v dvid.VersionID, offset dvid.Point, filenames []string) error {
    20  	if len(filenames) == 0 {
    21  		return nil
    22  	}
    23  	timedLog := dvid.NewTimeLog()
    24  
    25  	// We only want one PUT on given version for given data to prevent interleaved
    26  	// chunk PUTs that could potentially overwrite slice modifications.
    27  	ctx := storage.NewDataContext(d, v)
    28  	loadMutex := ctx.Mutex()
    29  	loadMutex.Lock()
    30  
    31  	// default extents
    32  	vctx := datastore.NewVersionedCtx(d, v)
    33  	extents, err := d.GetExtents(vctx)
    34  	if err != nil {
    35  		loadMutex.Unlock()
    36  		return err
    37  	}
    38  
    39  	// Handle cleanup given multiple goroutines still writing data.
    40  	load := &bulkLoadInfo{filenames: filenames, versionID: v, offset: offset}
    41  	defer func() {
    42  		loadMutex.Unlock()
    43  
    44  		if load.extentChanged.Value() {
    45  			// blocking call to save extents
    46  			d.PostExtents(vctx, extents.StartPoint(), extents.EndPoint())
    47  		}
    48  	}()
    49  
    50  	// Use different loading techniques if we have a potentially multidimensional HDF5 file
    51  	// or many 2d images.
    52  	if dvid.Filename(filenames[0]).HasExtensionPrefix("hdf", "h5") {
    53  		err = d.loadHDF(load)
    54  	} else {
    55  		err = d.loadXYImages(load, &extents)
    56  	}
    57  
    58  	if err != nil {
    59  		timedLog.Infof("RPC load of %d files had error: %v\n", err)
    60  	} else {
    61  		timedLog.Infof("RPC load of %d files completed.\n", len(filenames))
    62  	}
    63  	return err
    64  }
    65  
    66  // Optimized bulk loading of XY images by loading all slices for a block before processing.
    67  // Trades off memory for speed.
    68  func (d *Data) loadXYImages(load *bulkLoadInfo, extents *dvid.Extents) error {
    69  	// Load first slice, get dimensions, allocate blocks for whole slice.
    70  	// Note: We don't need to lock the block slices because goroutines do NOT
    71  	// access the same elements of a slice.
    72  	const numLayers = 2
    73  	var numBlocks int
    74  	var blocks [numLayers]storage.TKeyValues
    75  	var layerTransferred, layerWritten [numLayers]sync.WaitGroup
    76  	var waitForWrites sync.WaitGroup
    77  
    78  	curBlocks := 0
    79  	blockSize := d.BlockSize()
    80  	blockBytes := blockSize.Prod() * int64(d.Values.BytesPerElement())
    81  
    82  	// Iterate through XY slices batched into the Z length of blocks.
    83  	fileNum := 1
    84  	errs := make(chan error, 10) // keep track of async errors.
    85  	for _, filename := range load.filenames {
    86  		server.BlockOnInteractiveRequests("imageblk.loadXYImages")
    87  
    88  		timedLog := dvid.NewTimeLog()
    89  
    90  		zInBlock := load.offset.Value(2) % blockSize.Value(2)
    91  		firstSlice := fileNum == 1
    92  		lastSlice := fileNum == len(load.filenames)
    93  		firstSliceInBlock := firstSlice || zInBlock == 0
    94  		lastSliceInBlock := lastSlice || zInBlock == blockSize.Value(2)-1
    95  		lastBlocks := fileNum+int(blockSize.Value(2)) > len(load.filenames)
    96  
    97  		// Load images synchronously
    98  		vox, err := d.loadXYImage(filename, load.offset)
    99  		if err != nil {
   100  			return err
   101  		}
   102  
   103  		// Allocate blocks and/or load old block data if first/last XY blocks.
   104  		// Note: Slices are only zeroed out on first and last slice with assumption
   105  		// that ExtData is packed in XY footprint (values cover full extent).
   106  		// If that is NOT the case, we need to zero out blocks for each block layer.
   107  		if fileNum == 1 || (lastBlocks && firstSliceInBlock) {
   108  			numBlocks = dvid.GetNumBlocks(vox, blockSize)
   109  			if fileNum == 1 {
   110  				for layer := 0; layer < numLayers; layer++ {
   111  					blocks[layer] = make(storage.TKeyValues, numBlocks, numBlocks)
   112  					for b := 0; b < numBlocks; b++ {
   113  						blocks[layer][b].V = d.BackgroundBlock()
   114  					}
   115  				}
   116  				var bufSize uint64 = uint64(blockBytes) * uint64(numBlocks) * uint64(numLayers) / 1000000
   117  				dvid.Debugf("Allocated %d MB for buffers.\n", bufSize)
   118  			} else {
   119  				blocks[curBlocks] = make(storage.TKeyValues, numBlocks, numBlocks)
   120  				for b := 0; b < numBlocks; b++ {
   121  					blocks[curBlocks][b].V = d.BackgroundBlock()
   122  				}
   123  			}
   124  			err = d.LoadOldBlocks(load.versionID, vox, blocks[curBlocks])
   125  			if err != nil {
   126  				return err
   127  			}
   128  		}
   129  
   130  		// Transfer data between external<->internal blocks asynchronously
   131  		layerTransferred[curBlocks].Add(1)
   132  		go func(vox *Voxels, curBlocks int) {
   133  			// Track point extents
   134  			if extents.AdjustPoints(vox.StartPoint(), vox.EndPoint()) {
   135  				load.extentChanged.SetTrue()
   136  			}
   137  
   138  			// Process an XY image (slice).
   139  			err := d.writeXYImage(load.versionID, vox, blocks[curBlocks])
   140  			if err != nil {
   141  				err = fmt.Errorf("Error writing XY image: %v\n", err)
   142  				if len(errs) < 10 {
   143  					errs <- err
   144  				}
   145  				return
   146  			}
   147  			layerTransferred[curBlocks].Done()
   148  		}(vox, curBlocks)
   149  
   150  		// If this is the end of a block (or filenames), wait until all goroutines complete,
   151  		// then asynchronously write blocks.
   152  		if lastSliceInBlock {
   153  			waitForWrites.Add(1)
   154  			layerWritten[curBlocks].Add(1)
   155  			go func(curBlocks int) {
   156  				layerTransferred[curBlocks].Wait()
   157  				dvid.Debugf("Writing block buffer %d using %s and %s...\n",
   158  					curBlocks, d.Compression(), d.Checksum())
   159  				err := d.writeBlocks(load.versionID, blocks[curBlocks], &layerWritten[curBlocks], &waitForWrites)
   160  				if err != nil {
   161  					err = fmt.Errorf("Error in async write of voxel blocks: %v", err)
   162  					if len(errs) < 10 {
   163  						errs <- err
   164  					}
   165  				}
   166  			}(curBlocks)
   167  			// We can't move to buffer X until all blocks from buffer X have already been written.
   168  			curBlocks = (curBlocks + 1) % numLayers
   169  			dvid.Debugf("Waiting for layer %d to be written before reusing layer %d blocks\n",
   170  				curBlocks, curBlocks)
   171  			layerWritten[curBlocks].Wait()
   172  			dvid.Debugf("Using layer %d...\n", curBlocks)
   173  		}
   174  
   175  		fileNum++
   176  		load.offset = load.offset.Add(dvid.Point3d{0, 0, 1})
   177  		timedLog.Infof("Loaded %s slice %s", d.DataName(), vox)
   178  	}
   179  	waitForWrites.Wait()
   180  	var firsterr error
   181  	if len(errs) > 0 {
   182  		dvid.Errorf("Had at least %d errors in image loading:\n", len(errs))
   183  		for err := range errs {
   184  			dvid.Errorf("  Error: %v\n", err)
   185  			if firsterr == nil {
   186  				firsterr = err
   187  			}
   188  		}
   189  	}
   190  	return firsterr
   191  }
   192  
   193  // Loads a XY oriented image at given offset, returning Voxels.
   194  func (d *Data) loadXYImage(filename string, offset dvid.Point) (*Voxels, error) {
   195  	img, _, err := dvid.GoImageFromFile(filename)
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  	slice, err := dvid.NewOrthogSlice(dvid.XY, offset, dvid.RectSize(img.Bounds()))
   200  	if err != nil {
   201  		return nil, fmt.Errorf("Unable to determine slice: %v", err)
   202  	}
   203  	vox, err := d.NewVoxels(slice, img)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	storage.FileBytesRead <- len(vox.Data())
   208  	return vox, nil
   209  }
   210  
   211  func (d *Data) loadHDF(load *bulkLoadInfo) error {
   212  	return fmt.Errorf("DVID currently does not support HDF5 image import.")
   213  	// TODO: Use a DVID-specific HDF5 loader that works off HDF5 C library.
   214  	/*
   215  			for _, filename := range load.filenames {
   216  				f, err := hdf5.OpenFile(filename, hdf5.F_ACC_RDONLY)
   217  				if err != nil {
   218  					return err
   219  				}
   220  				defer f.Close()
   221  
   222  				fmt.Printf("Opened HDF5 file: %s\n", filename)
   223  				numobj, err := f.NumObjects()
   224  				fmt.Printf("Number of objects: %d\n", numobj)
   225  				for n := uint(0); n < numobj; n++ {
   226  					name, err := f.ObjectNameByIndex(n)
   227  					if err != nil {
   228  						return err
   229  					}
   230  					fmt.Printf("Object name %d: %s\n", n, name)
   231  					repo, err := f.OpenRepo(name)
   232  					if err != nil {
   233  						return err
   234  					}
   235  					dtype, err := repo.Datatype()
   236  					if err != nil {
   237  						return err
   238  					}
   239  					fmt.Printf("Type size: %d\n", dtype.Size())
   240  					dataspace := repo.Space()
   241  					dims, maxdims, err := dataspace.SimpleExtentDims()
   242  					if err != nil {
   243  						return err
   244  					}
   245  					fmt.Printf("Dims: %s\n", dims)
   246  					fmt.Printf("Maxdims: %s\n", maxdims)
   247  					data := make([]uint8, dims[0]*dims[1]*dims[2])
   248  					err = repo.Read(&data)
   249  					if err != nil {
   250  						return err
   251  					}
   252  					fmt.Printf("Read %d bytes\n", len(data))
   253  				}
   254  			}
   255  		return nil
   256  	*/
   257  }