github.com/janelia-flyem/dvid@v1.0.0/datatype/labelarray/ingest.go

/*
	Functions that support ingestion of data into persistent label blocks.

	TODO: DRY this up compared to imageblk ingest once that gets converted to more general nD.
*/

package labelarray

import (
	"fmt"
	"sync"

	"github.com/janelia-flyem/dvid/datastore"
	"github.com/janelia-flyem/dvid/datatype/imageblk"
	"github.com/janelia-flyem/dvid/dvid"
	"github.com/janelia-flyem/dvid/server"
	"github.com/janelia-flyem/dvid/storage"
)

// LoadImages bulk loads images using different techniques if it is a multidimensional
// file like HDF5 or a sequence of PNG/JPG/TIF images.
func (d *Data) LoadImages(v dvid.VersionID, offset dvid.Point, filenames []string) error {
	if len(filenames) == 0 {
		return nil
	}
	timedLog := dvid.NewTimeLog()

	// We only want one PUT on a given version for given data to prevent interleaved
	// chunk PUTs that could potentially overwrite slice modifications.
	ctx := storage.NewDataContext(d, v)
	loadMutex := ctx.Mutex()
	loadMutex.Lock()
	vctx := datastore.NewVersionedCtx(d, v)

	// Handle cleanup given multiple goroutines still writing data.
	load := &bulkLoadInfo{filenames: filenames, versionID: v, offset: offset}
	defer func() {
		loadMutex.Unlock()

		if load.extentChanged.Value() {
			d.PostExtents(vctx, d.Extents().StartPoint(), d.Extents().EndPoint())
			err := datastore.SaveDataByVersion(v, d)
			if err != nil {
				dvid.Errorf("Error in trying to save repo for voxel extent change: %v\n", err)
			}
		}
	}()

	// Use different loading techniques if we have a potentially multidimensional HDF5 file
	// or many 2d images.
	var err error
	if dvid.Filename(filenames[0]).HasExtensionPrefix("hdf", "h5") {
		err = d.loadHDF(load)
	} else {
		err = d.loadXYImages(load)
	}
	if err != nil {
		timedLog.Infof("RPC load of %d files had error: %v\n", len(filenames), err)
	} else {
		timedLog.Infof("RPC load of %d files completed.\n", len(filenames))
	}
	return err
}

// Optimized bulk loading of XY images by loading all slices for a block before processing.
// Trades off memory for speed.
func (d *Data) loadXYImages(load *bulkLoadInfo) error {
	// Load first slice, get dimensions, allocate blocks for whole slice.
	// Note: We don't need to lock the block slices because goroutines do NOT
	// access the same elements of a slice.
	const numLayers = 2
	var numBlocks int
	var blocks [numLayers]storage.TKeyValues
	var layerTransferred, layerWritten [numLayers]sync.WaitGroup
	var waitForWrites sync.WaitGroup

	curBlocks := 0
	blockSize := d.BlockSize()
	blockBytes := blockSize.Prod() * int64(d.Values.BytesPerElement())

	// Iterate through XY slices batched into the Z length of blocks.
	fileNum := 1
	errs := make(chan error, 10) // keep track of async errors.
	for _, filename := range load.filenames {
		server.BlockOnInteractiveRequests("imageblk.loadXYImages")

		timedLog := dvid.NewTimeLog()

		zInBlock := load.offset.Value(2) % blockSize.Value(2)
		firstSlice := fileNum == 1
		lastSlice := fileNum == len(load.filenames)
		firstSliceInBlock := firstSlice || zInBlock == 0
		lastSliceInBlock := lastSlice || zInBlock == blockSize.Value(2)-1
		lastBlocks := fileNum+int(blockSize.Value(2)) > len(load.filenames)

		// Load images synchronously.
		vox, err := d.loadXYImage(filename, load.offset)
		if err != nil {
			return err
		}

		// Allocate blocks and/or load old block data if first/last XY blocks.
		// Note: Slices are only zeroed out on the first and last slice with the assumption
		// that ExtData is packed in XY footprint (values cover full extent).
		// If that is NOT the case, we need to zero out blocks for each block layer.
		if fileNum == 1 || (lastBlocks && firstSliceInBlock) {
			numBlocks = dvid.GetNumBlocks(vox, blockSize)
			if fileNum == 1 {
				for layer := 0; layer < numLayers; layer++ {
					blocks[layer] = make(storage.TKeyValues, numBlocks, numBlocks)
					for b := 0; b < numBlocks; b++ {
						blocks[layer][b].V = d.BackgroundBlock()
					}
				}
				var bufSize uint64 = uint64(blockBytes) * uint64(numBlocks) * uint64(numLayers) / 1000000
				dvid.Debugf("Allocated %d MB for buffers.\n", bufSize)
			} else {
				blocks[curBlocks] = make(storage.TKeyValues, numBlocks, numBlocks)
				for b := 0; b < numBlocks; b++ {
					blocks[curBlocks][b].V = d.BackgroundBlock()
				}
			}
			err = d.LoadOldBlocks(load.versionID, vox, blocks[curBlocks])
			if err != nil {
				return err
			}
		}

		// Transfer data between external<->internal blocks asynchronously.
		layerTransferred[curBlocks].Add(1)
		go func(vox *imageblk.Voxels, curBlocks int) {
			// Signal completion even on error so the writer goroutine for this
			// layer is not blocked waiting on this slice.
			defer layerTransferred[curBlocks].Done()

			// Track point extents.
			if d.Extents().AdjustPoints(vox.StartPoint(), vox.EndPoint()) {
				load.extentChanged.SetTrue()
			}

			// Process an XY image (slice).
			changed, err := d.writeXYImage(load.versionID, vox, blocks[curBlocks])
			if err != nil {
				err = fmt.Errorf("Error writing XY image: %v", err)
				if len(errs) < 10 {
					errs <- err
				}
				return
			}
			if changed {
				load.extentChanged.SetTrue()
			}
		}(vox, curBlocks)

		// If this is the end of a block (or of the filenames), wait until all goroutines complete,
		// then asynchronously write blocks.
		if lastSliceInBlock {
			waitForWrites.Add(1)
			layerWritten[curBlocks].Add(1)
			go func(curBlocks int) {
				layerTransferred[curBlocks].Wait()
				dvid.Debugf("Writing block buffer %d using %s and %s...\n",
					curBlocks, d.Compression(), d.Checksum())
				err := d.writeBlocks(load.versionID, blocks[curBlocks], &layerWritten[curBlocks], &waitForWrites)
				if err != nil {
					err = fmt.Errorf("Error in async write of voxel blocks: %v", err)
					if len(errs) < 10 {
						errs <- err
					}
				}
			}(curBlocks)
			// We can't move to buffer X until all blocks from buffer X have already been written.
			curBlocks = (curBlocks + 1) % numLayers
			dvid.Debugf("Waiting for layer %d to be written before reusing layer %d blocks\n",
				curBlocks, curBlocks)
			layerWritten[curBlocks].Wait()
			dvid.Debugf("Using layer %d...\n", curBlocks)
		}

		fileNum++
		load.offset = load.offset.Add(dvid.Point3d{0, 0, 1})
		timedLog.Infof("Loaded %s slice %s", d.DataName(), vox)
	}
	waitForWrites.Wait()

	// Drain any errors queued by the async goroutines and return the first one.
	var firsterr error
	if len(errs) > 0 {
		dvid.Errorf("Had at least %d errors in image loading:\n", len(errs))
		for len(errs) > 0 {
			err := <-errs
			dvid.Errorf("  Error: %v\n", err)
			if firsterr == nil {
				firsterr = err
			}
		}
	}
	return firsterr
}

// Loads an XY-oriented image at the given offset, returning Voxels.
func (d *Data) loadXYImage(filename string, offset dvid.Point) (*imageblk.Voxels, error) {
	img, _, err := dvid.GoImageFromFile(filename)
	if err != nil {
		return nil, err
	}
	slice, err := dvid.NewOrthogSlice(dvid.XY, offset, dvid.RectSize(img.Bounds()))
	if err != nil {
		return nil, fmt.Errorf("Unable to determine slice: %v", err)
	}
	vox, err := d.NewVoxels(slice, img)
	if err != nil {
		return nil, err
	}
	storage.FileBytesRead <- len(vox.Data())
	return vox, nil
}

func (d *Data) loadHDF(load *bulkLoadInfo) error {
	return fmt.Errorf("DVID currently does not support HDF5 image import")
}