github.com/janelia-flyem/dvid@v1.0.0/datatype/imageblk/ingest.go

/*
Functions that support ingestion of data into persistent image blocks.
*/

package imageblk

import (
	"fmt"
	"sync"

	"github.com/janelia-flyem/dvid/datastore"
	"github.com/janelia-flyem/dvid/dvid"
	"github.com/janelia-flyem/dvid/server"
	"github.com/janelia-flyem/dvid/storage"
)

// LoadImages bulk loads images, using different techniques depending on whether the
// input is a multidimensional file like HDF5 or a sequence of PNG/JPG/TIF images.
func (d *Data) LoadImages(v dvid.VersionID, offset dvid.Point, filenames []string) error {
	if len(filenames) == 0 {
		return nil
	}
	timedLog := dvid.NewTimeLog()

	// We only want one PUT on a given version for given data to prevent interleaved
	// chunk PUTs that could potentially overwrite slice modifications.
	ctx := storage.NewDataContext(d, v)
	loadMutex := ctx.Mutex()
	loadMutex.Lock()

	// default extents
	vctx := datastore.NewVersionedCtx(d, v)
	extents, err := d.GetExtents(vctx)
	if err != nil {
		loadMutex.Unlock()
		return err
	}

	// Handle cleanup given multiple goroutines still writing data.
	load := &bulkLoadInfo{filenames: filenames, versionID: v, offset: offset}
	defer func() {
		loadMutex.Unlock()

		if load.extentChanged.Value() {
			// blocking call to save extents
			d.PostExtents(vctx, extents.StartPoint(), extents.EndPoint())
		}
	}()

	// Use different loading techniques if we have a potentially multidimensional HDF5 file
	// or many 2d images.
	if dvid.Filename(filenames[0]).HasExtensionPrefix("hdf", "h5") {
		err = d.loadHDF(load)
	} else {
		err = d.loadXYImages(load, &extents)
	}

	if err != nil {
		timedLog.Infof("RPC load of %d files had error: %v\n", len(filenames), err)
	} else {
		timedLog.Infof("RPC load of %d files completed.\n", len(filenames))
	}
	return err
}

// Optimized bulk loading of XY images by loading all slices for a block before processing.
// Trades off memory for speed.
func (d *Data) loadXYImages(load *bulkLoadInfo, extents *dvid.Extents) error {
	// Load first slice, get dimensions, allocate blocks for whole slice.
	// Note: We don't need to lock the block slices because goroutines do NOT
	// access the same elements of a slice.
	const numLayers = 2
	var numBlocks int
	var blocks [numLayers]storage.TKeyValues
	var layerTransferred, layerWritten [numLayers]sync.WaitGroup
	var waitForWrites sync.WaitGroup

	curBlocks := 0
	blockSize := d.BlockSize()
	blockBytes := blockSize.Prod() * int64(d.Values.BytesPerElement())

	// Iterate through XY slices batched into the Z length of blocks.
	fileNum := 1
	errs := make(chan error, 10) // keep track of async errors.
	for _, filename := range load.filenames {
		server.BlockOnInteractiveRequests("imageblk.loadXYImages")

		timedLog := dvid.NewTimeLog()

		zInBlock := load.offset.Value(2) % blockSize.Value(2)
		firstSlice := fileNum == 1
		lastSlice := fileNum == len(load.filenames)
		firstSliceInBlock := firstSlice || zInBlock == 0
		lastSliceInBlock := lastSlice || zInBlock == blockSize.Value(2)-1
		lastBlocks := fileNum+int(blockSize.Value(2)) > len(load.filenames)

		// Load images synchronously
		vox, err := d.loadXYImage(filename, load.offset)
		if err != nil {
			return err
		}

		// Allocate blocks and/or load old block data if first/last XY blocks.
		// Note: Slices are only zeroed out on first and last slice with assumption
		// that ExtData is packed in XY footprint (values cover full extent).
		// If that is NOT the case, we need to zero out blocks for each block layer.
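		// Added note (illustrative, not part of the original source): a rough worked
		// example of the buffer cost allocated below, assuming for illustration
		// 32x32x32 blocks of 1-byte voxels and a 1024x1024 XY slice: blockBytes =
		// 32768 and numBlocks = (1024/32)*(1024/32) = 1024, so the two block layers
		// together hold 32768 * 1024 * 2 / 1e6, roughly 67 MB, which is what the
		// Debugf estimate below would report.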
		if fileNum == 1 || (lastBlocks && firstSliceInBlock) {
			numBlocks = dvid.GetNumBlocks(vox, blockSize)
			if fileNum == 1 {
				for layer := 0; layer < numLayers; layer++ {
					blocks[layer] = make(storage.TKeyValues, numBlocks, numBlocks)
					for b := 0; b < numBlocks; b++ {
						blocks[layer][b].V = d.BackgroundBlock()
					}
				}
				var bufSize uint64 = uint64(blockBytes) * uint64(numBlocks) * uint64(numLayers) / 1000000
				dvid.Debugf("Allocated %d MB for buffers.\n", bufSize)
			} else {
				blocks[curBlocks] = make(storage.TKeyValues, numBlocks, numBlocks)
				for b := 0; b < numBlocks; b++ {
					blocks[curBlocks][b].V = d.BackgroundBlock()
				}
			}
			err = d.LoadOldBlocks(load.versionID, vox, blocks[curBlocks])
			if err != nil {
				return err
			}
		}

		// Transfer data between external<->internal blocks asynchronously
		layerTransferred[curBlocks].Add(1)
		go func(vox *Voxels, curBlocks int) {
			// Signal completion even on error so the write goroutine waiting on
			// this layer cannot block forever.
			defer layerTransferred[curBlocks].Done()

			// Track point extents
			if extents.AdjustPoints(vox.StartPoint(), vox.EndPoint()) {
				load.extentChanged.SetTrue()
			}

			// Process an XY image (slice).
			err := d.writeXYImage(load.versionID, vox, blocks[curBlocks])
			if err != nil {
				err = fmt.Errorf("Error writing XY image: %v\n", err)
				if len(errs) < 10 {
					errs <- err
				}
			}
		}(vox, curBlocks)

		// If this is the end of a block (or filenames), wait until all goroutines complete,
		// then asynchronously write blocks.
		if lastSliceInBlock {
			waitForWrites.Add(1)
			layerWritten[curBlocks].Add(1)
			go func(curBlocks int) {
				layerTransferred[curBlocks].Wait()
				dvid.Debugf("Writing block buffer %d using %s and %s...\n",
					curBlocks, d.Compression(), d.Checksum())
				err := d.writeBlocks(load.versionID, blocks[curBlocks], &layerWritten[curBlocks], &waitForWrites)
				if err != nil {
					err = fmt.Errorf("Error in async write of voxel blocks: %v", err)
					if len(errs) < 10 {
						errs <- err
					}
				}
			}(curBlocks)
			// We can't move to buffer X until all blocks from buffer X have already been written.
			curBlocks = (curBlocks + 1) % numLayers
			dvid.Debugf("Waiting for layer %d to be written before reusing layer %d blocks\n",
				curBlocks, curBlocks)
			layerWritten[curBlocks].Wait()
			dvid.Debugf("Using layer %d...\n", curBlocks)
		}

		fileNum++
		load.offset = load.offset.Add(dvid.Point3d{0, 0, 1})
		timedLog.Infof("Loaded %s slice %s", d.DataName(), vox)
	}
	waitForWrites.Wait()
	close(errs) // all senders have finished once the waits above return

	var firsterr error
	if len(errs) > 0 {
		dvid.Errorf("Had at least %d errors in image loading:\n", len(errs))
		for err := range errs {
			dvid.Errorf(" Error: %v\n", err)
			if firsterr == nil {
				firsterr = err
			}
		}
	}
	return firsterr
}

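// Added overview (illustrative, not part of the original file): for a stack of
// 2d files, the call path in this file is LoadImages -> loadXYImages, which for
// each slice calls loadXYImage to read the file into Voxels, writeXYImage to
// transfer voxel data into the current block layer, and writeBlocks to persist
// a completed layer of blocks.
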
// Loads an XY-oriented image at the given offset, returning Voxels.
func (d *Data) loadXYImage(filename string, offset dvid.Point) (*Voxels, error) {
	img, _, err := dvid.GoImageFromFile(filename)
	if err != nil {
		return nil, err
	}
	slice, err := dvid.NewOrthogSlice(dvid.XY, offset, dvid.RectSize(img.Bounds()))
	if err != nil {
		return nil, fmt.Errorf("Unable to determine slice: %v", err)
	}
	vox, err := d.NewVoxels(slice, img)
	if err != nil {
		return nil, err
	}
	storage.FileBytesRead <- len(vox.Data())
	return vox, nil
}

func (d *Data) loadHDF(load *bulkLoadInfo) error {
	return fmt.Errorf("DVID currently does not support HDF5 image import.")
	// TODO: Use a DVID-specific HDF5 loader that works off HDF5 C library.
	/*
		for _, filename := range load.filenames {
			f, err := hdf5.OpenFile(filename, hdf5.F_ACC_RDONLY)
			if err != nil {
				return err
			}
			defer f.Close()

			fmt.Printf("Opened HDF5 file: %s\n", filename)
			numobj, err := f.NumObjects()
			fmt.Printf("Number of objects: %d\n", numobj)
			for n := uint(0); n < numobj; n++ {
				name, err := f.ObjectNameByIndex(n)
				if err != nil {
					return err
				}
				fmt.Printf("Object name %d: %s\n", n, name)
				repo, err := f.OpenRepo(name)
				if err != nil {
					return err
				}
				dtype, err := repo.Datatype()
				if err != nil {
					return err
				}
				fmt.Printf("Type size: %d\n", dtype.Size())
				dataspace := repo.Space()
				dims, maxdims, err := dataspace.SimpleExtentDims()
				if err != nil {
					return err
				}
				fmt.Printf("Dims: %s\n", dims)
				fmt.Printf("Maxdims: %s\n", maxdims)
				data := make([]uint8, dims[0]*dims[1]*dims[2])
				err = repo.Read(&data)
				if err != nil {
					return err
				}
				fmt.Printf("Read %d bytes\n", len(data))
			}
		}
		return nil
	*/
}
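// Illustrative usage sketch (added; not part of the original file). A caller
// holding a *Data might drive a bulk load of a PNG stack roughly as follows;
// the filenames, offset, and the datastore.VersionFromUUID call used to resolve
// the version ID are assumptions for illustration only:
//
//	files := []string{"z0100.png", "z0101.png", "z0102.png"}
//	offset := dvid.Point3d{0, 0, 100} // start writing slices at z = 100
//	v, err := datastore.VersionFromUUID(uuid) // assumed helper; uuid known to caller
//	if err == nil {
//		err = d.LoadImages(v, offset, files)
//	}
//	if err != nil {
//		dvid.Errorf("bulk load failed: %v\n", err)
//	}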