github.com/janelia-flyem/dvid@v1.0.0/datatype/imageblk/write.go

package imageblk

import (
	"fmt"
	"io"
	"log"
	"sync"

	"github.com/janelia-flyem/dvid/datastore"
	"github.com/janelia-flyem/dvid/dvid"
	"github.com/janelia-flyem/dvid/server"
	"github.com/janelia-flyem/dvid/storage"
)

// WriteBlock writes a subvolume or 2d image into a possibly intersecting block.
func (v *Voxels) WriteBlock(block *storage.TKeyValue, blockSize dvid.Point) error {
	return v.writeBlock(block, blockSize)
}
func (v *Voxels) writeBlock(block *storage.TKeyValue, blockSize dvid.Point) error {
	if blockSize.NumDims() > 3 {
		return fmt.Errorf("DVID voxel blocks currently support only up to 3 dimensions, not 4+")
	}
	blockBeg, dataBeg, dataEnd, err := v.ComputeTransform(block, blockSize)
	if err != nil {
		return err
	}
	data := v.Data()
	bytesPerVoxel := int64(v.Values().BytesPerElement())

	// Compute the strides (in bytes)
	bX := int64(blockSize.Value(0)) * bytesPerVoxel
	bY := int64(blockSize.Value(1)) * bX
	dX := int64(v.Stride())

	blockBegX := int64(blockBeg.Value(0))
	blockBegY := int64(blockBeg.Value(1))
	blockBegZ := int64(blockBeg.Value(2))

	// Do the transfers depending on shape of the external voxels.
	switch {
	case v.DataShape().Equals(dvid.XY):
		dataI := int64(dataBeg.Value(1))*dX + int64(dataBeg.Value(0))*bytesPerVoxel
		blockI := blockBegZ*bY + blockBegY*bX + blockBegX*bytesPerVoxel
		bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel
		for y := dataBeg.Value(1); y <= dataEnd.Value(1); y++ {
			copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes])
			blockI += bX
			dataI += dX
		}

	case v.DataShape().Equals(dvid.XZ):
		dataI := int64(dataBeg.Value(2))*dX + int64(dataBeg.Value(0))*bytesPerVoxel
		blockI := blockBegZ*bY + blockBegY*bX + blockBegX*bytesPerVoxel
		bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel
		for z := dataBeg.Value(2); z <= dataEnd.Value(2); z++ {
			copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes])
			blockI += bY
			dataI += dX
		}

	case v.DataShape().Equals(dvid.YZ):
		bz := blockBegZ
		for z := int64(dataBeg.Value(2)); z <= int64(dataEnd.Value(2)); z++ {
			dataI := z*dX + int64(dataBeg.Value(1))*bytesPerVoxel
			blockI := bz*bY + blockBegY*bX + blockBegX*bytesPerVoxel
			for y := dataBeg.Value(1); y <= dataEnd.Value(1); y++ {
				copy(block.V[blockI:blockI+bytesPerVoxel], data[dataI:dataI+bytesPerVoxel])
				blockI += bX
				dataI += bytesPerVoxel
			}
			bz++
		}

	case v.DataShape().ShapeDimensions() == 2:
		// TODO: General code for handling 2d ExtData in n-d space.
		return fmt.Errorf("DVID currently does not support 2d in n-d space")

	case v.DataShape().Equals(dvid.Vol3d):
		blockOffset := blockBegX * bytesPerVoxel
		dX := int64(v.Size().Value(0)) * bytesPerVoxel
		dY := int64(v.Size().Value(1)) * dX
		dataOffset := int64(dataBeg.Value(0)) * bytesPerVoxel
		bytes := int64(dataEnd.Value(0)-dataBeg.Value(0)+1) * bytesPerVoxel
		blockZ := blockBegZ

		for dataZ := int64(dataBeg.Value(2)); dataZ <= int64(dataEnd.Value(2)); dataZ++ {
			blockY := blockBegY
			for dataY := int64(dataBeg.Value(1)); dataY <= int64(dataEnd.Value(1)); dataY++ {
				dataI := dataZ*dY + dataY*dX + dataOffset
				blockI := blockZ*bY + blockY*bX + blockOffset
				copy(block.V[blockI:blockI+bytes], data[dataI:dataI+bytes])
				blockY++
			}
			blockZ++
		}

	default:
		return fmt.Errorf("cannot writeBlock() with unsupported voxels data shape %s", v.DataShape())
	}
	return nil
}
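
// Each case above is plain byte-offset arithmetic over flat buffers: given an
// element width in bytes, a destination (block) row stride, and a source
// (image) row stride, every row transfer is a single copy().  A stripped-down
// sketch of the XY pattern on plain byte slices (hypothetical helper, names
// are illustrative and not part of this package):
func exampleCopyRows(dst, src []byte, rows, rowBytes, dstStride, srcStride int64) {
	var dstI, srcI int64
	for r := int64(0); r < rows; r++ {
		copy(dst[dstI:dstI+rowBytes], src[srcI:srcI+rowBytes])
		dstI += dstStride // advance one row in the destination (bX above)
		srcI += srcStride // advance one row in the source (dX above)
	}
}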

type putOperation struct {
	voxels   *Voxels
	indexZYX dvid.IndexZYX
	version  dvid.VersionID
	mutate   bool   // if false, we just ingest without needing to GET previous value
	mutID    uint64 // should be unique within a server's uptime.
}

type patchGeo struct {
	patchstart dvid.Point3d // offset in block where patch data begins
	patchend   dvid.Point3d // location in block where patch data ends
}

// IngestVoxels ingests voxels from a subvolume into the storage engine.
// The subvolume must be aligned to blocks of the data instance, which simplifies
// the routine since we are simply replacing a value instead of modifying values (GET + PUT).
func (d *Data) IngestVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName) error {
	return d.PutVoxels(v, mutID, vox, roiname, false)
}

// MutateVoxels mutates voxels from a subvolume into the storage engine.  This differs from
// IngestVoxels in firing off a MutateBlockEvent instead of an IngestBlockEvent,
// which tells subscribers that a previous value has changed instead of a completely new
// key/value being inserted.  Performance will be somewhat lower than IngestVoxels due to
// cleanup of prior denormalizations.
func (d *Data) MutateVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName) error {
	return d.PutVoxels(v, mutID, vox, roiname, true)
}

// PutVoxels persists voxels from a subvolume into the storage engine.
// The subvolume must be aligned to blocks of the data instance, which simplifies
// the routine.  A mutation (mutate = true) signals a MutateBlockEvent instead of
// an IngestBlockEvent and requires a GET of each previous block value.
func (d *Data) PutVoxels(v dvid.VersionID, mutID uint64, vox *Voxels, roiname dvid.InstanceName, mutate bool) error {
	r, err := GetROI(v, roiname, vox)
	if err != nil {
		return err
	}

	// get the ordered key-value store for this data instance
	store, err := datastore.GetOrderedKeyValueDB(d)
	if err != nil {
		return fmt.Errorf("data type imageblk had error initializing store: %v", err)
	}

	// extract buffer interface
	_, hasbuffer := store.(storage.KeyValueRequester)

	// Make sure vox is block-aligned
	if !dvid.BlockAligned(vox, d.BlockSize()) {
		return fmt.Errorf("cannot store voxels in non-block aligned geometry %s -> %s", vox.StartPoint(), vox.EndPoint())
	}

	// Only do one request at a time, although each request can start many goroutines.
	if !hasbuffer {
		if vox.NumVoxels() > 256*256*256 {
			server.LargeMutationMutex.Lock()
			defer server.LargeMutationMutex.Unlock()
		}
	}

	// create some buffer for handling requests
	finishedRequests := make(chan error, 1000)
	putrequests := 0

	// Post new extents if there was a change (will always require 1 GET which should
	// not be a big deal for large posts or for distributed back-ends)
	// (assumes rest of the command will finish correctly which seems reasonable)
	ctx := datastore.NewVersionedCtx(d, v)
	putrequests++
	go func() {
		err := d.PostExtents(ctx, vox.StartPoint(), vox.EndPoint())
		finishedRequests <- err
	}()

	// Iterate through index space for this data.
	it, err := vox.NewIndexIterator(d.BlockSize())
	if err != nil {
		return err
	}
	for ; it.Valid(); it.NextSpan() {
		i0, i1, err := it.IndexSpan()
		if err != nil {
			return err
		}
		ptBeg := i0.Duplicate().(dvid.ChunkIndexer)
		ptEnd := i1.Duplicate().(dvid.ChunkIndexer)

		begX := ptBeg.Value(0)
		endX := ptEnd.Value(0)

		c := dvid.ChunkPoint3d{begX, ptBeg.Value(1), ptBeg.Value(2)}
		for x := begX; x <= endX; x++ {
			c[0] = x
			curIndex := dvid.IndexZYX(c)

			// Don't PUT if this index is outside a specified ROI
			if r != nil && r.Iter != nil && !r.Iter.InsideFast(curIndex) {
				continue
			}

			kv := &storage.TKeyValue{K: NewTKey(&curIndex)}
			putOp := &putOperation{vox, curIndex, v, mutate, mutID}
			op := &storage.ChunkOp{putOp, nil}
			putrequests++
			d.PutChunk(&storage.Chunk{op, kv}, hasbuffer, finishedRequests)
		}
	}
	// wait for everything to finish
	for i := 0; i < putrequests; i++ {
		errjob := <-finishedRequests
		if errjob != nil {
			err = errjob
		}
	}
	return err
}
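
// PutVoxels fans each block PUT out to its own goroutine and collects results
// on the buffered finishedRequests channel, keeping the last non-nil error.
// The same fan-out/fan-in shape in isolation (hypothetical helper, not part
// of this package):
func exampleFanOut(jobs []func() error) error {
	done := make(chan error, len(jobs))
	for _, job := range jobs {
		go func(j func() error) { done <- j() }(job)
	}
	var err error
	for range jobs {
		if jobErr := <-done; jobErr != nil {
			err = jobErr // like PutVoxels, remember the most recent failure
		}
	}
	return err
}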

// PutBlocks stores blocks of data in a span along X
func (d *Data) PutBlocks(v dvid.VersionID, mutID uint64, start dvid.ChunkPoint3d, span int, data io.ReadCloser, mutate bool) error {
	batcher, err := datastore.GetKeyValueBatcher(d)
	if err != nil {
		return err
	}

	ctx := datastore.NewVersionedCtx(d, v)
	batch := batcher.NewBatch(ctx)

	// Read blocks from the stream until we can output a batch put.
	const BatchSize = 1000
	var readBlocks int
	numBlockBytes := d.BlockSize().Prod()
	chunkPt := start
	buf := make([]byte, numBlockBytes)
	for {
		// Read a block's worth of data
		readBytes := int64(0)
		for {
			n, err := data.Read(buf[readBytes:])
			readBytes += int64(n)
			if readBytes == numBlockBytes {
				break
			}
			if err == io.EOF {
				return fmt.Errorf("stream ended before a full block of data was read")
			}
			if err != nil {
				return fmt.Errorf("error reading blocks: %v", err)
			}
		}

		if readBytes != numBlockBytes {
			return fmt.Errorf("expected %d bytes in block read, got %d instead, aborting", numBlockBytes, readBytes)
		}

		serialization, err := dvid.SerializeData(buf, d.Compression(), d.Checksum())
		if err != nil {
			return err
		}
		zyx := dvid.IndexZYX(chunkPt)
		tk := NewTKey(&zyx)

		// If we are mutating, get the previous block of data.
		var oldBlock []byte
		if mutate {
			oldBlock, err = d.GetBlock(v, tk)
			if err != nil {
				return fmt.Errorf("unable to load previous block in %q, key %v: %v", d.DataName(), tk, err)
			}
		}

		// Write the new block
		batch.Put(tk, serialization)

		// Notify any subscribers that you've changed block.
		var event string
		var delta interface{}
		if mutate {
			event = MutateBlockEvent
			delta = MutatedBlock{&zyx, oldBlock, buf, mutID}
		} else {
			event = IngestBlockEvent
			delta = Block{&zyx, buf, mutID}
		}
		evt := datastore.SyncEvent{d.DataUUID(), event}
		msg := datastore.SyncMessage{event, v, delta}
		if err := datastore.NotifySubscribers(evt, msg); err != nil {
			return err
		}

		// Advance to next block
		chunkPt[0]++
		readBlocks++
		finish := (readBlocks == span)
		if finish || readBlocks%BatchSize == 0 {
			if err := batch.Commit(); err != nil {
				return fmt.Errorf("error on batch commit, block %d: %v", readBlocks, err)
			}
			if finish {
				break
			} else {
				batch = batcher.NewBatch(ctx)
			}
		}
	}
	return nil
}
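
// The inner loop above fills buf with exactly one block of bytes before
// continuing.  io.ReadFull from the standard library expresses the same
// contract (fill the buffer or fail), so a condensed sketch of the read step
// could look like this, assuming io.ErrUnexpectedEOF plays the role of the
// "stream ended early" error (hypothetical helper, not part of this package):
func exampleReadBlock(r io.Reader, numBlockBytes int64) ([]byte, error) {
	buf := make([]byte, numBlockBytes)
	if _, err := io.ReadFull(r, buf); err != nil {
		return nil, fmt.Errorf("error reading block: %v", err)
	}
	return buf, nil
}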

// PutChunk puts a chunk of data as part of a mapped operation.
// Only some multiple of the # of CPU cores can be used for chunk handling before
// it waits for chunk processing to abate via the buffered server.HandlerToken channel.
func (d *Data) PutChunk(chunk *storage.Chunk, hasbuffer bool, finishedRequests chan error) error {
	if !hasbuffer {
		// if storage engine handles buffering, limited advantage to throttling here
		server.CheckChunkThrottling()
	}

	go d.putChunk(chunk, hasbuffer, finishedRequests)
	return nil
}
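
// CheckChunkThrottling and the HandlerToken channel act as a counting
// semaphore: a goroutine proceeds only while it holds a token, and the
// deferred send in putChunk returns it.  A minimal sketch of that pattern
// with a local channel, created via make(chan int, n) and pre-filled with n
// tokens (illustrative only; the real token channel and its capacity live in
// the server package):
func exampleThrottledGo(tokens chan int, work func()) {
	<-tokens // block until a token is free
	go func() {
		defer func() { tokens <- 1 }() // return the token when work is done
		work()
	}()
}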

func (d *Data) putChunk(chunk *storage.Chunk, hasbuffer bool, finishedRequests chan error) {
	var err error
	defer func() {
		// After processing a chunk, return the token.
		if !hasbuffer {
			server.HandlerToken <- 1
		}
		// Notify the requestor that this chunk is done.
		if chunk != nil && chunk.Wg != nil {
			chunk.Wg.Done()
		}

		finishedRequests <- err
	}()

	// Make sure our received chunk is valid before dereferencing it.
	if chunk == nil {
		err = fmt.Errorf("received nil chunk in ProcessChunk, ignoring chunk")
		dvid.Errorf("%v\n", err)
		return
	}
	if chunk.K == nil {
		err = fmt.Errorf("received nil chunk key in ProcessChunk, ignoring chunk")
		dvid.Errorf("%v\n", err)
		return
	}

	op, ok := chunk.Op.(*putOperation)
	if !ok {
		log.Fatalf("Illegal operation passed to ProcessChunk() for data %s\n", d.DataName())
	}

	// Initialize the block buffer using the chunk of data.  For voxels, this chunk of
	// data needs to be uncompressed and deserialized.
	var blockData []byte
	if chunk.V == nil {
		blockData = d.BackgroundBlock()
	} else {
		blockData, _, err = dvid.DeserializeData(chunk.V, true)
		if err != nil {
			dvid.Errorf("Unable to deserialize block in %q: %v\n", d.DataName(), err)
			return
		}
	}

	// If we are mutating, get the previous block of data.
	var oldBlock []byte
	if op.mutate {
		oldBlock, err = d.GetBlock(op.version, chunk.K)
		if err != nil {
			dvid.Errorf("Unable to load previous block in %q, key %v: %v\n", d.DataName(), chunk.K, err)
			return
		}
	}

	// Perform the operation.
	block := &storage.TKeyValue{K: chunk.K, V: blockData}
	if err = op.voxels.WriteBlock(block, d.BlockSize()); err != nil {
		dvid.Errorf("Unable to WriteBlock() in %q: %v\n", d.DataName(), err)
		return
	}
	var serialization []byte
	serialization, err = dvid.SerializeData(blockData, d.Compression(), d.Checksum())
	if err != nil {
		dvid.Errorf("Unable to serialize block in %q: %v\n", d.DataName(), err)
		return
	}
	store, err := datastore.GetOrderedKeyValueDB(d)
	if err != nil {
		dvid.Errorf("Data type imageblk had error initializing store: %v\n", err)
		return
	}

	ready := make(chan error, 1)
	callback := func() {
		// Notify any subscribers that you've changed block.
		resperr := <-ready
		if resperr != nil {
			dvid.Errorf("Unable to PUT voxel data for key %v: %v\n", chunk.K, resperr)
			err = fmt.Errorf("unable to PUT voxel data for key %v: %v", chunk.K, resperr)
			return
		}
		var event string
		var delta interface{}
		if op.mutate {
			event = MutateBlockEvent
			delta = MutatedBlock{&op.indexZYX, oldBlock, block.V, op.mutID}
		} else {
			event = IngestBlockEvent
			delta = Block{&op.indexZYX, block.V, op.mutID}
		}
		evt := datastore.SyncEvent{d.DataUUID(), event}
		msg := datastore.SyncMessage{event, op.version, delta}
		if err = datastore.NotifySubscribers(evt, msg); err != nil {
			dvid.Errorf("Unable to notify subscribers of event %s in %s\n", event, d.DataName())
		}
	}

	// put data -- use buffer if available
	ctx := datastore.NewVersionedCtx(d, op.version)
	if err = store.Put(ctx, chunk.K, serialization); err != nil {
		dvid.Errorf("Unable to PUT voxel data for key %v: %v\n", chunk.K, err)
		return
	}
	ready <- nil
	callback()
}
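
// Whether subscribers see a MutateBlockEvent or an IngestBlockEvent is decided
// purely by the mutate flag, both here and in PutBlocks.  That selection step,
// condensed into one hypothetical helper mirroring the calls above (not part
// of this package):
func exampleNotifyBlockChange(d *Data, v dvid.VersionID, zyx dvid.IndexZYX, oldV, newV []byte, mutID uint64, mutate bool) error {
	var event string
	var delta interface{}
	if mutate {
		event = MutateBlockEvent
		delta = MutatedBlock{&zyx, oldV, newV, mutID} // subscribers get old + new values
	} else {
		event = IngestBlockEvent
		delta = Block{&zyx, newV, mutID} // brand-new key/value: no old value to report
	}
	evt := datastore.SyncEvent{d.DataUUID(), event}
	msg := datastore.SyncMessage{event, v, delta}
	return datastore.NotifySubscribers(evt, msg)
}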

// writeXYImage writes an XY image into the blocks that intersect it.  This function
// assumes the blocks have been allocated and, if necessary, filled with old data.
func (d *Data) writeXYImage(v dvid.VersionID, vox *Voxels, b storage.TKeyValues) (err error) {

	// Setup concurrency in image -> block transfers.
	var wg sync.WaitGroup
	defer wg.Wait()

	// Iterate through index space for this data using ZYX ordering.
	blockSize := d.BlockSize()
	var startingBlock int32

	it, err := vox.NewIndexIterator(blockSize)
	if err != nil {
		return err
	}
	for ; it.Valid(); it.NextSpan() {
		indexBeg, indexEnd, err := it.IndexSpan()
		if err != nil {
			return err
		}

		ptBeg := indexBeg.Duplicate().(dvid.ChunkIndexer)
		ptEnd := indexEnd.Duplicate().(dvid.ChunkIndexer)

		// Do image -> block transfers in concurrent goroutines.
		begX := ptBeg.Value(0)
		endX := ptEnd.Value(0)

		server.CheckChunkThrottling()
		wg.Add(1)
		go func(blockNum int32) {
			c := dvid.ChunkPoint3d{begX, ptBeg.Value(1), ptBeg.Value(2)}
			for x := begX; x <= endX; x++ {
				c[0] = x
				curIndex := dvid.IndexZYX(c)
				b[blockNum].K = NewTKey(&curIndex)
				// Write this slice data into the block.
				if err := vox.WriteBlock(&(b[blockNum]), blockSize); err != nil {
					dvid.Errorf("Unable to WriteBlock() in %q: %v\n", d.DataName(), err)
				}
				blockNum++
			}
			server.HandlerToken <- 1
			wg.Done()
		}(startingBlock)

		startingBlock += (endX - begX + 1)
	}
	return
}
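
// writeXYImage throttles, then hands each X-span of blocks to a goroutine and
// joins on the WaitGroup deferred at the top of the function.  The bare bones
// of that shape (illustrative names only, not part of this package):
func exampleSpanFanOut(spans [][]func()) {
	var wg sync.WaitGroup
	defer wg.Wait() // join all span writers before returning
	for _, span := range spans {
		wg.Add(1)
		go func(jobs []func()) {
			defer wg.Done()
			for _, job := range jobs {
				job()
			}
		}(span)
	}
}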

// KVWriteSize is the # of key-value pairs we will write as one atomic batch write.
const KVWriteSize = 500

// TODO -- Clean up all the writing and simplify now that we have block-aligned writes.
// writeBlocks ingests blocks of voxel data asynchronously using batch writes.
func (d *Data) writeBlocks(v dvid.VersionID, b storage.TKeyValues, wg1, wg2 *sync.WaitGroup) error {
	batcher, err := datastore.GetKeyValueBatcher(d)
	if err != nil {
		return err
	}

	preCompress, postCompress := 0, 0

	ctx := datastore.NewVersionedCtx(d, v)
	evt := datastore.SyncEvent{d.DataUUID(), IngestBlockEvent}

	server.CheckChunkThrottling()
	go func() {
		defer func() {
			wg1.Done()
			wg2.Done()
			dvid.Debugf("Wrote voxel blocks.  Before %s: %d bytes.  After: %d bytes\n", d.Compression(), preCompress, postCompress)
			server.HandlerToken <- 1
		}()

		mutID := d.NewMutationID()
		batch := batcher.NewBatch(ctx)
		for i, block := range b {
			serialization, err := dvid.SerializeData(block.V, d.Compression(), d.Checksum())
			if err != nil {
				dvid.Errorf("Unable to serialize block: %v\n", err)
				return
			}
			preCompress += len(block.V)
			postCompress += len(serialization)
			batch.Put(block.K, serialization)

			indexZYX, err := DecodeTKey(block.K)
			if err != nil {
				dvid.Errorf("Unable to recover index from block key: %v\n", block.K)
				return
			}
			msg := datastore.SyncMessage{IngestBlockEvent, v, Block{indexZYX, block.V, mutID}}
			if err := datastore.NotifySubscribers(evt, msg); err != nil {
				dvid.Errorf("Unable to notify subscribers of IngestBlockEvent in %s\n", d.DataName())
				return
			}

			// Check if we should commit
			if i%KVWriteSize == KVWriteSize-1 {
				if err := batch.Commit(); err != nil {
					dvid.Errorf("Error on trying to write batch: %v\n", err)
					return
				}
				batch = batcher.NewBatch(ctx)
			}
		}
		if err := batch.Commit(); err != nil {
			dvid.Errorf("Error on trying to write batch: %v\n", err)
			return
		}
	}()
	return nil
}
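
// writeBlocks commits every KVWriteSize puts and then flushes once more after
// the loop; because a fresh batch is started after each commit, that final
// commit may be a no-op on an empty batch.  The cadence in isolation, with a
// hypothetical commit callback standing in for batch.Commit plus
// batcher.NewBatch:
func exampleBatchCadence(numPuts int, commit func() error) error {
	for i := 0; i < numPuts; i++ {
		// ... stage put #i on the current batch ...
		if i%KVWriteSize == KVWriteSize-1 {
			if err := commit(); err != nil {
				return err
			}
		}
	}
	return commit() // flush the remainder, possibly empty
}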