// Source: github.com/janelia-flyem/dvid@v1.0.0/datatype/labelmap/compression.go

     1  package labelmap
     2  
     3  import (
     4  	"compress/gzip"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"math"
    10  	"net/http"
    11  
    12  	"github.com/janelia-flyem/dvid/dvid"
    13  	lz4 "github.com/janelia-flyem/go/golz4-updated"
    14  )
    15  
    16  // uncompressReaderData returns label data from a potentially compressed ("lz4", "gzip") reader.
    17  func uncompressReaderData(compression string, in io.ReadCloser, estsize int64) ([]byte, error) {
    18  	var err error
    19  	var data []byte
    20  	switch compression {
    21  	case "":
    22  		tlog := dvid.NewTimeLog()
    23  		data, err = ioutil.ReadAll(in)
    24  		if err != nil {
    25  			return nil, err
    26  		}
    27  		tlog.Debugf("read 3d uncompressed POST")
    28  	case "lz4":
    29  		tlog := dvid.NewTimeLog()
    30  		data, err = ioutil.ReadAll(in)
    31  		if err != nil {
    32  			return nil, err
    33  		}
    34  		tlog.Debugf("read 3d lz4 POST: %d bytes", len(data))
    35  		if len(data) == 0 {
    36  			return nil, fmt.Errorf("received 0 LZ4 compressed bytes")
    37  		}
    38  		tlog = dvid.NewTimeLog()
    39  		uncompressed := make([]byte, estsize)
    40  		if err = lz4.Uncompress(data, uncompressed); err != nil {
    41  			return nil, err
    42  		}
    43  		data = uncompressed
    44  		tlog.Debugf("uncompressed 3d lz4 POST: %d bytes", len(data))
    45  	case "gzip":
    46  		tlog := dvid.NewTimeLog()
    47  		gr, err := gzip.NewReader(in)
    48  		if err != nil {
    49  			return nil, err
    50  		}
    51  		data, err = ioutil.ReadAll(gr)
    52  		if err != nil {
    53  			return nil, err
    54  		}
    55  		if err = gr.Close(); err != nil {
    56  			return nil, err
    57  		}
    58  		tlog.Debugf("read and uncompress 3d gzip POST: %d bytes", len(data))
    59  	default:
    60  		return nil, fmt.Errorf("unknown compression type %q", compression)
    61  	}
    62  	return data, nil
    63  }
    64  
    65  // compressGoogle uses the neuroglancer compression format
    66  func compressGoogle(data []byte, subvol *dvid.Subvolume) ([]byte, error) {
    67  	// TODO: share table between blocks
    68  	subvolsizes := subvol.Size()
    69  
    70  	// must <= 32
    71  	BLKSIZE := int32(8)
    72  
    73  	xsize := subvolsizes.Value(0)
    74  	ysize := subvolsizes.Value(1)
    75  	zsize := subvolsizes.Value(2)
    76  	gx := subvolsizes.Value(0) / BLKSIZE
    77  	gy := subvolsizes.Value(1) / BLKSIZE
    78  	gz := subvolsizes.Value(2) / BLKSIZE
    79  	if xsize%BLKSIZE > 0 || ysize%BLKSIZE > 0 || zsize%BLKSIZE > 0 {
    80  		return nil, fmt.Errorf("volume must be a multiple of the block size")
    81  	}
    82  
    83  	// add initial 4 byte to designate as a header for the compressed data
    84  	// 64 bit headers for each 8x8x8 block and pre-allocate some data based on expected data size
    85  	dword := 4
    86  	globaloffset := dword
    87  
    88  	datagoogle := make([]byte, gx*gy*gz*8+int32(globaloffset), xsize*ysize*zsize*8/10)
    89  	datagoogle[0] = byte(globaloffset / dword) // compressed data starts after first 4 bytes
    90  
    91  	// everything is written out little-endian
    92  	for gziter := int32(0); gziter < gz; gziter++ {
    93  		for gyiter := int32(0); gyiter < gy; gyiter++ {
    94  			for gxiter := int32(0); gxiter < gx; gxiter++ {
    95  				unique_vals := make(map[uint64]uint32)
    96  				unique_list := make([]uint64, 0)
    97  
    98  				currpos := (gziter*BLKSIZE*(xsize*ysize) + gyiter*BLKSIZE*xsize + gxiter*BLKSIZE) * 8
    99  
   100  				// extract unique values in the 8x8x8 block
   101  				for z := int32(0); z < BLKSIZE; z++ {
   102  					for y := int32(0); y < BLKSIZE; y++ {
   103  						for x := int32(0); x < BLKSIZE; x++ {
   104  							if _, ok := unique_vals[binary.LittleEndian.Uint64(data[currpos:currpos+8])]; !ok {
   105  								unique_vals[binary.LittleEndian.Uint64(data[currpos:currpos+8])] = 0
   106  								unique_list = append(unique_list, binary.LittleEndian.Uint64(data[currpos:currpos+8]))
   107  							}
   108  							currpos += 8
   109  						}
   110  						currpos += ((xsize - BLKSIZE) * 8)
   111  					}
   112  					currpos += (xsize*ysize - (xsize * (BLKSIZE))) * 8
   113  				}
   114  				// write out mapping
   115  				for pos, val := range unique_list {
   116  					unique_vals[val] = uint32(pos)
   117  				}
   118  
   119  				// write-out compressed data
   120  				encodedBits := uint32(math.Ceil(math.Log2(float64(len(unique_vals)))))
   121  				switch {
   122  				case encodedBits == 0, encodedBits == 1, encodedBits == 2:
   123  				case encodedBits <= 4:
   124  					encodedBits = 4
   125  				case encodedBits <= 8:
   126  					encodedBits = 8
   127  				case encodedBits <= 16:
   128  					encodedBits = 16
   129  				}
   130  
   131  				// starting location for writing out data
   132  				currpos2 := len(datagoogle)
   133  				compressstart := len(datagoogle) / dword // in 4-byte units
   134  				// number of bytes to add (encode bytes + table size of 8 byte numbers)
   135  				addedBytes := uint32(encodedBits*uint32(BLKSIZE*BLKSIZE*BLKSIZE)/8) + uint32(len(unique_vals)*8) // will always be a multiple of 4 bytes
   136  				datagoogle = append(datagoogle, make([]byte, addedBytes)...)
   137  
   138  				// do not need to write-out anything if there is only one entry
   139  				if encodedBits > 0 {
   140  					currpos := (gziter*BLKSIZE*(xsize*ysize) + gyiter*BLKSIZE*xsize + gxiter*BLKSIZE) * 8
   141  
   142  					for z := uint32(0); z < uint32(BLKSIZE); z++ {
   143  						for y := uint32(0); y < uint32(BLKSIZE); y++ {
   144  							for x := uint32(0); x < uint32(BLKSIZE); x++ {
   145  								mappedval := unique_vals[binary.LittleEndian.Uint64(data[currpos:currpos+8])]
   146  								currpos += 8
   147  
   148  								// write out encoding
   149  								startbit := uint32((encodedBits * x) % uint32(8))
   150  								if encodedBits == 16 {
   151  									// write two bytes worth of data
   152  									datagoogle[currpos2] = byte(255 & mappedval)
   153  									currpos2++
   154  									datagoogle[currpos2] = byte(255 & (mappedval >> 8))
   155  									currpos2++
   156  								} else {
   157  									// write bit-shifted data
   158  									datagoogle[currpos2] |= byte(255 & (mappedval << startbit))
   159  								}
   160  								if int(startbit) == (8 - int(encodedBits)) {
   161  									currpos2++
   162  								}
   163  
   164  							}
   165  							currpos += ((xsize - BLKSIZE) * 8)
   166  						}
   167  						currpos += (xsize*ysize - (xsize * (BLKSIZE))) * 8
   168  					}
   169  				}
   170  				tablestart := currpos2 / dword // in 4-byte units
   171  				// write-out lookup table
   172  				for _, val := range unique_list {
   173  					for bytespot := uint32(0); bytespot < uint32(8); bytespot++ {
   174  						datagoogle[currpos2] = byte(255 & (val >> (bytespot * 8)))
   175  						currpos2++
   176  					}
   177  				}
   178  
   179  				// write-out block header
   180  				// 8 bytes per header entry
   181  				headerpos := (gziter*(gy*gx)+gyiter*gx+gxiter)*8 + int32(globaloffset) // shift start by global offset
   182  
   183  				// write out lookup table start
   184  				tablestart -= (globaloffset / dword) // relative to the start of the compressed data
   185  				datagoogle[headerpos] = byte(255 & tablestart)
   186  				headerpos++
   187  				datagoogle[headerpos] = byte(255 & (tablestart >> 8))
   188  				headerpos++
   189  				datagoogle[headerpos] = byte(255 & (tablestart >> 16))
   190  				headerpos++
   191  
   192  				// write out number of encoded bits
   193  				datagoogle[headerpos] = byte(255 & encodedBits)
   194  				headerpos++
   195  
   196  				// write out block compress start
   197  				compressstart -= (globaloffset / dword) // relative to the start of the compressed data
   198  				datagoogle[headerpos] = byte(255 & compressstart)
   199  				headerpos++
   200  				datagoogle[headerpos] = byte(255 & (compressstart >> 8))
   201  				headerpos++
   202  				datagoogle[headerpos] = byte(255 & (compressstart >> 16))
   203  				headerpos++
   204  				datagoogle[headerpos] = byte(255 & (compressstart >> 24))
   205  			}
   206  		}
   207  	}
   208  
   209  	return datagoogle, nil
   210  }
   211  
   212  // Given an array of label data (little-endian uint64) of dimensions related by subvol, write
   213  // via HTTP with the appropriate compression schemes.
   214  func writeCompressedToHTTP(compression string, data []byte, subvol *dvid.Subvolume, w http.ResponseWriter) error {
   215  	var err error
   216  	w.Header().Set("Content-type", "application/octet-stream")
   217  	switch compression {
   218  	case "":
   219  		_, err = w.Write(data)
   220  		if err != nil {
   221  			return err
   222  		}
   223  	case "lz4":
   224  		compressed := make([]byte, lz4.CompressBound(data))
   225  		var n, outSize int
   226  		if outSize, err = lz4.Compress(data, compressed); err != nil {
   227  			return err
   228  		}
   229  		compressed = compressed[:outSize]
   230  		if n, err = w.Write(compressed); err != nil {
   231  			return err
   232  		}
   233  		if n != outSize {
   234  			errmsg := fmt.Sprintf("Only able to write %d of %d lz4 compressed bytes\n", n, outSize)
   235  			dvid.Errorf(errmsg)
   236  			return err
   237  		}
   238  	case "gzip":
   239  		gw := gzip.NewWriter(w)
   240  		if _, err = gw.Write(data); err != nil {
   241  			return err
   242  		}
   243  		if err = gw.Close(); err != nil {
   244  			return err
   245  		}
   246  	case "google", "googlegzip": // see neuroglancer for details of compressed segmentation format
   247  		datagoogle, err := compressGoogle(data, subvol)
   248  		if err != nil {
   249  			return err
   250  		}
   251  		if compression == "googlegzip" {
   252  			w.Header().Set("Content-encoding", "gzip")
   253  			gw := gzip.NewWriter(w)
   254  			if _, err = gw.Write(datagoogle); err != nil {
   255  				return err
   256  			}
   257  			if err = gw.Close(); err != nil {
   258  				return err
   259  			}
   260  
   261  		} else {
   262  			_, err = w.Write(datagoogle)
   263  			if err != nil {
   264  				return err
   265  			}
   266  		}
   267  	default:
   268  		return fmt.Errorf("unknown compression type %q", compression)
   269  	}
   270  	return nil
   271  }