github.com/janelia-flyem/dvid@v1.0.0/datatype/labelmap/compression.go

package labelmap

import (
	"compress/gzip"
	"encoding/binary"
	"fmt"
	"io"
	"io/ioutil"
	"math"
	"net/http"

	"github.com/janelia-flyem/dvid/dvid"
	lz4 "github.com/janelia-flyem/go/golz4-updated"
)

// uncompressReaderData returns label data from a potentially compressed ("lz4", "gzip") reader.
func uncompressReaderData(compression string, in io.ReadCloser, estsize int64) ([]byte, error) {
	var err error
	var data []byte
	switch compression {
	case "":
		tlog := dvid.NewTimeLog()
		data, err = ioutil.ReadAll(in)
		if err != nil {
			return nil, err
		}
		tlog.Debugf("read 3d uncompressed POST")
	case "lz4":
		tlog := dvid.NewTimeLog()
		data, err = ioutil.ReadAll(in)
		if err != nil {
			return nil, err
		}
		tlog.Debugf("read 3d lz4 POST: %d bytes", len(data))
		if len(data) == 0 {
			return nil, fmt.Errorf("received 0 LZ4 compressed bytes")
		}
		tlog = dvid.NewTimeLog()
		uncompressed := make([]byte, estsize)
		if err = lz4.Uncompress(data, uncompressed); err != nil {
			return nil, err
		}
		data = uncompressed
		tlog.Debugf("uncompressed 3d lz4 POST: %d bytes", len(data))
	case "gzip":
		tlog := dvid.NewTimeLog()
		gr, err := gzip.NewReader(in)
		if err != nil {
			return nil, err
		}
		data, err = ioutil.ReadAll(gr)
		if err != nil {
			return nil, err
		}
		if err = gr.Close(); err != nil {
			return nil, err
		}
		tlog.Debugf("read and uncompress 3d gzip POST: %d bytes", len(data))
	default:
		return nil, fmt.Errorf("unknown compression type %q", compression)
	}
	return data, nil
}
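// handleLabelPostExample is an illustrative sketch, not part of DVID's HTTP API. It
// shows how a POST handler might use uncompressReaderData, with the "compression"
// query string selecting the wire format. For "lz4" the caller must supply the
// expected uncompressed size; the 64^3 voxel extent below is a hypothetical value
// chosen only for this example.
func handleLabelPostExample(w http.ResponseWriter, r *http.Request) {
	// Expected uncompressed size: 64 x 64 x 64 voxels at 8 bytes per uint64 label.
	estsize := int64(64 * 64 * 64 * 8)
	data, err := uncompressReaderData(r.URL.Query().Get("compression"), r.Body, estsize)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	fmt.Fprintf(w, "received %d bytes of label data\n", len(data))
}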
// compressGoogle encodes the label data using the neuroglancer compressed
// segmentation format.
func compressGoogle(data []byte, subvol *dvid.Subvolume) ([]byte, error) {
	// TODO: share table between blocks
	subvolsizes := subvol.Size()

	// must be <= 32
	BLKSIZE := int32(8)

	xsize := subvolsizes.Value(0)
	ysize := subvolsizes.Value(1)
	zsize := subvolsizes.Value(2)
	gx := xsize / BLKSIZE
	gy := ysize / BLKSIZE
	gz := zsize / BLKSIZE
	if xsize%BLKSIZE > 0 || ysize%BLKSIZE > 0 || zsize%BLKSIZE > 0 {
		return nil, fmt.Errorf("volume dimensions must be multiples of the block size")
	}

	// Reserve an initial 4-byte header for the compressed data plus a 64-bit header
	// for each 8x8x8 block, and pre-allocate capacity based on the expected
	// compressed size.
	dword := 4
	globaloffset := dword

	datagoogle := make([]byte, gx*gy*gz*8+int32(globaloffset), xsize*ysize*zsize*8/10)
	datagoogle[0] = byte(globaloffset / dword) // compressed data starts after first 4 bytes

	// everything is written out little-endian
	for gziter := int32(0); gziter < gz; gziter++ {
		for gyiter := int32(0); gyiter < gy; gyiter++ {
			for gxiter := int32(0); gxiter < gx; gxiter++ {
				unique_vals := make(map[uint64]uint32)
				unique_list := make([]uint64, 0)

				currpos := (gziter*BLKSIZE*(xsize*ysize) + gyiter*BLKSIZE*xsize + gxiter*BLKSIZE) * 8

				// extract unique values in the 8x8x8 block
				for z := int32(0); z < BLKSIZE; z++ {
					for y := int32(0); y < BLKSIZE; y++ {
						for x := int32(0); x < BLKSIZE; x++ {
							label := binary.LittleEndian.Uint64(data[currpos : currpos+8])
							if _, ok := unique_vals[label]; !ok {
								unique_vals[label] = 0
								unique_list = append(unique_list, label)
							}
							currpos += 8
						}
						currpos += (xsize - BLKSIZE) * 8
					}
					currpos += (xsize*ysize - xsize*BLKSIZE) * 8
				}
				// assign each unique value its index in the lookup table
				for pos, val := range unique_list {
					unique_vals[val] = uint32(pos)
				}

				// determine the number of bits used to encode each value
				encodedBits := uint32(math.Ceil(math.Log2(float64(len(unique_vals)))))
				switch {
				case encodedBits == 0, encodedBits == 1, encodedBits == 2:
				case encodedBits <= 4:
					encodedBits = 4
				case encodedBits <= 8:
					encodedBits = 8
				case encodedBits <= 16:
					encodedBits = 16
				}

				// starting location for writing out data
				currpos2 := len(datagoogle)
				compressstart := len(datagoogle) / dword // in 4-byte units
				// number of bytes to add (encoded bytes + lookup table of 8-byte values)
				addedBytes := encodedBits*uint32(BLKSIZE*BLKSIZE*BLKSIZE)/8 + uint32(len(unique_vals)*8) // will always be a multiple of 4 bytes
				datagoogle = append(datagoogle, make([]byte, addedBytes)...)

				// nothing to encode if there is only one entry (encodedBits == 0)
				if encodedBits > 0 {
					currpos := (gziter*BLKSIZE*(xsize*ysize) + gyiter*BLKSIZE*xsize + gxiter*BLKSIZE) * 8

					for z := uint32(0); z < uint32(BLKSIZE); z++ {
						for y := uint32(0); y < uint32(BLKSIZE); y++ {
							for x := uint32(0); x < uint32(BLKSIZE); x++ {
								mappedval := unique_vals[binary.LittleEndian.Uint64(data[currpos:currpos+8])]
								currpos += 8

								// write out encoding
								startbit := (encodedBits * x) % 8
								if encodedBits == 16 {
									// write two bytes worth of data
									datagoogle[currpos2] = byte(255 & mappedval)
									currpos2++
									datagoogle[currpos2] = byte(255 & (mappedval >> 8))
									currpos2++
								} else {
									// write bit-shifted data
									datagoogle[currpos2] |= byte(255 & (mappedval << startbit))
								}
								if int(startbit) == (8 - int(encodedBits)) {
									currpos2++
								}
							}
							currpos += (xsize - BLKSIZE) * 8
						}
						currpos += (xsize*ysize - xsize*BLKSIZE) * 8
					}
				}
				tablestart := currpos2 / dword // in 4-byte units
				// write out the lookup table
				for _, val := range unique_list {
					for bytespot := uint32(0); bytespot < 8; bytespot++ {
						datagoogle[currpos2] = byte(255 & (val >> (bytespot * 8)))
						currpos2++
					}
				}

				// write out the block header (8 bytes per header entry)
				headerpos := (gziter*(gy*gx)+gyiter*gx+gxiter)*8 + int32(globaloffset) // shift start by global offset

				// lookup table start (24-bit little-endian)
				tablestart -= globaloffset / dword // relative to the start of the compressed data
				datagoogle[headerpos] = byte(255 & tablestart)
				headerpos++
				datagoogle[headerpos] = byte(255 & (tablestart >> 8))
				headerpos++
				datagoogle[headerpos] = byte(255 & (tablestart >> 16))
				headerpos++

				// number of encoded bits
				datagoogle[headerpos] = byte(255 & encodedBits)
				headerpos++

				// block compress start (32-bit little-endian)
				compressstart -= globaloffset / dword // relative to the start of the compressed data
				datagoogle[headerpos] = byte(255 & compressstart)
				headerpos++
				datagoogle[headerpos] = byte(255 & (compressstart >> 8))
				headerpos++
				datagoogle[headerpos] = byte(255 & (compressstart >> 16))
				headerpos++
				datagoogle[headerpos] = byte(255 & (compressstart >> 24))
			}
		}
	}

	return datagoogle, nil
}
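// decodeGoogleVoxel is an illustrative sketch, not part of DVID. It shows how one
// label can be read back from the buffer produced by compressGoogle above, which
// documents the layout that function writes: a 4-byte global header, one 8-byte
// header per 8x8x8 block (24-bit table offset, 8-bit encoded bits, 32-bit values
// offset, both offsets in 4-byte words relative to the start of the compressed
// data), bit-packed indices with x fastest, then the uint64 lookup table. It
// assumes the same fixed 8x8x8 block size and that gx, gy are the subvolume
// extents in 8-voxel blocks (xsize/8, ysize/8), as computed in compressGoogle.
func decodeGoogleVoxel(datagoogle []byte, x, y, z, gx, gy int32) uint64 {
	const blksize = 8
	dword := int32(4)

	// The first uint32 gives the offset (in 4-byte words) to the per-block headers.
	globaloffset := int32(binary.LittleEndian.Uint32(datagoogle[0:4])) * dword

	// Locate the header of this voxel's 8x8x8 block (8 bytes per block, x fastest).
	bx, by, bz := x/blksize, y/blksize, z/blksize
	headerpos := (bz*(gy*gx)+by*gx+bx)*8 + globaloffset

	tablestart := int32(binary.LittleEndian.Uint32(datagoogle[headerpos:headerpos+4]) & 0x00FFFFFF)
	encodedBits := int32(datagoogle[headerpos+3])
	compressstart := int32(binary.LittleEndian.Uint32(datagoogle[headerpos+4 : headerpos+8]))
	tablepos := tablestart*dword + globaloffset
	valuepos := compressstart*dword + globaloffset

	// With 0 encoded bits the block is uniform: its label is the single table entry.
	index := uint32(0)
	if encodedBits > 0 {
		vx, vy, vz := x%blksize, y%blksize, z%blksize
		bitpos := (vz*blksize*blksize + vy*blksize + vx) * encodedBits
		bytepos := valuepos + bitpos/8
		if encodedBits == 16 {
			index = uint32(binary.LittleEndian.Uint16(datagoogle[bytepos : bytepos+2]))
		} else {
			shift := uint(bitpos % 8)
			mask := uint32(1)<<uint(encodedBits) - 1
			index = (uint32(datagoogle[bytepos]) >> shift) & mask
		}
	}
	entry := tablepos + int32(index)*8
	return binary.LittleEndian.Uint64(datagoogle[entry : entry+8])
}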
// writeCompressedToHTTP writes an array of label data (little-endian uint64), with
// dimensions given by subvol, to the HTTP response using the requested compression scheme.
func writeCompressedToHTTP(compression string, data []byte, subvol *dvid.Subvolume, w http.ResponseWriter) error {
	var err error
	w.Header().Set("Content-type", "application/octet-stream")
	switch compression {
	case "":
		_, err = w.Write(data)
		if err != nil {
			return err
		}
	case "lz4":
		compressed := make([]byte, lz4.CompressBound(data))
		var n, outSize int
		if outSize, err = lz4.Compress(data, compressed); err != nil {
			return err
		}
		compressed = compressed[:outSize]
		if n, err = w.Write(compressed); err != nil {
			return err
		}
		if n != outSize {
			err = fmt.Errorf("only able to write %d of %d lz4 compressed bytes", n, outSize)
			dvid.Errorf("%v\n", err)
			return err
		}
	case "gzip":
		gw := gzip.NewWriter(w)
		if _, err = gw.Write(data); err != nil {
			return err
		}
		if err = gw.Close(); err != nil {
			return err
		}
	case "google", "googlegzip": // see neuroglancer for details of the compressed segmentation format
		datagoogle, err := compressGoogle(data, subvol)
		if err != nil {
			return err
		}
		if compression == "googlegzip" {
			w.Header().Set("Content-encoding", "gzip")
			gw := gzip.NewWriter(w)
			if _, err = gw.Write(datagoogle); err != nil {
				return err
			}
			if err = gw.Close(); err != nil {
				return err
			}
		} else {
			_, err = w.Write(datagoogle)
			if err != nil {
				return err
			}
		}
	default:
		return fmt.Errorf("unknown compression type %q", compression)
	}
	return nil
}
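// serveLabelsExample is an illustrative sketch, not part of DVID's HTTP API. It shows
// how a GET handler might use writeCompressedToHTTP, with the "compression" query
// string selecting the wire format. Obtaining the label array and its *dvid.Subvolume
// geometry (e.g., from a datastore read of the requested region) is assumed to happen
// elsewhere and is passed in here for simplicity.
func serveLabelsExample(w http.ResponseWriter, r *http.Request, data []byte, subvol *dvid.Subvolume) {
	compression := r.URL.Query().Get("compression")
	if err := writeCompressedToHTTP(compression, data, subvol, w); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}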