// Copyright ©2019 The go-hep Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// rcompress provides types and functions to compress and decompress
// ROOT data payloads.
package rcompress // import "go-hep.org/x/hep/groot/internal/rcompress"

//go:generate go tool golang.org/x/tools/cmd/stringer -type Kind

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"

	"github.com/klauspost/compress/flate"
	"github.com/klauspost/compress/zlib"
	"github.com/klauspost/compress/zstd"
	"github.com/pierrec/lz4/v4"
	"github.com/pierrec/xxHash/xxHash64"
	"github.com/ulikunitz/xz"
)

// Kind specifies the compression algorithm
// to be used during reading or writing ROOT files.
type Kind int

// constants for compression/decompression.
// The numeric values follow ROOT's encoding: the algorithm kind is
// stored as (compression-setting / 100) — see rootCompressAlgLvl.
const (
	Inherit              Kind = -1
	UseGlobal            Kind = 0
	ZLIB                 Kind = +1
	LZMA                 Kind = +2
	OldCompression       Kind = +3
	LZ4                  Kind = +4
	ZSTD                 Kind = +5
	UndefinedCompression Kind = +6
)

// Version bytes stored in the third byte of a block header for the
// algorithms that record one.
const (
	zstdVersion = 1 // keep in sync with klauspost/compress/zstd and ROOT
	lz4Version  = 1 // keep in sync with pierrec/lz4
)

var (
	// errNoCompression is returned when the compression algorithm
	// couldn't compress the input or when the compressed output is bigger
	// than the input.
	errNoCompression = fmt.Errorf("rcompress: no compression")
)

// Settings encodes the ROOT way of specifying a compression mechanism
// and its compression level.
type Settings struct {
	Alg Kind // compression algorithm
	Lvl int  // compression level
}

// SettingsFrom creates a Settings value from the provided compression
// configuration (compression algorithm and compression level), using
// ROOT's encoding.
63 func SettingsFrom(compr int32) Settings { 64 alg, lvl := rootCompressAlgLvl(compr) 65 return Settings{Alg: alg, Lvl: lvl} 66 } 67 68 // DefaultSettings is the default compression algorithm and level used 69 // in ROOT files and trees. 70 var DefaultSettings = Settings{Alg: ZLIB, Lvl: flate.BestSpeed} 71 72 func (set Settings) Compression() int32 { 73 var ( 74 lvl = set.Lvl 75 alg = set.Alg 76 ) 77 switch { 78 case lvl == flate.DefaultCompression: 79 switch alg { 80 case ZLIB: 81 lvl = 6 82 case LZ4: 83 lvl = 1 84 case LZMA: 85 lvl = 1 86 case ZSTD: 87 lvl = 1 // FIXME(sbinet): check with ROOT-6.20.00 default 88 default: 89 panic(fmt.Errorf("rcompress: unknown compression algorithm: %v", alg)) 90 } 91 case lvl == flate.BestSpeed: 92 if alg == ZSTD { 93 lvl = int(zstd.SpeedFastest) 94 } 95 case lvl == flate.BestCompression: 96 if alg == ZSTD { 97 lvl = int(zstd.SpeedBestCompression) 98 } 99 case lvl > 99: 100 lvl = 99 101 } 102 return int32(alg*100) + int32(lvl) 103 104 } 105 106 // Note: this contains ZL[src][dst] where src and dst are 3 bytes each. 107 const HeaderSize = 9 108 109 // because each zipped block contains: 110 // - the size of the input data 111 // - the size of the compressed data 112 // where each size is saved on 3 bytes, the maximal size 113 // of each block can not be bigger than 16Mb. 114 const kMaxCompressedBlockSize = 0xffffff 115 116 // kindOf returns the kind of compression algorithm. 
func kindOf(buf []byte) Kind {
	_ = buf[HeaderSize-1] // bound-check
	// Dispatch on the 2-byte magic at the start of a block header.
	switch {
	case buf[0] == 'Z' && buf[1] == 'L':
		return ZLIB
	case buf[0] == 'X' && buf[1] == 'Z':
		return LZMA
	case buf[0] == 'L' && buf[1] == '4':
		return LZ4
	case buf[0] == 'Z' && buf[1] == 'S':
		return ZSTD
	case buf[0] == 'C' && buf[1] == 'S':
		return OldCompression
	default:
		return UndefinedCompression
	}
}

// rootCompressAlgLvl decodes ROOT's compression setting
// (100*algorithm + level) into its algorithm kind and level parts.
func rootCompressAlgLvl(v int32) (Kind, int) {
	var (
		alg = Kind(v / 100)
		lvl = int(v % 100)
	)

	return alg, lvl
}

// Compress compresses src, using the compression kind and level encoded into compr.
// Users can provide a non-nil dst to reduce allocation.
//
// When the configuration disables compression, or src is too small
// (<512 bytes) to be worth compressing, or any block turns out not to
// be compressible, src itself is returned unchanged.
func Compress(dst, src []byte, compr int32) ([]byte, error) {
	const (
		blksz = kMaxCompressedBlockSize // 16Mb
	)

	alg, lvl := rootCompressAlgLvl(compr)

	if alg == 0 || lvl == 0 || len(src) < 512 {
		// no compression
		return src, nil
	}

	var (
		// worst case: every blksz-sized chunk needs its own 9-byte header.
		nblocks = len(src)/blksz + 1
		cur     = 0
		beg     int
		end     int
	)

	// Grow dst (if needed) to hold the payload plus all block headers.
	size := len(src) + nblocks*HeaderSize
	if dst == nil || len(dst) < size {
		dst = append(dst, make([]byte, size-len(dst))...)
	}

	for beg = 0; beg < len(src); beg += blksz {
		end = min(beg+blksz, len(src))
		// FIXME(sbinet): split out into compressBlock{Zlib,LZ4,...}
		n, err := compressBlock(alg, lvl, dst[cur:], src[beg:end])
		switch err {
		case nil:
			cur += n
		case errNoCompression:
			// One incompressible block makes the whole payload stored raw.
			return src, nil
		default:
			return nil, err
		}
	}

	return dst[:cur], nil
}

// compressBlock compresses a single block of src into tgt, writing the
// 9-byte ROOT block header (2-byte magic, 1 algorithm-specific byte,
// 3-byte little-endian compressed size, 3-byte little-endian
// uncompressed size) followed by the compressed payload.
// It returns the total number of bytes written into tgt, or
// errNoCompression when the block is not worth compressing.
func compressBlock(alg Kind, lvl int, tgt, src []byte) (int, error) {
	// FIXME(sbinet): rework tgt/dst to reduce buffer allocation.

	var (
		err error

		dst = tgt[HeaderSize:]
		hdr = tgt[:HeaderSize]
		// wbuff returns errNoCompression on overflow, which doubles as
		// the "output is not smaller than input" signal below.
		buf = &wbuff{p: dst}

		srcsz = int32(len(src))
		dstsz int32
	)

	switch alg {
	case ZLIB:
		hdr[0] = 'Z'
		hdr[1] = 'L'
		hdr[2] = 8 // zlib deflated
		w, err := zlib.NewWriterLevel(buf, lvl)
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not create ZLIB compressor: %w", err)
		}

		_, err = w.Write(src)
		if err != nil {
			_ = w.Close()
			return 0, fmt.Errorf("rcompress: could not write ZLIB compressed bytes: %w", err)
		}
		err = w.Close()
		switch {
		case err == nil:
			// ok.
		case errors.Is(err, errNoCompression):
			// not compressible.
			return len(src), errNoCompression
		default:
			return 0, fmt.Errorf("rcompress: could not close ZLIB compressor: %w", err)
		}
		dstsz = int32(buf.c)

	case LZMA:
		hdr[0] = 'X'
		hdr[1] = 'Z'
		cfg := xz.WriterConfig{
			CheckSum: xz.CRC32,
		}
		if err := cfg.Verify(); err != nil {
			return 0, fmt.Errorf("rcompress: could not create LZMA compressor config: %w", err)
		}
		w, err := cfg.NewWriter(buf)
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not create LZMA compressor: %w", err)
		}
		// NOTE(review): w.Close is also called explicitly below; the
		// deferred second Close only guards the error-return paths.
		defer w.Close()

		_, err = w.Write(src)
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not write LZMA compressed bytes: %w", err)
		}

		err = w.Close()
		switch {
		case err == nil:
			// ok.
		case errors.Is(err, errNoCompression):
			// not-compressible.
			return len(src), errNoCompression
		default:
			return 0, fmt.Errorf("rcompress: could not close LZMA compressor: %w", err)
		}

		dstsz = int32(buf.c)

	case LZ4:
		hdr[0] = 'L'
		hdr[1] = '4'
		hdr[2] = lz4Version

		// The LZ4 payload is prefixed with an 8-byte xxHash64 checksum
		// of the compressed bytes.
		const chksum = 8
		var room = int(float64(srcsz) * 2e-4) // lz4 needs some extra scratch space
		dst := make([]byte, HeaderSize+chksum+len(src)+room)
		wrk := dst[HeaderSize:]
		var n int
		switch {
		case lvl >= 4:
			// levels >= 4 use the high-compression (HC) encoder, capped at 9.
			if lvl > 9 {
				lvl = 9
			}
			c := lz4.CompressorHC{Level: lz4.CompressionLevel(lvl)}
			n, err = c.CompressBlock(src, wrk[chksum:])
		default:
			ht := make([]int, 1<<16)
			n, err = lz4.CompressBlock(src, wrk[chksum:], ht)
		}
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not compress with LZ4: %w", err)
		}

		if n == 0 {
			// not compressible.
			return len(src), errNoCompression
		}

		wrk = wrk[:n+chksum]
		binary.BigEndian.PutUint64(wrk[:chksum], xxHash64.Checksum(wrk[chksum:], 0))
		dstsz = int32(n + chksum)
		// copy stops at len(buf.p): a short copy here silently truncates,
		// relying on the dstsz>kMaxCompressedBlockSize check below and on
		// callers sizing tgt generously.
		n = copy(buf.p, wrk)
		buf.c += n

	case ZSTD:
		hdr[0] = 'Z'
		hdr[1] = 'S'
		hdr[2] = zstdVersion

		w, err := zstd.NewWriter(buf, zstd.WithEncoderLevel(zstd.EncoderLevel(lvl)))
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not create ZSTD compressor: %w", err)
		}
		// NOTE(review): as for LZMA, the deferred Close backs up the
		// explicit Close below on error-return paths.
		defer w.Close()

		_, err = w.Write(src)
		if err != nil {
			return 0, fmt.Errorf("rcompress: could not write ZSTD compressed bytes: %w", err)
		}

		err = w.Close()
		switch {
		case buf.c >= len(src) || errors.Is(err, errNoCompression):
			// not compressible.
			return len(src), errNoCompression
		case err == nil:
			// ok.
		default:
			return 0, fmt.Errorf("rcompress: could not close ZSTD compressor: %w", err)
		}

		dstsz = int32(buf.c)

	case OldCompression:
		return 0, fmt.Errorf("rcompress: old compression algorithm unsupported")

	default:
		return 0, fmt.Errorf("rcompress: unknown algorithm %d", alg)
	}

	if dstsz > kMaxCompressedBlockSize {
		return 0, errNoCompression
	}

	// 3-byte little-endian compressed size...
	hdr[3] = byte(dstsz)
	hdr[4] = byte(dstsz >> 8)
	hdr[5] = byte(dstsz >> 16)

	// ...and 3-byte little-endian uncompressed size.
	hdr[6] = byte(srcsz)
	hdr[7] = byte(srcsz >> 8)
	hdr[8] = byte(srcsz >> 16)

	n := len(hdr) + int(dstsz)
	return n, nil
}

// Decompress decompresses src into dst.
// dst must already be sized to the total uncompressed length; blocks are
// read back-to-back from src until dst is full.
func Decompress(dst []byte, src io.Reader) error {
	var (
		beg    = 0
		end    = 0
		buflen = len(dst)
		hdr    = make([]byte, HeaderSize)
	)

	for end < buflen {
		_, err := io.ReadFull(src, hdr)
		if err != nil {
			return fmt.Errorf("rcompress: could not read compress header: %w", err)
		}

		_ = hdr[HeaderSize-1] // bound-check
		// Sizes are stored little-endian on 3 bytes each (see compressBlock).
		srcsz := int64(hdr[3]) | int64(hdr[4])<<8 | int64(hdr[5])<<16
		tgtsz := int64(hdr[6]) | int64(hdr[7])<<8 | int64(hdr[8])<<16
		end += int(tgtsz)
		lr := &io.LimitedReader{R: src, N: srcsz}
		switch kindOf(hdr) {
		case ZLIB:
			rc, err := zlib.NewReader(lr)
			if err != nil {
				return fmt.Errorf("rcompress: could not create ZLIB reader: %w", err)
			}
			// NOTE(review): defer inside a loop — readers pile up until
			// Decompress returns instead of closing per iteration.
			defer rc.Close()

			_, err = io.ReadFull(rc, dst[beg:end])
			if err != nil {
				return fmt.Errorf("rcompress: could not decompress ZLIB buffer: %w", err)
			}

		case LZ4:
			src := make([]byte, srcsz)
			_, err = io.ReadFull(lr, src)
			if err != nil {
				return fmt.Errorf("rcompress: could not read LZ4 block: %w", err)
			}
			const chksum = 8
			// FIXME: we skip the 32b checksum. use it!
			_, err = lz4.UncompressBlock(src[chksum:], dst[beg:end])
			if err != nil {
				switch {
				case srcsz > tgtsz:
					// no compression
					copy(dst[beg:end], src[chksum:])
				default:
					return fmt.Errorf("rcompress: could not decompress LZ4 block: %w", err)
				}
			}

		case LZMA:
			rc, err := xz.NewReader(lr)
			if err != nil {
				return fmt.Errorf("rcompress: could not create LZMA reader: %w", err)
			}
			_, err = io.ReadFull(rc, dst[beg:end])
			if err != nil {
				return fmt.Errorf("rcompress: could not decompress LZMA block: %w", err)
			}
			if lr.N > 0 {
				// FIXME(sbinet): LZMA leaves some bytes on the floor...
				_, err = lr.Read(make([]byte, lr.N))
				if err != nil {
					return err
				}
			}

		case ZSTD:
			rc, err := zstd.NewReader(lr)
			if err != nil {
				return fmt.Errorf("rcompress: could not create ZSTD reader: %w", err)
			}
			// NOTE(review): rc (zstd.Decoder) is never Closed here; its
			// worker goroutines are released only via finalizer — confirm
			// whether an explicit rc.Close() is wanted.
			_, err = io.ReadFull(rc, dst[beg:end])
			if err != nil {
				return fmt.Errorf("rcompress: could not decompress ZSTD block: %w", err)
			}
			if lr.N > 0 {
				panic("zstd extra bytes")
			}

		default:
			panic(fmt.Errorf("rcompress: unknown compression algorithm %q", hdr[:2]))
		}
		beg = end
	}

	return nil
}

// wbuff is a fixed-capacity io.Writer over a pre-allocated byte slice.
// A write past the end of the buffer reports errNoCompression, which the
// compressors above interpret as "output grew larger than the input".
type wbuff struct {
	p []byte // buffer of data to write on
	c int    // current position in buffer of data
}

// Write copies p into the remaining space of the buffer.
// Note: it may return a short count without error when p only
// partially fits; the error is reported on the next call.
func (w *wbuff) Write(p []byte) (int, error) {
	if w.c >= len(w.p) {
		return 0, errNoCompression
	}
	n := copy(w.p[w.c:], p)
	w.c += n
	return n, nil
}

var (
	_ io.Writer = (*wbuff)(nil)
)