go-hep.org/x/hep@v0.38.1/groot/internal/rcompress/rcompress.go (about)

     1  // Copyright ©2019 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package rcompress provides types and functions to compress and decompress
     6  // ROOT data payloads.
     7  package rcompress // import "go-hep.org/x/hep/groot/internal/rcompress"
     8  
     9  //go:generate go tool golang.org/x/tools/cmd/stringer -type Kind
    10  
    11  import (
    12  	"encoding/binary"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  
    17  	"github.com/klauspost/compress/flate"
    18  	"github.com/klauspost/compress/zlib"
    19  	"github.com/klauspost/compress/zstd"
    20  	"github.com/pierrec/lz4/v4"
    21  	"github.com/pierrec/xxHash/xxHash64"
    22  	"github.com/ulikunitz/xz"
    23  )
    24  
// Kind specifies the compression algorithm
// to be used during reading or writing ROOT files.
//
// A Kind is packed together with a compression level into a single
// ROOT compression setting as alg*100 + lvl (see Settings.Compression
// and rootCompressAlgLvl).
type Kind int

// constants for compression/decompression
const (
	// NOTE(review): presumably "inherit the setting from the parent
	// object" as in ROOT — confirm; nothing in this file interprets it.
	Inherit              Kind = -1
	// UseGlobal is the zero Kind. Compress returns its input unchanged
	// when the decoded algorithm is 0.
	UseGlobal            Kind = 0
	// ZLIB blocks are tagged "ZL" in the block header.
	ZLIB                 Kind = +1
	// LZMA blocks are tagged "XZ" in the block header.
	LZMA                 Kind = +2
	// OldCompression blocks are tagged "CS" in the block header.
	// compressBlock rejects this algorithm as unsupported.
	OldCompression       Kind = +3
	// LZ4 blocks are tagged "L4" in the block header.
	LZ4                  Kind = +4
	// ZSTD blocks are tagged "ZS" in the block header.
	ZSTD                 Kind = +5
	// UndefinedCompression is what kindOf reports for an unrecognized
	// header tag.
	UndefinedCompression Kind = +6
)
    40  
// Version markers written by compressBlock into the third byte of the
// block header (hdr[2]) for the ZSTD and LZ4 algorithms.
const (
	zstdVersion = 1 // keep in sync with klauspost/compress/zstd and ROOT
	lz4Version  = 1 // keep in sync with pierrec/lz4
)
    45  
    46  var (
    47  	// errNoCompression is returned when the compression algorithm
    48  	// couldn't compress the input or when the compressed output is bigger
    49  	// than the input
    50  	errNoCompression = fmt.Errorf("rcompress: no compression")
    51  )
    52  
// Settings encodes the ROOT way of specifying a compression mechanism
// and its compression level.
type Settings struct {
	// Alg is the compression algorithm.
	Alg Kind
	// Lvl is the compression level. Compression maps the flate level
	// constants (DefaultCompression, BestSpeed, BestCompression) to
	// algorithm-specific values and caps other levels at 99.
	Lvl int
}
    59  
    60  // SettingsFrom create a Settings value from the provided compression
    61  // configuration (compression algorithm and compression level), using
    62  // ROOT's encoding.
    63  func SettingsFrom(compr int32) Settings {
    64  	alg, lvl := rootCompressAlgLvl(compr)
    65  	return Settings{Alg: alg, Lvl: lvl}
    66  }
    67  
// DefaultSettings is the default compression algorithm and level used
// in ROOT files and trees: ZLIB at the flate.BestSpeed level.
var DefaultSettings = Settings{Alg: ZLIB, Lvl: flate.BestSpeed}
    71  
    72  func (set Settings) Compression() int32 {
    73  	var (
    74  		lvl = set.Lvl
    75  		alg = set.Alg
    76  	)
    77  	switch {
    78  	case lvl == flate.DefaultCompression:
    79  		switch alg {
    80  		case ZLIB:
    81  			lvl = 6
    82  		case LZ4:
    83  			lvl = 1
    84  		case LZMA:
    85  			lvl = 1
    86  		case ZSTD:
    87  			lvl = 1 // FIXME(sbinet): check with ROOT-6.20.00 default
    88  		default:
    89  			panic(fmt.Errorf("rcompress: unknown compression algorithm: %v", alg))
    90  		}
    91  	case lvl == flate.BestSpeed:
    92  		if alg == ZSTD {
    93  			lvl = int(zstd.SpeedFastest)
    94  		}
    95  	case lvl == flate.BestCompression:
    96  		if alg == ZSTD {
    97  			lvl = int(zstd.SpeedBestCompression)
    98  		}
    99  	case lvl > 99:
   100  		lvl = 99
   101  	}
   102  	return int32(alg*100) + int32(lvl)
   103  
   104  }
   105  
// HeaderSize is the size, in bytes, of the header that precedes each
// compressed block.
//
// As written by compressBlock, the header holds: a 2-byte algorithm tag
// (e.g. "ZL"), 1 method/version byte, the compressed size on 3 bytes and
// the uncompressed size on 3 bytes, both sizes least-significant byte
// first.
const HeaderSize = 9
   108  
// kMaxCompressedBlockSize is the maximal size of a single block.
//
// Because each zipped block contains:
// - the size of the input data
// - the size of the compressed data
// where each size is saved on 3 bytes, the maximal size
// of each block can not be bigger than 16Mb.
const kMaxCompressedBlockSize = 0xffffff
   115  
   116  // kindOf returns the kind of compression algorithm.
   117  func kindOf(buf []byte) Kind {
   118  	_ = buf[HeaderSize-1] // bound-check
   119  	switch {
   120  	case buf[0] == 'Z' && buf[1] == 'L':
   121  		return ZLIB
   122  	case buf[0] == 'X' && buf[1] == 'Z':
   123  		return LZMA
   124  	case buf[0] == 'L' && buf[1] == '4':
   125  		return LZ4
   126  	case buf[0] == 'Z' && buf[1] == 'S':
   127  		return ZSTD
   128  	case buf[0] == 'C' && buf[1] == 'S':
   129  		return OldCompression
   130  	default:
   131  		return UndefinedCompression
   132  	}
   133  }
   134  
   135  func rootCompressAlgLvl(v int32) (Kind, int) {
   136  	var (
   137  		alg = Kind(v / 100)
   138  		lvl = int(v % 100)
   139  	)
   140  
   141  	return alg, lvl
   142  }
   143  
   144  // Compress compresses src, using the compression kind and level encoded into compr.
   145  // Users can provide a non-nil dst to reduce allocation.
   146  func Compress(dst, src []byte, compr int32) ([]byte, error) {
   147  	const (
   148  		blksz = kMaxCompressedBlockSize // 16Mb
   149  	)
   150  
   151  	alg, lvl := rootCompressAlgLvl(compr)
   152  
   153  	if alg == 0 || lvl == 0 || len(src) < 512 {
   154  		// no compression
   155  		return src, nil
   156  	}
   157  
   158  	var (
   159  		nblocks = len(src)/blksz + 1
   160  		cur     = 0
   161  		beg     int
   162  		end     int
   163  	)
   164  
   165  	size := len(src) + nblocks*HeaderSize
   166  	if dst == nil || len(dst) < size {
   167  		dst = append(dst, make([]byte, size-len(dst))...)
   168  	}
   169  
   170  	for beg = 0; beg < len(src); beg += blksz {
   171  		end = min(beg+blksz, len(src))
   172  		// FIXME(sbinet): split out into compressBlock{Zlib,LZ4,...}
   173  		n, err := compressBlock(alg, lvl, dst[cur:], src[beg:end])
   174  		switch err {
   175  		case nil:
   176  			cur += n
   177  		case errNoCompression:
   178  			return src, nil
   179  		default:
   180  			return nil, err
   181  		}
   182  	}
   183  
   184  	return dst[:cur], nil
   185  }
   186  
   187  func compressBlock(alg Kind, lvl int, tgt, src []byte) (int, error) {
   188  	// FIXME(sbinet): rework tgt/dst to reduce buffer allocation.
   189  
   190  	var (
   191  		err error
   192  
   193  		dst = tgt[HeaderSize:]
   194  		hdr = tgt[:HeaderSize]
   195  		buf = &wbuff{p: dst}
   196  
   197  		srcsz = int32(len(src))
   198  		dstsz int32
   199  	)
   200  
   201  	switch alg {
   202  	case ZLIB:
   203  		hdr[0] = 'Z'
   204  		hdr[1] = 'L'
   205  		hdr[2] = 8 // zlib deflated
   206  		w, err := zlib.NewWriterLevel(buf, lvl)
   207  		if err != nil {
   208  			return 0, fmt.Errorf("rcompress: could not create ZLIB compressor: %w", err)
   209  		}
   210  
   211  		_, err = w.Write(src)
   212  		if err != nil {
   213  			_ = w.Close()
   214  			return 0, fmt.Errorf("rcompress: could not write ZLIB compressed bytes: %w", err)
   215  		}
   216  		err = w.Close()
   217  		switch {
   218  		case err == nil:
   219  			// ok.
   220  		case errors.Is(err, errNoCompression):
   221  			// not compressible.
   222  			return len(src), errNoCompression
   223  		default:
   224  			return 0, fmt.Errorf("rcompress: could not close ZLIB compressor: %w", err)
   225  		}
   226  		dstsz = int32(buf.c)
   227  
   228  	case LZMA:
   229  		hdr[0] = 'X'
   230  		hdr[1] = 'Z'
   231  		cfg := xz.WriterConfig{
   232  			CheckSum: xz.CRC32,
   233  		}
   234  		if err := cfg.Verify(); err != nil {
   235  			return 0, fmt.Errorf("rcompress: could not create LZMA compressor config: %w", err)
   236  		}
   237  		w, err := cfg.NewWriter(buf)
   238  		if err != nil {
   239  			return 0, fmt.Errorf("rcompress: could not create LZMA compressor: %w", err)
   240  		}
   241  		defer w.Close()
   242  
   243  		_, err = w.Write(src)
   244  		if err != nil {
   245  			return 0, fmt.Errorf("rcompress: could not write LZMA compressed bytes: %w", err)
   246  		}
   247  
   248  		err = w.Close()
   249  		switch {
   250  		case err == nil:
   251  			// ok.
   252  		case errors.Is(err, errNoCompression):
   253  			// not-compressible.
   254  			return len(src), errNoCompression
   255  		default:
   256  			return 0, fmt.Errorf("rcompress: could not close LZMA compressor: %w", err)
   257  		}
   258  
   259  		dstsz = int32(buf.c)
   260  
   261  	case LZ4:
   262  		hdr[0] = 'L'
   263  		hdr[1] = '4'
   264  		hdr[2] = lz4Version
   265  
   266  		const chksum = 8
   267  		var room = int(float64(srcsz) * 2e-4) // lz4 needs some extra scratch space
   268  		dst := make([]byte, HeaderSize+chksum+len(src)+room)
   269  		wrk := dst[HeaderSize:]
   270  		var n int
   271  		switch {
   272  		case lvl >= 4:
   273  			if lvl > 9 {
   274  				lvl = 9
   275  			}
   276  			c := lz4.CompressorHC{Level: lz4.CompressionLevel(lvl)}
   277  			n, err = c.CompressBlock(src, wrk[chksum:])
   278  		default:
   279  			ht := make([]int, 1<<16)
   280  			n, err = lz4.CompressBlock(src, wrk[chksum:], ht)
   281  		}
   282  		if err != nil {
   283  			return 0, fmt.Errorf("rcompress: could not compress with LZ4: %w", err)
   284  		}
   285  
   286  		if n == 0 {
   287  			// not compressible.
   288  			return len(src), errNoCompression
   289  		}
   290  
   291  		wrk = wrk[:n+chksum]
   292  		binary.BigEndian.PutUint64(wrk[:chksum], xxHash64.Checksum(wrk[chksum:], 0))
   293  		dstsz = int32(n + chksum)
   294  		n = copy(buf.p, wrk)
   295  		buf.c += n
   296  
   297  	case ZSTD:
   298  		hdr[0] = 'Z'
   299  		hdr[1] = 'S'
   300  		hdr[2] = zstdVersion
   301  
   302  		w, err := zstd.NewWriter(buf, zstd.WithEncoderLevel(zstd.EncoderLevel(lvl)))
   303  		if err != nil {
   304  			return 0, fmt.Errorf("rcompress: could not create ZSTD compressor: %w", err)
   305  		}
   306  		defer w.Close()
   307  
   308  		_, err = w.Write(src)
   309  		if err != nil {
   310  			return 0, fmt.Errorf("rcompress: could not write ZSTD compressed bytes: %w", err)
   311  		}
   312  
   313  		err = w.Close()
   314  		switch {
   315  		case buf.c >= len(src) || errors.Is(err, errNoCompression):
   316  			// not compressible.
   317  			return len(src), errNoCompression
   318  		case err == nil:
   319  			// ok.
   320  		default:
   321  			return 0, fmt.Errorf("rcompress: could not close ZSTD compressor: %w", err)
   322  		}
   323  
   324  		dstsz = int32(buf.c)
   325  
   326  	case OldCompression:
   327  		return 0, fmt.Errorf("rcompress: old compression algorithm unsupported")
   328  
   329  	default:
   330  		return 0, fmt.Errorf("rcompress: unknown algorithm %d", alg)
   331  	}
   332  
   333  	if dstsz > kMaxCompressedBlockSize {
   334  		return 0, errNoCompression
   335  	}
   336  
   337  	hdr[3] = byte(dstsz)
   338  	hdr[4] = byte(dstsz >> 8)
   339  	hdr[5] = byte(dstsz >> 16)
   340  
   341  	hdr[6] = byte(srcsz)
   342  	hdr[7] = byte(srcsz >> 8)
   343  	hdr[8] = byte(srcsz >> 16)
   344  
   345  	n := len(hdr) + int(dstsz)
   346  	return n, nil
   347  }
   348  
   349  // Decompress decompresses src into dst.
   350  func Decompress(dst []byte, src io.Reader) error {
   351  	var (
   352  		beg    = 0
   353  		end    = 0
   354  		buflen = len(dst)
   355  		hdr    = make([]byte, HeaderSize)
   356  	)
   357  
   358  	for end < buflen {
   359  		_, err := io.ReadFull(src, hdr)
   360  		if err != nil {
   361  			return fmt.Errorf("rcompress: could not read compress header: %w", err)
   362  		}
   363  
   364  		_ = hdr[HeaderSize-1] // bound-check
   365  		srcsz := int64(hdr[3]) | int64(hdr[4])<<8 | int64(hdr[5])<<16
   366  		tgtsz := int64(hdr[6]) | int64(hdr[7])<<8 | int64(hdr[8])<<16
   367  		end += int(tgtsz)
   368  		lr := &io.LimitedReader{R: src, N: srcsz}
   369  		switch kindOf(hdr) {
   370  		case ZLIB:
   371  			rc, err := zlib.NewReader(lr)
   372  			if err != nil {
   373  				return fmt.Errorf("rcompress: could not create ZLIB reader: %w", err)
   374  			}
   375  			defer rc.Close()
   376  
   377  			_, err = io.ReadFull(rc, dst[beg:end])
   378  			if err != nil {
   379  				return fmt.Errorf("rcompress: could not decompress ZLIB buffer: %w", err)
   380  			}
   381  
   382  		case LZ4:
   383  			src := make([]byte, srcsz)
   384  			_, err = io.ReadFull(lr, src)
   385  			if err != nil {
   386  				return fmt.Errorf("rcompress: could not read LZ4 block: %w", err)
   387  			}
   388  			const chksum = 8
   389  			// FIXME: we skip the 32b checksum. use it!
   390  			_, err = lz4.UncompressBlock(src[chksum:], dst[beg:end])
   391  			if err != nil {
   392  				switch {
   393  				case srcsz > tgtsz:
   394  					// no compression
   395  					copy(dst[beg:end], src[chksum:])
   396  				default:
   397  					return fmt.Errorf("rcompress: could not decompress LZ4 block: %w", err)
   398  				}
   399  			}
   400  
   401  		case LZMA:
   402  			rc, err := xz.NewReader(lr)
   403  			if err != nil {
   404  				return fmt.Errorf("rcompress: could not create LZMA reader: %w", err)
   405  			}
   406  			_, err = io.ReadFull(rc, dst[beg:end])
   407  			if err != nil {
   408  				return fmt.Errorf("rcompress: could not decompress LZMA block: %w", err)
   409  			}
   410  			if lr.N > 0 {
   411  				// FIXME(sbinet): LZMA leaves some bytes on the floor...
   412  				_, err = lr.Read(make([]byte, lr.N))
   413  				if err != nil {
   414  					return err
   415  				}
   416  			}
   417  
   418  		case ZSTD:
   419  			rc, err := zstd.NewReader(lr)
   420  			if err != nil {
   421  				return fmt.Errorf("rcompress: could not create ZSTD reader: %w", err)
   422  			}
   423  			_, err = io.ReadFull(rc, dst[beg:end])
   424  			if err != nil {
   425  				return fmt.Errorf("rcompress: could not decompress ZSTD block: %w", err)
   426  			}
   427  			if lr.N > 0 {
   428  				panic("zstd extra bytes")
   429  			}
   430  
   431  		default:
   432  			panic(fmt.Errorf("rcompress: unknown compression algorithm %q", hdr[:2]))
   433  		}
   434  		beg = end
   435  	}
   436  
   437  	return nil
   438  }
   439  
   440  type wbuff struct {
   441  	p []byte // buffer of data to write on
   442  	c int    // current position in buffer of data
   443  }
   444  
   445  func (w *wbuff) Write(p []byte) (int, error) {
   446  	if w.c >= len(w.p) {
   447  		return 0, errNoCompression
   448  	}
   449  	n := copy(w.p[w.c:], p)
   450  	w.c += n
   451  	return n, nil
   452  }
   453  
   454  var (
   455  	_ io.Writer = (*wbuff)(nil)
   456  )