github.com/grailbio/base@v0.0.11/recordio/recordioflate/recordioflate.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package recordioflate provides the "flate" transformer. It implements flate
     6  // compression and decompression.  To use:
     7  //
     8  // - Call recordioflate.Init() when the process starts.
     9  //
    10  // - Add "flate" to WriterV2Opts.Transformer. It will compress blocks using
    11  //   flate default compression level. Setting "flate 3" will enable flate
    12  //   compression level 3.
    13  package recordioflate
    14  
    15  import (
    16  	"bytes"
    17  	"io"
    18  	"strconv"
    19  	"sync"
    20  
    21  	"github.com/grailbio/base/recordio"
    22  	"github.com/grailbio/base/recordio/recordioiov"
    23  	"github.com/klauspost/compress/flate"
    24  )
    25  
// Name is the name under which Init registers the flate transformer with
// recordio.
const Name = "flate"
    28  
    29  func flateCompress(level int, bufs [][]byte) ([]byte, error) {
    30  	size := recordioiov.TotalBytes(bufs)
    31  	out := bytes.NewBuffer(make([]byte, 0, size))
    32  	wr, err := flate.NewWriter(out, level)
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  	for _, b := range bufs {
    37  		n, err := wr.Write(b)
    38  		if err != nil {
    39  			return nil, err
    40  		}
    41  		if n != len(b) {
    42  			panic(b)
    43  		}
    44  	}
    45  	if err := wr.Close(); err != nil {
    46  		return nil, err
    47  	}
    48  	return out.Bytes(), nil
    49  }
    50  
    51  // FlateUncompress is the uncompress transformer for flate.  This is exposed
    52  // only to read legacy files. For regular applications, adding "flate" to
    53  // ScannerOpts.Transformers will enable flate.
    54  func FlateUncompress(scratch []byte, in [][]byte) ([]byte, error) {
    55  	out := bytes.NewBuffer(scratch[:0])
    56  	r := recordioiov.NewIOVecReader(in)
    57  	frd := flate.NewReader(&r)
    58  	if _, err := io.Copy(out, frd); err != nil {
    59  		return nil, err
    60  	}
    61  	if err := frd.Close(); err != nil {
    62  		return nil, err
    63  	}
    64  	return out.Bytes(), nil
    65  }
    66  
    67  var once = sync.Once{}
    68  
    69  // Init installs the zstd transformer in recordio.  It can be called multiple
    70  // times, but 2nd and later calls have no effect.
    71  func Init() {
    72  	once.Do(func() {
    73  		recordio.RegisterTransformer(
    74  			Name,
    75  			func(config string) (recordio.TransformFunc, error) {
    76  				level := flate.DefaultCompression
    77  				if config != "" {
    78  					var err error
    79  					level, err = strconv.Atoi(config)
    80  					if err != nil {
    81  						return nil, err
    82  					}
    83  				}
    84  				return func(scratch []byte, in [][]byte) ([]byte, error) {
    85  					// TODO(saito) use scratch
    86  					return flateCompress(level, in)
    87  				}, nil
    88  			},
    89  			func(string) (recordio.TransformFunc, error) {
    90  				return FlateUncompress, nil
    91  			})
    92  	})
    93  }