github.com/quay/claircore@v1.5.28/internal/zreader/zreader.go (about)

     1  // Package zreader implements a transparently decompressing [io.Reader].
     2  package zreader
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"compress/bzip2"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  
    12  	"github.com/klauspost/compress/gzip"
    13  	"github.com/klauspost/compress/zstd"
    14  )
    15  
    16  //go:generate go run golang.org/x/tools/cmd/stringer -type Compression
    17  
    18  // Compression marks the scheme that the original Reader contains.
    19  type Compression int
    20  
    21  // Compression constants.
    22  const (
    23  	KindGzip Compression = iota
    24  	KindZstd
    25  	KindBzip2
    26  	KindNone
    27  )
    28  
    29  // Max number of bytes needed to check compression headers. Populated in this
    30  // package's init func to avoid needing to keep some constants manually updated.
    31  var maxSz int
    32  
    33  func init() {
    34  	for _, d := range detectors[:] {
    35  		l := len(d.Mask)
    36  		if l > maxSz {
    37  			maxSz = l
    38  		}
    39  	}
    40  }
    41  
    42  // Detector is the hook to determine if a Reader contains a certain compression
    43  // scheme.
    44  type detector struct {
    45  	// Mask is a bytemask for the bytes passed to Check.
    46  	Mask []byte
    47  	// Check reports if the byte slice is the header for a given compression
    48  	// scheme.
    49  	//
    50  	// The passed byte size is sliced to the same size of Mask, and has been
    51  	// ANDed pairwise with Mask.
    52  	Check func([]byte) bool
    53  }
    54  
    55  // Detectors is the array of detection hooks.
    56  var detectors = [...]detector{
    57  	staticHeader(gzipHeader),
    58  	staticHeader(zstdHeader),
    59  	// Bzip2 header is technically 2 bytes, but the other valid value for byte 3
    60  	// is bzip1-compat format and the fourth byte is required to in a certain
    61  	// range.
    62  	{
    63  		Mask: bytes.Repeat([]byte{0xFF}, 4),
    64  		Check: func(b []byte) bool {
    65  			l := len(bzipHeader)
    66  			return bytes.Equal(bzipHeader, b[:l]) && (b[l] >= '1' && b[l] <= '9')
    67  		},
    68  	},
    69  }
    70  
    71  // StaticHeader is a helper to create a [detector] for has a constant byte
    72  // string.
    73  func staticHeader(h []byte) detector {
    74  	return detector{
    75  		Mask: bytes.Repeat([]byte{0xFF}, len(h)),
    76  		Check: func(b []byte) bool {
    77  			return bytes.Equal(h, b)
    78  		},
    79  	}
    80  }
    81  
    82  // Some static header values.
    83  var (
    84  	gzipHeader = []byte{0x1F, 0x8B, 0x08}
    85  	zstdHeader = []byte{0x28, 0xB5, 0x2F, 0xFD}
    86  	bzipHeader = []byte{'B', 'Z', 'h'}
    87  )
    88  
    89  // DetectCompression reports the compression type indicated based on the header
    90  // contained in the passed byte slice.
    91  //
    92  // "CmpNone" is returned if all detectors report false, but it's possible that
    93  // it's just a scheme unsupported by this package.
    94  func detectCompression(b []byte) Compression {
    95  	t := make([]byte, len(b))
    96  	for c, d := range detectors {
    97  		n, l := copy(t, b), len(d.Mask)
    98  		if n < l {
    99  			continue
   100  		}
   101  		t := t[:l]
   102  		for i := range d.Mask {
   103  			t[i] &= d.Mask[i]
   104  		}
   105  		if d.Check(t) {
   106  			return Compression(c)
   107  		}
   108  	}
   109  	return KindNone
   110  }
   111  
   112  // Reader returns an [io.ReadCloser] that transparently reads bytes compressed with
   113  // one of the following schemes:
   114  //
   115  //   - gzip
   116  //   - zstd
   117  //   - bzip2
   118  //
   119  // If the data does not seem to be one of these schemes, a new [io.ReadCloser]
   120  // equivalent to the provided [io.Reader] is returned.
   121  // The provided [io.Reader] is expected to have any necessary cleanup arranged
   122  // by the caller; that is, it will not arrange for a Close method to be called
   123  // if it also implements [io.Closer].
   124  func Reader(r io.Reader) (rc io.ReadCloser, err error) {
   125  	rc, _, err = detect(r)
   126  	return rc, err
   127  }
   128  
   129  // Detect follows the same procedure as [Reader], but also reports the detected
   130  // compression scheme.
   131  func Detect(r io.Reader) (io.ReadCloser, Compression, error) {
   132  	return detect(r)
   133  }
   134  
   135  // Detect (unexported) does the actual work for both [Detect] and [Reader].
   136  func detect(r io.Reader) (io.ReadCloser, Compression, error) {
   137  	br := bufio.NewReader(r)
   138  	// Populate a buffer with enough bytes to determine what header is at the
   139  	// start of this Reader.
   140  	b, err := br.Peek(maxSz)
   141  	switch {
   142  	case errors.Is(err, nil):
   143  	case errors.Is(err, io.ErrNoProgress):
   144  		return io.NopCloser(br), KindNone, nil
   145  	case errors.Is(err, io.EOF):
   146  		// Not enough bytes, just return a reader containing the bytes.
   147  		return io.NopCloser(bytes.NewReader(b)), KindNone, nil
   148  	default:
   149  		return nil, KindNone, err
   150  	}
   151  
   152  	// Run the detectors.
   153  	//
   154  	// All the return types are a little different, so they're handled in the
   155  	// switch arms.
   156  	switch c := detectCompression(b); c {
   157  	case KindGzip:
   158  		z, err := gzip.NewReader(br)
   159  		return z, c, err
   160  	case KindZstd:
   161  		z, err := zstd.NewReader(br)
   162  		if err != nil {
   163  			return nil, KindNone, err
   164  		}
   165  		return z.IOReadCloser(), c, nil
   166  	case KindBzip2:
   167  		z := bzip2.NewReader(br)
   168  		return io.NopCloser(z), c, nil
   169  	case KindNone:
   170  		// Return the reconstructed Reader.
   171  	default:
   172  		panic(fmt.Sprintf("programmer error: unknown compression type %v (bytes read: %#v)", c, b))
   173  	}
   174  	return io.NopCloser(br), KindNone, nil
   175  }