github.com/quay/claircore@v1.5.28/internal/zreader/zreader.go (about) 1 // Package zreader implements a transparently decompressing [io.Reader]. 2 package zreader 3 4 import ( 5 "bufio" 6 "bytes" 7 "compress/bzip2" 8 "errors" 9 "fmt" 10 "io" 11 12 "github.com/klauspost/compress/gzip" 13 "github.com/klauspost/compress/zstd" 14 ) 15 16 //go:generate go run golang.org/x/tools/cmd/stringer -type Compression 17 18 // Compression marks the scheme that the original Reader contains. 19 type Compression int 20 21 // Compression constants. 22 const ( 23 KindGzip Compression = iota 24 KindZstd 25 KindBzip2 26 KindNone 27 ) 28 29 // Max number of bytes needed to check compression headers. Populated in this 30 // package's init func to avoid needing to keep some constants manually updated. 31 var maxSz int 32 33 func init() { 34 for _, d := range detectors[:] { 35 l := len(d.Mask) 36 if l > maxSz { 37 maxSz = l 38 } 39 } 40 } 41 42 // Detector is the hook to determine if a Reader contains a certain compression 43 // scheme. 44 type detector struct { 45 // Mask is a bytemask for the bytes passed to Check. 46 Mask []byte 47 // Check reports if the byte slice is the header for a given compression 48 // scheme. 49 // 50 // The passed byte size is sliced to the same size of Mask, and has been 51 // ANDed pairwise with Mask. 52 Check func([]byte) bool 53 } 54 55 // Detectors is the array of detection hooks. 56 var detectors = [...]detector{ 57 staticHeader(gzipHeader), 58 staticHeader(zstdHeader), 59 // Bzip2 header is technically 2 bytes, but the other valid value for byte 3 60 // is bzip1-compat format and the fourth byte is required to in a certain 61 // range. 62 { 63 Mask: bytes.Repeat([]byte{0xFF}, 4), 64 Check: func(b []byte) bool { 65 l := len(bzipHeader) 66 return bytes.Equal(bzipHeader, b[:l]) && (b[l] >= '1' && b[l] <= '9') 67 }, 68 }, 69 } 70 71 // StaticHeader is a helper to create a [detector] for has a constant byte 72 // string. 73 func staticHeader(h []byte) detector { 74 return detector{ 75 Mask: bytes.Repeat([]byte{0xFF}, len(h)), 76 Check: func(b []byte) bool { 77 return bytes.Equal(h, b) 78 }, 79 } 80 } 81 82 // Some static header values. 83 var ( 84 gzipHeader = []byte{0x1F, 0x8B, 0x08} 85 zstdHeader = []byte{0x28, 0xB5, 0x2F, 0xFD} 86 bzipHeader = []byte{'B', 'Z', 'h'} 87 ) 88 89 // DetectCompression reports the compression type indicated based on the header 90 // contained in the passed byte slice. 91 // 92 // "CmpNone" is returned if all detectors report false, but it's possible that 93 // it's just a scheme unsupported by this package. 94 func detectCompression(b []byte) Compression { 95 t := make([]byte, len(b)) 96 for c, d := range detectors { 97 n, l := copy(t, b), len(d.Mask) 98 if n < l { 99 continue 100 } 101 t := t[:l] 102 for i := range d.Mask { 103 t[i] &= d.Mask[i] 104 } 105 if d.Check(t) { 106 return Compression(c) 107 } 108 } 109 return KindNone 110 } 111 112 // Reader returns an [io.ReadCloser] that transparently reads bytes compressed with 113 // one of the following schemes: 114 // 115 // - gzip 116 // - zstd 117 // - bzip2 118 // 119 // If the data does not seem to be one of these schemes, a new [io.ReadCloser] 120 // equivalent to the provided [io.Reader] is returned. 121 // The provided [io.Reader] is expected to have any necessary cleanup arranged 122 // by the caller; that is, it will not arrange for a Close method to be called 123 // if it also implements [io.Closer]. 124 func Reader(r io.Reader) (rc io.ReadCloser, err error) { 125 rc, _, err = detect(r) 126 return rc, err 127 } 128 129 // Detect follows the same procedure as [Reader], but also reports the detected 130 // compression scheme. 131 func Detect(r io.Reader) (io.ReadCloser, Compression, error) { 132 return detect(r) 133 } 134 135 // Detect (unexported) does the actual work for both [Detect] and [Reader]. 136 func detect(r io.Reader) (io.ReadCloser, Compression, error) { 137 br := bufio.NewReader(r) 138 // Populate a buffer with enough bytes to determine what header is at the 139 // start of this Reader. 140 b, err := br.Peek(maxSz) 141 switch { 142 case errors.Is(err, nil): 143 case errors.Is(err, io.ErrNoProgress): 144 return io.NopCloser(br), KindNone, nil 145 case errors.Is(err, io.EOF): 146 // Not enough bytes, just return a reader containing the bytes. 147 return io.NopCloser(bytes.NewReader(b)), KindNone, nil 148 default: 149 return nil, KindNone, err 150 } 151 152 // Run the detectors. 153 // 154 // All the return types are a little different, so they're handled in the 155 // switch arms. 156 switch c := detectCompression(b); c { 157 case KindGzip: 158 z, err := gzip.NewReader(br) 159 return z, c, err 160 case KindZstd: 161 z, err := zstd.NewReader(br) 162 if err != nil { 163 return nil, KindNone, err 164 } 165 return z.IOReadCloser(), c, nil 166 case KindBzip2: 167 z := bzip2.NewReader(br) 168 return io.NopCloser(z), c, nil 169 case KindNone: 170 // Return the reconstructed Reader. 171 default: 172 panic(fmt.Sprintf("programmer error: unknown compression type %v (bytes read: %#v)", c, b)) 173 } 174 return io.NopCloser(br), KindNone, nil 175 }