gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/compressio/nocompressio.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package compressio
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"crypto/hmac"
    21  	"crypto/sha256"
    22  	"encoding/binary"
    23  	"hash"
    24  	"io"
    25  )
    26  
// nocompressio provides data storage that does not use data compression but
// offers optional data integrity via keyed HMAC-SHA256 hashing.
    29  //
    30  // The stream format is defined as follows.
    31  //
    32  // /------------------------------------------------------\
    33  // |                  data size (4-bytes)                 |
    34  // +------------------------------------------------------+
    35  // |                  data                                |
    36  // +------------------------------------------------------+
    37  // |       (optional) hash (32-bytes)                     |
    38  // +------------------------------------------------------+
    39  // |                  data size (4-bytes)                 |
    40  // +------------------------------------------------------+
    41  // |                       ......                         |
    42  // \------------------------------------------------------/
    43  //
    44  // where each hash is calculated from the following items in order
    45  //
    46  //	data
    47  //	data size
    48  
    49  // SimpleReader is a reader from uncompressed image.
    50  type SimpleReader struct {
    51  	// in is the source.
    52  	in io.Reader
    53  
    54  	// key is the key used to create hash objects.
    55  	key []byte
    56  
    57  	// h is the hash object.
    58  	h hash.Hash
    59  
    60  	// current data chunk size
    61  	chunkSize uint32
    62  
    63  	// current chunk position
    64  	done uint32
    65  }
    66  
// Compile-time check that *SimpleReader satisfies io.Reader.
var _ io.Reader = (*SimpleReader)(nil)
    68  
const (
	// defaultBufSize is the size, in bytes (256 KiB), of the bufio buffers
	// that NewSimpleReader and NewSimpleWriter place around the underlying
	// stream.
	defaultBufSize = 256 * 1024
)
    72  
    73  // NewSimpleReader returns a new (uncompressed) reader. If key is non-nil, the data stream
    74  // is assumed to contain expected hash values. See package comments for
    75  // details.
    76  func NewSimpleReader(in io.Reader, key []byte) (*SimpleReader, error) {
    77  	r := &SimpleReader{
    78  		in:  bufio.NewReaderSize(in, defaultBufSize),
    79  		key: key,
    80  	}
    81  
    82  	if key != nil {
    83  		r.h = hmac.New(sha256.New, key)
    84  	}
    85  
    86  	return r, nil
    87  }
    88  
    89  // Read implements io.Reader.Read.
    90  func (r *SimpleReader) Read(p []byte) (int, error) {
    91  	var scratch [4]byte
    92  
    93  	if len(p) == 0 {
    94  		return r.in.Read(p)
    95  	}
    96  
    97  	// need next chunk?
    98  	if r.done >= r.chunkSize {
    99  		if _, err := io.ReadFull(r.in, scratch[:]); err != nil {
   100  			return 0, err
   101  		}
   102  
   103  		r.chunkSize = binary.BigEndian.Uint32(scratch[:])
   104  		r.done = 0
   105  		if r.key != nil {
   106  			r.h.Reset()
   107  		}
   108  
   109  		if r.chunkSize == 0 {
   110  			// this must not happen
   111  			return 0, io.ErrNoProgress
   112  		}
   113  	}
   114  
   115  	toRead := uint32(len(p))
   116  	// can't read more than what's left
   117  	if toRead > r.chunkSize-r.done {
   118  		toRead = r.chunkSize - r.done
   119  	}
   120  
   121  	n, err := r.in.Read(p[:toRead])
   122  	if err != nil {
   123  		if err == io.EOF {
   124  			// this only can happen if storage or data size is corrupted,
   125  			// but we have no other means to detect it earlier as we store
   126  			// hash after the data block.
   127  			return n, ErrHashMismatch
   128  		}
   129  		return n, err
   130  	}
   131  
   132  	if r.key != nil {
   133  		_, _ = r.h.Write(p[:n])
   134  	}
   135  
   136  	r.done += uint32(n)
   137  	if r.done >= r.chunkSize {
   138  		if r.key != nil {
   139  			binary.BigEndian.PutUint32(scratch[:], r.chunkSize)
   140  			r.h.Write(scratch[:4])
   141  
   142  			sum := r.h.Sum(nil)
   143  			readerSum := make([]byte, len(sum))
   144  			if _, err := io.ReadFull(r.in, readerSum); err != nil {
   145  				if err == io.EOF {
   146  					return n, io.ErrUnexpectedEOF
   147  				}
   148  				return n, err
   149  			}
   150  
   151  			if !hmac.Equal(readerSum, sum) {
   152  				return n, ErrHashMismatch
   153  			}
   154  		}
   155  
   156  		r.done = 0
   157  		r.chunkSize = 0
   158  	}
   159  
   160  	return n, nil
   161  }
   162  
   163  // SimpleWriter is a writer that does not compress.
   164  type SimpleWriter struct {
   165  	// base is the underlying writer.
   166  	base io.Writer
   167  
   168  	// out is a buffered writer.
   169  	out *bufio.Writer
   170  
   171  	// key is the key used to create hash objects.
   172  	key []byte
   173  
   174  	// closed indicates whether the file has been closed.
   175  	closed bool
   176  }
   177  
// Compile-time checks that *SimpleWriter satisfies io.Writer and io.Closer.
var _ io.Writer = (*SimpleWriter)(nil)
var _ io.Closer = (*SimpleWriter)(nil)
   180  
   181  // NewSimpleWriter returns a new non-compressing writer. If key is non-nil, hash values are
   182  // generated and written out for compressed bytes. See package comments for
   183  // details.
   184  func NewSimpleWriter(out io.Writer, key []byte) (*SimpleWriter, error) {
   185  	return &SimpleWriter{
   186  		base: out,
   187  		out:  bufio.NewWriterSize(out, defaultBufSize),
   188  		key:  key,
   189  	}, nil
   190  }
   191  
   192  // Write implements io.Writer.Write.
   193  func (w *SimpleWriter) Write(p []byte) (int, error) {
   194  	var scratch [4]byte
   195  
   196  	// Did we close already?
   197  	if w.closed {
   198  		return 0, io.ErrUnexpectedEOF
   199  	}
   200  
   201  	l := uint32(len(p))
   202  
   203  	// chunk length
   204  	binary.BigEndian.PutUint32(scratch[:], l)
   205  	if _, err := w.out.Write(scratch[:4]); err != nil {
   206  		return 0, err
   207  	}
   208  
   209  	// Write out to the stream.
   210  	n, err := w.out.Write(p)
   211  	if err != nil {
   212  		return n, err
   213  	}
   214  
   215  	if w.key != nil {
   216  		h := hmac.New(sha256.New, w.key)
   217  
   218  		// chunk data
   219  		_, _ = h.Write(p)
   220  
   221  		// chunk length
   222  		binary.BigEndian.PutUint32(scratch[:], l)
   223  		h.Write(scratch[:4])
   224  
   225  		sum := h.Sum(nil)
   226  		if _, err := io.CopyN(w.out, bytes.NewReader(sum), int64(len(sum))); err != nil {
   227  			return n, err
   228  		}
   229  	}
   230  
   231  	return n, nil
   232  }
   233  
   234  // Close implements io.Closer.Close.
   235  func (w *SimpleWriter) Close() error {
   236  	// Did we already close? After the call to Close, we always mark as
   237  	// closed, regardless of whether the flush is successful.
   238  	if w.closed {
   239  		return io.ErrUnexpectedEOF
   240  	}
   241  	w.closed = true
   242  
   243  	// Flush buffered writer
   244  	if err := w.out.Flush(); err != nil {
   245  		return err
   246  	}
   247  
   248  	// Close the underlying writer (if necessary).
   249  	if closer, ok := w.base.(io.Closer); ok {
   250  		return closer.Close()
   251  	}
   252  
   253  	w.out = nil
   254  	w.base = nil
   255  
   256  	return nil
   257  }