go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/recordio/reader.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package recordio
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"io"
    22  )
    23  
    24  // ErrFrameTooLarge is the sentinel error returned when a frame's declared size
    25  // is larger than the maximum allowed size (the frame header is not counted).
    26  var ErrFrameTooLarge = fmt.Errorf("frame: frame size exceeds maximum")
    27  
    28  // Reader reads individual frames from a frame-formatted input Reader.
    29  type Reader interface {
    30  	// ReadFrame reads the next frame, returning the frame's size and an
    31  	// *io.LimitedReader for that frame's data. The returned reader is restricted
    32  	// such that it cannot read past the frame.
    33  	//
    34  	// The frame must be fully read before another Reader call can be made.
    35  	// Failure to do so will cause the Reader to become unsynchronized.
    36  	ReadFrame() (int64, *io.LimitedReader, error)
    37  
    38  	// ReadFrameAll returns the contents of the next frame. If there are no more
    39  	// frames available, ReadFrameAll will return io.EOF.
    40  	ReadFrameAll() ([]byte, error)
    41  }
    42  
    43  // reader is an implementation of a Reader that uses an underlying
    44  // io.Reader and io.ByteReader to read frames.
    45  //
    46  // The io.Reader and io.ByteReader must read from the same source.
    47  type reader struct {
    48  	io.Reader     // supplies the frame data handed out by ReadFrame
    49  	io.ByteReader // used to decode each frame's uvarint size header
    50  
    51  	maxSize int64 // frames whose declared size exceeds this are rejected
    52  }
    53  
    54  // NewReader creates a new Reader which reads frame data from the
    55  // supplied Reader instance.
    56  //
    57  // If the Reader instance is also an io.ByteReader, its ReadByte method will
    58  // be used directly.
    59  func NewReader(r io.Reader, maxSize int64) Reader {
    60  	br, ok := r.(io.ByteReader)
    61  	if !ok {
    62  		br = &simpleByteReader{Reader: r}
    63  	}
    64  	return &reader{
    65  		Reader:     r,
    66  		ByteReader: br,
    67  		maxSize:    maxSize,
    68  	}
    69  }
    70  
    71  func (r *reader) ReadFrame() (int64, *io.LimitedReader, error) {
    72  	// Read the frame size.
    73  	count, err := binary.ReadUvarint(r)
    74  	if err != nil {
    75  		return 0, nil, err
    76  	}
    77  
    78  	if count > uint64(r.maxSize) {
    79  		return 0, nil, ErrFrameTooLarge
    80  	}
    81  
    82  	lr := &io.LimitedReader{
    83  		R: r.Reader,
    84  		N: int64(count),
    85  	}
    86  	return int64(count), lr, nil
    87  }
    88  
    89  func (r *reader) ReadFrameAll() ([]byte, error) {
    90  	count, fr, err := r.ReadFrame()
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	if count == 0 {
    95  		return nil, nil
    96  	}
    97  
    98  	data := make([]byte, count)
    99  	if _, err := io.ReadFull(fr, data); err != nil {
   100  		return nil, err
   101  	}
   102  	return data, nil
   103  }
   104  
   105  // simpleByteReader implements the io.ByteReader interface for an io.Reader.
   106  type simpleByteReader struct {
   107  	io.Reader
   108  
   109  	buf [1]byte
   110  }
   111  
   112  func (r *simpleByteReader) ReadByte() (byte, error) {
   113  	_, err := r.Read(r.buf[:])
   114  	return r.buf[0], err
   115  }
   116  
   117  // Split splits the supplied buffer into its component records.
   118  //
   119  // This method implements zero-copy segmentation, so the individual records are
   120  // slices of the original data set.
   121  func Split(data []byte) (records [][]byte, err error) {
   122  	br := bytes.NewReader(data)
   123  
   124  	for br.Len() > 0 {
   125  		var size uint64
   126  		size, err = binary.ReadUvarint(br)
   127  		if err != nil {
   128  			return
   129  		}
   130  		if size > uint64(br.Len()) {
   131  			err = ErrFrameTooLarge
   132  			return
   133  		}
   134  
   135  		// Pull out the record from the original byte stream without copying.
   136  		// Casting size to an integer is safe at this point, since we have asserted
   137  		// that it is less than the remaining length in the buffer, which is an int.
   138  		offset := len(data) - br.Len()
   139  		records = append(records, data[offset:offset+int(size)])
   140  
   141  		if _, err := br.Seek(int64(size), 1); err != nil {
   142  			// Our measurements should protect us from this being an invalid seek.
   143  			panic(err)
   144  		}
   145  	}
   146  	return records, nil
   147  }