// Source: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/recordio/reader.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package recordio

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// ErrFrameTooLarge is an error that is returned if a frame that is larger than
// the maximum allowed size (not including the frame header) is read.
var ErrFrameTooLarge = fmt.Errorf("frame: frame size exceeds maximum")

// Reader reads individual frames from a frame-formatted input Reader.
type Reader interface {
	// ReadFrame reads the next frame, returning the frame's size and an io.Reader
	// for that frame's data. The io.Reader is restricted such that it cannot read
	// past the frame.
	//
	// The frame must be fully read before another Reader call can be made.
	// Failure to do so will cause the Reader to become unsynchronized.
	ReadFrame() (int64, *io.LimitedReader, error)

	// ReadFrameAll returns the contents of the next frame. If there are no more
	// frames available, ReadFrameAll will return io.EOF.
	ReadFrameAll() ([]byte, error)
}

// reader is an implementation of a Reader that uses an underlying
// io.Reader and io.ByteReader to read frames.
//
// The io.Reader and io.ByteReader must read from the same source.
47 type reader struct { 48 io.Reader 49 io.ByteReader 50 51 maxSize int64 52 } 53 54 // NewReader creates a new Reader which reads frame data from the 55 // supplied Reader instance. 56 // 57 // If the Reader instance is also an io.ByteReader, its ReadByte method will 58 // be used directly. 59 func NewReader(r io.Reader, maxSize int64) Reader { 60 br, ok := r.(io.ByteReader) 61 if !ok { 62 br = &simpleByteReader{Reader: r} 63 } 64 return &reader{ 65 Reader: r, 66 ByteReader: br, 67 maxSize: maxSize, 68 } 69 } 70 71 func (r *reader) ReadFrame() (int64, *io.LimitedReader, error) { 72 // Read the frame size. 73 count, err := binary.ReadUvarint(r) 74 if err != nil { 75 return 0, nil, err 76 } 77 78 if count > uint64(r.maxSize) { 79 return 0, nil, ErrFrameTooLarge 80 } 81 82 lr := &io.LimitedReader{ 83 R: r.Reader, 84 N: int64(count), 85 } 86 return int64(count), lr, nil 87 } 88 89 func (r *reader) ReadFrameAll() ([]byte, error) { 90 count, fr, err := r.ReadFrame() 91 if err != nil { 92 return nil, err 93 } 94 if count == 0 { 95 return nil, nil 96 } 97 98 data := make([]byte, count) 99 if _, err := io.ReadFull(fr, data); err != nil { 100 return nil, err 101 } 102 return data, nil 103 } 104 105 // simpleByteReader implements the io.ByteReader interface for an io.Reader. 106 type simpleByteReader struct { 107 io.Reader 108 109 buf [1]byte 110 } 111 112 func (r *simpleByteReader) ReadByte() (byte, error) { 113 _, err := r.Read(r.buf[:]) 114 return r.buf[0], err 115 } 116 117 // Split splits the supplied buffer into its component records. 118 // 119 // This method implements zero-copy segmentation, so the individual records are 120 // slices of the original data set. 
121 func Split(data []byte) (records [][]byte, err error) { 122 br := bytes.NewReader(data) 123 124 for br.Len() > 0 { 125 var size uint64 126 size, err = binary.ReadUvarint(br) 127 if err != nil { 128 return 129 } 130 if size > uint64(br.Len()) { 131 err = ErrFrameTooLarge 132 return 133 } 134 135 // Pull out the record from the original byte stream without copying. 136 // Casting size to an integer is safe at this point, since we have asserted 137 // that it is less than the remaining length in the buffer, which is an int. 138 offset := len(data) - br.Len() 139 records = append(records, data[offset:offset+int(size)]) 140 141 if _, err := br.Seek(int64(size), 1); err != nil { 142 // Our measurements should protect us from this being an invalid seek. 143 panic(err) 144 } 145 } 146 return records, nil 147 }