github.com/apache/arrow/go/v14@v14.0.1/internal/utils/buf_reader.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package utils
    18  
    19  import (
    20  	"bufio"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  )
    25  
    26  // bufferedReader is similar to bufio.Reader except
    27  // it will expand the buffer if necessary when asked to Peek
    28  // more bytes than are in the buffer
    29  type bufferedReader struct {
    30  	bufferSz int
    31  	buf      []byte
    32  	r, w     int
    33  	rd       io.Reader
    34  	err      error
    35  }
    36  
    37  // NewBufferedReader returns a buffered reader with similar semantics to bufio.Reader
    38  // except Peek will expand the internal buffer if needed rather than return
    39  // an error.
    40  func NewBufferedReader(rd io.Reader, sz int) *bufferedReader {
    41  	// if rd is already a buffered reader whose buffer is >= the requested size
    42  	// then just return it as is. no need to make a new object.
    43  	b, ok := rd.(*bufferedReader)
    44  	if ok && len(b.buf) >= sz {
    45  		return b
    46  	}
    47  
    48  	r := &bufferedReader{
    49  		rd: rd,
    50  	}
    51  	r.resizeBuffer(sz)
    52  	return r
    53  }
    54  
    55  func (b *bufferedReader) resetBuffer() {
    56  	if b.buf == nil {
    57  		b.buf = make([]byte, b.bufferSz)
    58  	} else if b.bufferSz > cap(b.buf) {
    59  		buf := b.buf
    60  		b.buf = make([]byte, b.bufferSz)
    61  		copy(b.buf, buf)
    62  	} else {
    63  		b.buf = b.buf[:b.bufferSz]
    64  	}
    65  }
    66  
// resizeBuffer records newSize as the desired buffer size and then calls
// resetBuffer so that b.buf is resized to that length. It performs no
// validation of newSize itself.
func (b *bufferedReader) resizeBuffer(newSize int) {
	b.bufferSz = newSize
	b.resetBuffer()
}
    71  
    72  func (b *bufferedReader) fill() error {
    73  	// slide existing data to the beginning
    74  	if b.r > 0 {
    75  		copy(b.buf, b.buf[b.r:b.w])
    76  		b.w -= b.r
    77  		b.r = 0
    78  	}
    79  
    80  	if b.w >= len(b.buf) {
    81  		return fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrBufferFull)
    82  	}
    83  
    84  	n, err := io.ReadAtLeast(b.rd, b.buf[b.w:], 1)
    85  	if n < 0 {
    86  		return fmt.Errorf("arrow/bufferedreader: filling buffer: %w", bufio.ErrNegativeCount)
    87  	}
    88  
    89  	b.w += n
    90  	b.err = err
    91  	return nil
    92  }
    93  
    94  func (b *bufferedReader) readErr() error {
    95  	err := b.err
    96  	b.err = nil
    97  	return err
    98  }
    99  
   100  // Buffered returns the number of bytes currently buffered
   101  func (b *bufferedReader) Buffered() int { return b.w - b.r }
   102  
   103  // SetBufferSize resets the size of the internal buffer to the desired size.
   104  // Will return an error if newSize is <= 0 or if newSize is less than the size
   105  // of the buffered data.
   106  func (b *bufferedReader) SetBufferSize(newSize int) error {
   107  	if newSize <= 0 {
   108  		return errors.New("buffer size should be positive")
   109  	}
   110  
   111  	if b.w >= newSize {
   112  		return errors.New("cannot shrink read buffer if buffered data remains")
   113  	}
   114  
   115  	b.resizeBuffer(newSize)
   116  	return nil
   117  }
   118  
   119  // Peek will buffer and return n bytes from the underlying reader without advancing
   120  // the reader itself. If n is larger than the current buffer size, the buffer will
   121  // be expanded to accommodate the extra bytes rather than error.
   122  func (b *bufferedReader) Peek(n int) ([]byte, error) {
   123  	if n < 0 {
   124  		return nil, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
   125  	}
   126  
   127  	if n > len(b.buf) {
   128  		if err := b.SetBufferSize(n); err != nil {
   129  			return nil, err
   130  		}
   131  	}
   132  
   133  	for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil {
   134  		b.fill() // b.w-b.r < len(b.buf) => buffer is not full
   135  	}
   136  
   137  	return b.buf[b.r : b.r+n], b.readErr()
   138  }
   139  
   140  // Discard skips the next n bytes either by advancing the internal buffer
   141  // or by reading that many bytes in and throwing them away.
   142  func (b *bufferedReader) Discard(n int) (discarded int, err error) {
   143  	if n < 0 {
   144  		return 0, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
   145  	}
   146  
   147  	if n == 0 {
   148  		return
   149  	}
   150  
   151  	remain := n
   152  	for {
   153  		skip := b.Buffered()
   154  		if skip == 0 {
   155  			b.fill()
   156  			skip = b.Buffered()
   157  		}
   158  		if skip > remain {
   159  			skip = remain
   160  		}
   161  		b.r += skip
   162  		remain -= skip
   163  		if remain == 0 {
   164  			return n, nil
   165  		}
   166  		if b.err != nil {
   167  			return n - remain, b.readErr()
   168  		}
   169  	}
   170  }
   171  
   172  func (b *bufferedReader) Read(p []byte) (n int, err error) {
   173  	n = len(p)
   174  	if n == 0 {
   175  		if b.Buffered() > 0 {
   176  			return 0, nil
   177  		}
   178  		return 0, b.readErr()
   179  	}
   180  
   181  	if b.r == b.w {
   182  		if b.err != nil {
   183  			return 0, b.readErr()
   184  		}
   185  		if len(p) >= len(b.buf) {
   186  			// large read, empty buffer
   187  			// read directly into p to avoid extra copy
   188  			n, b.err = b.rd.Read(p)
   189  			if n < 0 {
   190  				return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
   191  			}
   192  			return n, b.readErr()
   193  		}
   194  
   195  		// one read
   196  		// don't use b.fill
   197  		b.r, b.w = 0, 0
   198  		n, b.err = b.rd.Read(b.buf)
   199  		if n < 0 {
   200  			return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
   201  		}
   202  		if n == 0 {
   203  			return 0, b.readErr()
   204  		}
   205  		b.w += n
   206  	}
   207  
   208  	// copy as much as we can
   209  	n = copy(p, b.buf[b.r:b.w])
   210  	b.r += n
   211  	return n, nil
   212  }