github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/utils/iohelp/read.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package iohelp
    16  
    17  import (
    18  	"bufio"
    19  	"encoding/binary"
    20  	"errors"
    21  	"io"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  )
    26  
    27  // ErrPreservingReader is a utility class that provides methods to read from a reader where errors can be ignored and
    28  // handled later.  Once an error occurs subsequent calls to read won't pull data from the io.Reader, will be a noop, and
    29  // the initial error can be retrieved from Err at any time.  ErrPreservingReader implements the io.Reader interface
    30  // itself so it can be used as any other Reader would be.
    31  type ErrPreservingReader struct {
    32  	// R is the reader supplying the actual data.
    33  	R io.Reader
    34  
    35  	// Err is the first error that occurred, or nil
    36  	Err error
    37  }
    38  
    39  // NewErrPreservingReader creates a new instance of an ErrPreservingReader
    40  func NewErrPreservingReader(r io.Reader) *ErrPreservingReader {
    41  	return &ErrPreservingReader{r, nil}
    42  }
    43  
    44  // Read reads data from the underlying io.Reader if no previous errors have occurred.  If an error has already occurred
    45  // then read will simply no-op and return 0 for the number of bytes read and the original error.
    46  func (r *ErrPreservingReader) Read(p []byte) (int, error) {
    47  	n := 0
    48  
    49  	if r.Err == nil {
    50  		n, r.Err = r.R.Read(p)
    51  	}
    52  
    53  	return n, r.Err
    54  }
    55  
// ReadUint32 reads 4 bytes from the underlying reader and decodes them as a uint32 using the supplied byte order.
// If an error has already been recorded, or one occurs while reading, it is stored in Err and returned along with
// a zero value.
func (r *ErrPreservingReader) ReadUint32(order binary.ByteOrder) (uint32, error) {
	if r.Err != nil {
		return 0, r.Err
	}

	// ReadNBytes reads through r (which records reader errors itself), but store the error
	// explicitly in case the failure originated in ReadNBytes rather than the underlying reader.
	bytes, err := ReadNBytes(r, 4)

	if err != nil {
		r.Err = err
		return 0, r.Err
	}

	return order.Uint32(bytes), nil
}
    71  
    72  // ReadNBytes will read n bytes from the given reader and return a new slice containing the data. ReadNBytes will always
    73  // return n bytes, or it will return no data and an error (So if you request 100 bytes and there are only 99 left before
    74  // the reader returns io.EOF you won't receive any of the data as this is considered an error as it can't read 100 bytes).
    75  func ReadNBytes(r io.Reader, n int) ([]byte, error) {
    76  	bytes := make([]byte, n)
    77  
    78  	var err error
    79  	for totalRead := 0; totalRead < n; {
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  
    84  		read := 0
    85  		read, err = r.Read(bytes[totalRead:])
    86  
    87  		totalRead += read
    88  	}
    89  
    90  	return bytes, nil
    91  }
    92  
    93  // ReadLineNoBuf will read a line from an unbuffered io.Reader where it considers lines to be separated by newlines (\n).
    94  // The data returned will be a string with \r\n characters removed from the end, a bool which says whether the end of
    95  // the stream has been reached, and any errors that have been encountered (other than eof which is treated as the end of
    96  // the final line). This isn't efficient, so you shouldn't do this if you can use a buffered reader and the
    97  // iohelp.ReadLine method.
    98  func ReadLineNoBuf(r io.Reader) (string, bool, error) {
    99  	var err error
   100  	var dest []byte
   101  	var oneByte [1]byte
   102  
   103  	for {
   104  		var n int
   105  		n, err = r.Read(oneByte[:])
   106  
   107  		if err != nil && err != io.EOF {
   108  			return "", true, err
   109  		}
   110  
   111  		if n == 1 {
   112  			c := oneByte[0]
   113  
   114  			if c == '\n' {
   115  				break
   116  			}
   117  
   118  			dest = append(dest, c)
   119  		}
   120  
   121  		if err == io.EOF {
   122  			break
   123  		}
   124  	}
   125  
   126  	crlfCount := 0
   127  	lineLen := len(dest)
   128  	for i := lineLen - 1; i >= 0; i-- {
   129  		ch := dest[i]
   130  
   131  		if ch == '\r' || ch == '\n' {
   132  			crlfCount++
   133  		} else {
   134  			break
   135  		}
   136  	}
   137  
   138  	return string(dest[:lineLen-crlfCount]), err != nil, nil
   139  }
   140  
   141  // ReadLine will read a line from an unbuffered io.Reader where it considers lines to be separated by newlines (\n).
   142  // The data returned will be a string with \r\n characters removed from the end, a bool which says whether the end of
   143  // the stream has been reached, and any errors that have been encountered (other than eof which is treated as the end of
   144  // the final line)
   145  func ReadLine(br *bufio.Reader) (line string, done bool, err error) {
   146  	line, err = br.ReadString('\n')
   147  	if err != nil {
   148  		if err != io.EOF {
   149  			return "", true, err
   150  		}
   151  	}
   152  
   153  	crlfCount := 0
   154  	lineLen := len(line)
   155  	for i := lineLen - 1; i >= 0; i-- {
   156  		ch := line[i]
   157  
   158  		if ch == '\r' || ch == '\n' {
   159  			crlfCount++
   160  		} else {
   161  			break
   162  		}
   163  	}
   164  
   165  	return line[:lineLen-crlfCount], err != nil, nil
   166  }
   167  
   168  /*func ReadLineFromJSON(br *bufio.Reader) (line map[string]interface{}, done bool, err error) {
   169  	line, err = br.ReadMap()
   170  }*/
   171  
// ErrThroughput is the error returned by ReadWithMinThroughput when the measured throughput drops below the
// minimum allowed by MinThroughputCheckParams.
var ErrThroughput = errors.New("throughput below minimum allowable")
   174  
// MinThroughputCheckParams defines the minimum throughput, how often it should be checked, and what the time window
// size is.
type MinThroughputCheckParams struct {
	// MinBytesPerSec is the minimum throughput.  If ReadWithMinThroughput drops below this value for the most recent
	// time window then it will fail.
	MinBytesPerSec int64

	// CheckInterval is how often the throughput should be checked.
	CheckInterval time.Duration

	// NumIntervals defines the number of intervals that should be considered when looking at the throughput.
	// NumIntervals*CheckInterval defines the window size.
	NumIntervals int
}
   189  
   190  type datapoint struct {
   191  	ts  time.Time
   192  	val int64
   193  }
   194  
   195  type datapoints []datapoint
   196  
   197  // getThroughput returns the throughput for the most recent time window
   198  func (initialDps datapoints) getThroughput(duration time.Duration) (datapoints, int64) {
   199  	dps := initialDps
   200  	now := time.Now()
   201  	cutoff := now.Add(-duration)
   202  
   203  	// restrict datapoints to datapoints within the time window
   204  	for len(dps) > 1 {
   205  		if cutoff.After(dps[0].ts) {
   206  			dps = dps[1:]
   207  		} else {
   208  			break
   209  		}
   210  	}
   211  
   212  	if len(dps) <= 1 {
   213  		return dps, 0
   214  	}
   215  
   216  	elapsed := now.Sub(dps[0].ts)
   217  	bytesRead := dps[len(dps)-1].val - dps[0].val
   218  
   219  	return dps, int64(float64(bytesRead) / elapsed.Seconds())
   220  }
   221  
   222  // safeClose closes the provided closer recovering from any errors.
   223  func safeClose(c io.Closer) {
   224  	defer func() {
   225  		recover()
   226  	}()
   227  
   228  	c.Close()
   229  }
   230  
// readResults bundles the outcome of an asynchronous read: the bytes obtained and any error encountered.
type readResults struct {
	bytes []byte
	err   error
}
   235  
   236  // ReadNWithProgress reads n bytes from reader r.  As it reads it atomically updates the value pointed at by
   237  // bytesRead.  In order to cancel this read the reader should be closed.
   238  func ReadNWithProgress(r io.Reader, n int64, bytesRead *int64) ([]byte, error) {
   239  	var totalRead int64
   240  	bytes := make([]byte, n)
   241  
   242  	var err error
   243  	for totalRead < n && err == nil {
   244  		var read int
   245  		read, err = r.Read(bytes[totalRead:])
   246  
   247  		if err != nil && err != io.EOF {
   248  			break
   249  		}
   250  
   251  		totalRead += int64(read)
   252  
   253  		if bytesRead != nil {
   254  			atomic.StoreInt64(bytesRead, totalRead)
   255  		}
   256  
   257  		if err == io.EOF {
   258  			err = nil
   259  			if totalRead != n {
   260  				err = io.ErrUnexpectedEOF
   261  			}
   262  		}
   263  	}
   264  
   265  	return bytes[:totalRead], err
   266  }
   267  
// ReadWithMinThroughput reads n bytes from reader r erroring if the throughput ever drops below the threshold
// defined by MinThroughputCheckParams.  On a throughput failure the reader is closed (which unblocks the in-flight
// read) and ErrThroughput is returned along with whatever bytes were read.
func ReadWithMinThroughput(r io.ReadCloser, n int64, mtcParams MinThroughputCheckParams) ([]byte, error) {
	// Buffered so the reader goroutine can always deliver its result without blocking,
	// even after this function has stopped selecting on the channel.
	resChan := make(chan readResults, 1)
	defer close(resChan)

	wg := &sync.WaitGroup{}

	// Shared progress counter; the reader goroutine publishes into it atomically.
	var bytesReadSync int64

	wg.Add(1)
	go func() {
		defer wg.Done()
		// Swallow any panic so a misbehaving reader can't take down the caller.
		defer func() { recover() }()

		bytes, err := ReadNWithProgress(r, n, &bytesReadSync)
		res := readResults{bytes, err}
		resChan <- res
	}()

	// The sliding window over which throughput is evaluated.
	checkDuration := mtcParams.CheckInterval * time.Duration(mtcParams.NumIntervals)
	ticker := time.NewTicker(mtcParams.CheckInterval)
	defer ticker.Stop()

	var points datapoints
	var throughputErr bool
	for !throughputErr {
		select {
		case res := <-resChan:
			// Read finished (successfully or not) before any throughput violation.
			return res.bytes, res.err
		case <-ticker.C:
		}

		// Sample cumulative progress on each tick.
		read := atomic.LoadInt64(&bytesReadSync)
		points = append(points, datapoint{time.Now(), read})

		// Only evaluate once enough samples exist to cover the window.
		if len(points) >= mtcParams.NumIntervals {
			var bps int64
			points, bps = points.getThroughput(checkDuration)

			if bps < mtcParams.MinBytesPerSec {
				// Closing the reader forces the blocked read in the goroutine to fail.
				safeClose(r)
				throughputErr = true
			}
		}
	}

	// Wait for the reader goroutine to send its (final) result before draining it.
	wg.Wait()

	select {
	case res := <-resChan:
		err := res.err

		// A throughput violation takes precedence over the read error it induced.
		if throughputErr {
			err = ErrThroughput
		}

		return res.bytes, err
	default:
		// The goroutine sends exactly once and wg.Wait ensures it completed, so a
		// result must be available here.
		panic("bug.  Should never reach here.")
	}
}