// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package iohelp

import (
	"bufio"
	"encoding/binary"
	"errors"
	"io"
	"sync"
	"sync/atomic"
	"time"
)

// ErrPreservingReader is a utility class that provides methods to read from a reader where errors can be ignored and
// handled later. Once an error occurs subsequent calls to read won't pull data from the io.Reader, will be a noop, and
// the initial error can be retrieved from Err at any time. ErrPreservingReader implements the io.Reader interface
// itself so it can be used as any other Reader would be.
type ErrPreservingReader struct {
	// R is the reader supplying the actual data.
	R io.Reader

	// Err is the first error that occurred, or nil if no error has occurred yet.
	Err error
}

// NewErrPreservingReader creates a new instance of an ErrPreservingReader wrapping r.
func NewErrPreservingReader(r io.Reader) *ErrPreservingReader {
	return &ErrPreservingReader{r, nil}
}

// Read reads data from the underlying io.Reader if no previous errors have occurred. If an error has already occurred
// then read will simply no-op and return 0 for the number of bytes read and the original error.
46 func (r *ErrPreservingReader) Read(p []byte) (int, error) { 47 n := 0 48 49 if r.Err == nil { 50 n, r.Err = r.R.Read(p) 51 } 52 53 return n, r.Err 54 } 55 56 // Read 57 func (r *ErrPreservingReader) ReadUint32(order binary.ByteOrder) (uint32, error) { 58 if r.Err != nil { 59 return 0, r.Err 60 } 61 62 bytes, err := ReadNBytes(r, 4) 63 64 if err != nil { 65 r.Err = err 66 return 0, r.Err 67 } 68 69 return order.Uint32(bytes), nil 70 } 71 72 // ReadNBytes will read n bytes from the given reader and return a new slice containing the data. ReadNBytes will always 73 // return n bytes, or it will return no data and an error (So if you request 100 bytes and there are only 99 left before 74 // the reader returns io.EOF you won't receive any of the data as this is considered an error as it can't read 100 bytes). 75 func ReadNBytes(r io.Reader, n int) ([]byte, error) { 76 bytes := make([]byte, n) 77 78 var err error 79 for totalRead := 0; totalRead < n; { 80 if err != nil { 81 return nil, err 82 } 83 84 read := 0 85 read, err = r.Read(bytes[totalRead:]) 86 87 totalRead += read 88 } 89 90 return bytes, nil 91 } 92 93 // ReadLineNoBuf will read a line from an unbuffered io.Reader where it considers lines to be separated by newlines (\n). 94 // The data returned will be a string with \r\n characters removed from the end, a bool which says whether the end of 95 // the stream has been reached, and any errors that have been encountered (other than eof which is treated as the end of 96 // the final line). This isn't efficient, so you shouldn't do this if you can use a buffered reader and the 97 // iohelp.ReadLine method. 
98 func ReadLineNoBuf(r io.Reader) (string, bool, error) { 99 var err error 100 var dest []byte 101 var oneByte [1]byte 102 103 for { 104 var n int 105 n, err = r.Read(oneByte[:]) 106 107 if err != nil && err != io.EOF { 108 return "", true, err 109 } 110 111 if n == 1 { 112 c := oneByte[0] 113 114 if c == '\n' { 115 break 116 } 117 118 dest = append(dest, c) 119 } 120 121 if err == io.EOF { 122 break 123 } 124 } 125 126 crlfCount := 0 127 lineLen := len(dest) 128 for i := lineLen - 1; i >= 0; i-- { 129 ch := dest[i] 130 131 if ch == '\r' || ch == '\n' { 132 crlfCount++ 133 } else { 134 break 135 } 136 } 137 138 return string(dest[:lineLen-crlfCount]), err != nil, nil 139 } 140 141 // ReadLine will read a line from an unbuffered io.Reader where it considers lines to be separated by newlines (\n). 142 // The data returned will be a string with \r\n characters removed from the end, a bool which says whether the end of 143 // the stream has been reached, and any errors that have been encountered (other than eof which is treated as the end of 144 // the final line) 145 func ReadLine(br *bufio.Reader) (line string, done bool, err error) { 146 line, err = br.ReadString('\n') 147 if err != nil { 148 if err != io.EOF { 149 return "", true, err 150 } 151 } 152 153 crlfCount := 0 154 lineLen := len(line) 155 for i := lineLen - 1; i >= 0; i-- { 156 ch := line[i] 157 158 if ch == '\r' || ch == '\n' { 159 crlfCount++ 160 } else { 161 break 162 } 163 } 164 165 return line[:lineLen-crlfCount], err != nil, nil 166 } 167 168 /*func ReadLineFromJSON(br *bufio.Reader) (line map[string]interface{}, done bool, err error) { 169 line, err = br.ReadMap() 170 }*/ 171 172 // ErrThroughput is the error that is returned by ReadWithMinThroughput if the throughput drops below the threshold 173 var ErrThroughput = errors.New("throughput below minimum allowable") 174 175 // MinThroughputCheckParams defines the miminimum throughput, how often it should be checked, and what the time window 176 // 
size is 177 type MinThroughputCheckParams struct { 178 // MinBytesPerSec is the minimum throughput. If ReadWithMinThroughput drops below this value for the most recent 179 // time window then it will fail. 180 MinBytesPerSec int64 181 182 // CheckInterval how often should the throughput be checked 183 CheckInterval time.Duration 184 185 // NumIntervals defines the number of intervals that should be considered when looking at the throughput. 186 // NumIntervals*CheckInterval defines the window size 187 NumIntervals int 188 } 189 190 type datapoint struct { 191 ts time.Time 192 val int64 193 } 194 195 type datapoints []datapoint 196 197 // getThroughput returns the throughput for the most recent time window 198 func (initialDps datapoints) getThroughput(duration time.Duration) (datapoints, int64) { 199 dps := initialDps 200 now := time.Now() 201 cutoff := now.Add(-duration) 202 203 // restrict datapoints to datapoints within the time window 204 for len(dps) > 1 { 205 if cutoff.After(dps[0].ts) { 206 dps = dps[1:] 207 } else { 208 break 209 } 210 } 211 212 if len(dps) <= 1 { 213 return dps, 0 214 } 215 216 elapsed := now.Sub(dps[0].ts) 217 bytesRead := dps[len(dps)-1].val - dps[0].val 218 219 return dps, int64(float64(bytesRead) / elapsed.Seconds()) 220 } 221 222 // safeClose closes the provided closer recovering from any errors. 223 func safeClose(c io.Closer) { 224 defer func() { 225 recover() 226 }() 227 228 c.Close() 229 } 230 231 type readResults struct { 232 bytes []byte 233 err error 234 } 235 236 // ReadNWithProgress reads n bytes from reader r. As it reads it atomically updates the value pointed at by 237 // bytesRead. In order to cancel this read the reader should be closed. 
func ReadNWithProgress(r io.Reader, n int64, bytesRead *int64) ([]byte, error) {
	var totalRead int64
	bytes := make([]byte, n)

	var err error
	for totalRead < n && err == nil {
		var read int
		read, err = r.Read(bytes[totalRead:])

		// Any error other than io.EOF aborts immediately; io.EOF is resolved below.
		if err != nil && err != io.EOF {
			break
		}

		totalRead += int64(read)

		// Publish progress for concurrent observers (e.g. a throughput monitor).
		if bytesRead != nil {
			atomic.StoreInt64(bytesRead, totalRead)
		}

		// io.EOF is only an error if it arrived before all n bytes were read.
		if err == io.EOF {
			err = nil
			if totalRead != n {
				err = io.ErrUnexpectedEOF
			}
		}
	}

	// On error the bytes successfully read so far are still returned.
	return bytes[:totalRead], err
}

// ReadWithMinThroughput reads n bytes from reader r erroring if the throughput ever drops below the threshold
// defined by MinThroughputCheckParams.
func ReadWithMinThroughput(r io.ReadCloser, n int64, mtcParams MinThroughputCheckParams) ([]byte, error) {
	// Buffered with capacity 1 so the reader goroutine can deliver its result
	// without blocking even when this function exits via the throughput path.
	resChan := make(chan readResults, 1)
	defer close(resChan)

	wg := &sync.WaitGroup{}

	var bytesReadSync int64

	wg.Add(1)
	go func() {
		defer wg.Done()
		// NOTE(review): guards against a panic from sending on resChan after it has
		// been closed; with the wait-then-receive ordering below this should not fire.
		defer func() { recover() }()

		bytes, err := ReadNWithProgress(r, n, &bytesReadSync)
		res := readResults{bytes, err}
		resChan <- res
	}()

	// The throughput window spans NumIntervals check intervals.
	checkDuration := mtcParams.CheckInterval * time.Duration(mtcParams.NumIntervals)
	ticker := time.NewTicker(mtcParams.CheckInterval)
	defer ticker.Stop()

	var points datapoints
	var throughputErr bool
	for !throughputErr {
		// Either the read completes, or another check interval elapses.
		select {
		case res := <-resChan:
			return res.bytes, res.err
		case <-ticker.C:
		}

		// Sample the cumulative bytes read at this instant.
		read := atomic.LoadInt64(&bytesReadSync)
		points = append(points, datapoint{time.Now(), read})

		// Only enforce the minimum once a full window of samples exists.
		if len(points) >= mtcParams.NumIntervals {
			var bps int64
			points, bps = points.getThroughput(checkDuration)

			if bps < mtcParams.MinBytesPerSec {
				// Closing the reader forces the in-flight Read in the goroutine to
				// return, which lets wg.Wait below complete.
				safeClose(r)
				throughputErr = true
			}
		}
	}

	// Wait for the reader goroutine; it sends its single result into the buffered
	// channel before returning, so the receive below cannot miss it.
	wg.Wait()

	select {
	case res := <-resChan:
		err := res.err

		// The throughput failure takes precedence over the read error it induced.
		if throughputErr {
			err = ErrThroughput
		}

		return res.bytes, err
	default:
		panic("bug. Should never reach here.")
	}
}