github.com/haraldrudell/parl@v0.4.176/pio/line-reader.go (about)

     1  /*
     2  © 2023–present Harald Rudell <harald.rudell@gmail.com> (https://haraldrudell.github.io/haraldrudell/)
     3  ISC License
     4  */
     5  
     6  package pio
     7  
     8  import (
     9  	"errors"
    10  	"io"
    11  
    12  	"github.com/haraldrudell/parl"
    13  	"github.com/haraldrudell/parl/pslices"
    14  	"golang.org/x/exp/slices"
    15  )
    16  
    17  const (
    18  	newLine           = byte('\n')
    19  	notFound          = -1
    20  	defaultAllocation = 1024
    21  	minBuffer         = 512
    22  	maxLine           = 1024 * 1024
    23  )
    24  
    25  // LineReader reads a [io.Reader] stream returing one line per Read invocation
    26  //   - operates on efficient byte
    27  //   - does not implement [io.WriteTo] or [io.Closer]
    28  //   - alternative to using [bufio.Scanner]
    29  type LineReader struct {
    30  	reader           io.Reader
    31  	isEof            bool
    32  	byts             []byte
    33  	searchStartIndex int
    34  	nextNewlineIndex int
    35  }
    36  
    37  // NewLineReader reads a [io.Reader] stream returing one line per Read invocation
    38  //   - operates on efficient byte
    39  //   - does not implement [io.WriteTo] or [io.Closer]
    40  //   - alternative to using [bufio.Scanner]
    41  func NewLineReader(reader io.Reader) (lineReader *LineReader) {
    42  	if reader == nil {
    43  		panic(parl.NilError("reader"))
    44  	}
    45  	return &LineReader{reader: reader, byts: []byte{}, nextNewlineIndex: notFound}
    46  }
    47  
    48  // Read returns a byte-sequence ending with newline if size of p is sufficient.
    49  //   - if size of p is too short, the text will not end with newline
    50  //   - if EOF without newline, text has no newline and err is io.EOF
    51  func (rr *LineReader) Read(p []byte) (n int, err error) {
    52  
    53  	for {
    54  
    55  		// return a line if there is one or whatever fits p
    56  		if len(rr.byts) > 0 {
    57  			var index int
    58  			var isEofLast bool
    59  			if index = rr.nextNewlineIndex; index != notFound {
    60  				rr.nextNewlineIndex = notFound
    61  			} else if index = slices.Index(rr.byts[rr.searchStartIndex:], newLine); index != -1 {
    62  				index += rr.searchStartIndex + 1 // include newline
    63  				rr.searchStartIndex = index + 1
    64  			} else if rr.isEof {
    65  				index = len(rr.byts) // non-terminated lines prior to eof
    66  				isEofLast = true
    67  			}
    68  			if index != -1 {
    69  				if pLength := len(p); pLength < index {
    70  
    71  					// part of rr.byts
    72  					n = pLength
    73  					rr.nextNewlineIndex = index - n // remember where the next end is
    74  				} else {
    75  
    76  					// all of rr.byts
    77  					n = index
    78  					if isEofLast {
    79  						err = io.EOF
    80  					}
    81  				}
    82  				copy(p, rr.byts[:n])
    83  				pslices.TrimLeft(&rr.byts, n)
    84  				rr.searchStartIndex -= n
    85  				return // line found return: n >= 0 err == nil or EOF
    86  			}
    87  		}
    88  
    89  		// return EOF if it is EOF
    90  		if rr.isEof {
    91  			err = io.EOF
    92  			return // eof return: n == 0; err == io.EOF
    93  		}
    94  
    95  		// if rr.byts not empty, read into byts
    96  		if len(rr.byts) > 0 {
    97  			if n, err = rr.readToByts(len(p)); err != nil {
    98  				return
    99  			}
   100  			continue
   101  		}
   102  
   103  		// read from rr.reader into p
   104  		if n, err = rr.reader.Read(p); err != nil {
   105  			if rr.isEof = errors.Is(err, io.EOF); rr.isEof {
   106  				err = nil
   107  			} else {
   108  				return // rr.reader.Read error return
   109  			}
   110  		}
   111  		if index := slices.Index(p[:n], newLine); index != -1 {
   112  			index++ // include newline
   113  			if index < n {
   114  
   115  				// save text beyond newline in rr.byts
   116  				rr.byts = append(rr.byts, p[index:n]...)
   117  				n = index
   118  			}
   119  			return // full line in p return: n > 0, err == nil
   120  		}
   121  
   122  		// save to byts, then read more into byts
   123  		rr.byts = append(rr.byts, p[:n]...)
   124  		rr.searchStartIndex = n // start searching for newline index
   125  	}
   126  }
   127  
   128  // ReadLine returns full lines, extending p as necessary
   129  //   - len(line) is number of bytes
   130  //   - max line length 1 MiB
   131  //   - line will end with newLine unless 1 MiB or isEOF
   132  //   - EOF is returned as isEOF true
   133  func (rr *LineReader) ReadLine(p []byte) (line []byte, isEOF bool, err error) {
   134  
   135  	// get line from p
   136  	if capP := cap(p); capP == 0 {
   137  		line = make([]byte, defaultAllocation)
   138  	} else {
   139  		line = p[:capP]
   140  	}
   141  
   142  	var n int
   143  	defer func() {
   144  		line = line[:n]
   145  	}()
   146  	for {
   147  
   148  		// read appending to line
   149  		var n0 int
   150  		n0, err = rr.Read(line[n:])
   151  		n += n0
   152  		if err != nil {
   153  			if isEOF = errors.Is(err, io.EOF); isEOF {
   154  				err = nil // io.EOF is returned in isEOF, not in err
   155  			}
   156  			return // read error or EOF return
   157  		} else if n0 > 0 && line[n-1] == newLine {
   158  			return // full line return
   159  		} else if cap(line) >= maxLine {
   160  			return // 1 MiB line return
   161  		} else if requiredLength := n + minBuffer; requiredLength > cap(line) {
   162  			newSlice := make([]byte, requiredLength+(defaultAllocation-requiredLength%defaultAllocation)%defaultAllocation)
   163  			copy(newSlice, line[:n])
   164  			line = newSlice
   165  		}
   166  	}
   167  }
   168  
   169  // readToByts returns n and err after reading into rr.byts
   170  //   - lengthP is max number of bytes to be read
   171  //   - rr.isEof is updated. io.EOF is not returned
   172  func (rr *LineReader) readToByts(lengthP int) (n int, err error) {
   173  	lengthByts := len(rr.byts)
   174  	defer func() {
   175  		rr.byts = rr.byts[:lengthByts+n]
   176  	}()
   177  
   178  	requiredLength := lengthByts + lengthP
   179  	if cap(rr.byts) < requiredLength {
   180  		newSlice := make([]byte, requiredLength)
   181  		copy(newSlice, rr.byts)
   182  		rr.byts = newSlice
   183  	} else {
   184  		rr.byts = rr.byts[:requiredLength]
   185  	}
   186  
   187  	if n, err = rr.reader.Read(rr.byts[lengthByts:]); err != nil {
   188  		if rr.isEof = errors.Is(err, io.EOF); rr.isEof {
   189  			err = nil
   190  		}
   191  	}
   192  
   193  	return
   194  }