github.com/haraldrudell/parl@v0.4.176/pio/line-reader.go (about) 1 /* 2 © 2023–present Harald Rudell <harald.rudell@gmail.com> (https://haraldrudell.github.io/haraldrudell/) 3 ISC License 4 */ 5 6 package pio 7 8 import ( 9 "errors" 10 "io" 11 12 "github.com/haraldrudell/parl" 13 "github.com/haraldrudell/parl/pslices" 14 "golang.org/x/exp/slices" 15 ) 16 17 const ( 18 newLine = byte('\n') 19 notFound = -1 20 defaultAllocation = 1024 21 minBuffer = 512 22 maxLine = 1024 * 1024 23 ) 24 25 // LineReader reads a [io.Reader] stream returing one line per Read invocation 26 // - operates on efficient byte 27 // - does not implement [io.WriteTo] or [io.Closer] 28 // - alternative to using [bufio.Scanner] 29 type LineReader struct { 30 reader io.Reader 31 isEof bool 32 byts []byte 33 searchStartIndex int 34 nextNewlineIndex int 35 } 36 37 // NewLineReader reads a [io.Reader] stream returing one line per Read invocation 38 // - operates on efficient byte 39 // - does not implement [io.WriteTo] or [io.Closer] 40 // - alternative to using [bufio.Scanner] 41 func NewLineReader(reader io.Reader) (lineReader *LineReader) { 42 if reader == nil { 43 panic(parl.NilError("reader")) 44 } 45 return &LineReader{reader: reader, byts: []byte{}, nextNewlineIndex: notFound} 46 } 47 48 // Read returns a byte-sequence ending with newline if size of p is sufficient. 49 // - if size of p is too short, the text will not end with newline 50 // - if EOF without newline, text has no newline and err is io.EOF 51 func (rr *LineReader) Read(p []byte) (n int, err error) { 52 53 for { 54 55 // return a line if there is one or whatever fits p 56 if len(rr.byts) > 0 { 57 var index int 58 var isEofLast bool 59 if index = rr.nextNewlineIndex; index != notFound { 60 rr.nextNewlineIndex = notFound 61 } else if index = slices.Index(rr.byts[rr.searchStartIndex:], newLine); index != -1 { 62 index += rr.searchStartIndex + 1 // include newline 63 rr.searchStartIndex = index + 1 64 } else if rr.isEof { 65 index = len(rr.byts) // non-terminated lines prior to eof 66 isEofLast = true 67 } 68 if index != -1 { 69 if pLength := len(p); pLength < index { 70 71 // part of rr.byts 72 n = pLength 73 rr.nextNewlineIndex = index - n // remember where the next end is 74 } else { 75 76 // all of rr.byts 77 n = index 78 if isEofLast { 79 err = io.EOF 80 } 81 } 82 copy(p, rr.byts[:n]) 83 pslices.TrimLeft(&rr.byts, n) 84 rr.searchStartIndex -= n 85 return // line found return: n >= 0 err == nil or EOF 86 } 87 } 88 89 // return EOF if it is EOF 90 if rr.isEof { 91 err = io.EOF 92 return // eof return: n == 0; err == io.EOF 93 } 94 95 // if rr.byts not empty, read into byts 96 if len(rr.byts) > 0 { 97 if n, err = rr.readToByts(len(p)); err != nil { 98 return 99 } 100 continue 101 } 102 103 // read from rr.reader into p 104 if n, err = rr.reader.Read(p); err != nil { 105 if rr.isEof = errors.Is(err, io.EOF); rr.isEof { 106 err = nil 107 } else { 108 return // rr.reader.Read error return 109 } 110 } 111 if index := slices.Index(p[:n], newLine); index != -1 { 112 index++ // include newline 113 if index < n { 114 115 // save text beyond newline in rr.byts 116 rr.byts = append(rr.byts, p[index:n]...) 117 n = index 118 } 119 return // full line in p return: n > 0, err == nil 120 } 121 122 // save to byts, then read more into byts 123 rr.byts = append(rr.byts, p[:n]...) 124 rr.searchStartIndex = n // start searching for newline index 125 } 126 } 127 128 // ReadLine returns full lines, extending p as necessary 129 // - len(line) is number of bytes 130 // - max line length 1 MiB 131 // - line will end with newLine unless 1 MiB or isEOF 132 // - EOF is returned as isEOF true 133 func (rr *LineReader) ReadLine(p []byte) (line []byte, isEOF bool, err error) { 134 135 // get line from p 136 if capP := cap(p); capP == 0 { 137 line = make([]byte, defaultAllocation) 138 } else { 139 line = p[:capP] 140 } 141 142 var n int 143 defer func() { 144 line = line[:n] 145 }() 146 for { 147 148 // read appending to line 149 var n0 int 150 n0, err = rr.Read(line[n:]) 151 n += n0 152 if err != nil { 153 if isEOF = errors.Is(err, io.EOF); isEOF { 154 err = nil // io.EOF is returned in isEOF, not in err 155 } 156 return // read error or EOF return 157 } else if n0 > 0 && line[n-1] == newLine { 158 return // full line return 159 } else if cap(line) >= maxLine { 160 return // 1 MiB line return 161 } else if requiredLength := n + minBuffer; requiredLength > cap(line) { 162 newSlice := make([]byte, requiredLength+(defaultAllocation-requiredLength%defaultAllocation)%defaultAllocation) 163 copy(newSlice, line[:n]) 164 line = newSlice 165 } 166 } 167 } 168 169 // readToByts returns n and err after reading into rr.byts 170 // - lengthP is max number of bytes to be read 171 // - rr.isEof is updated. io.EOF is not returned 172 func (rr *LineReader) readToByts(lengthP int) (n int, err error) { 173 lengthByts := len(rr.byts) 174 defer func() { 175 rr.byts = rr.byts[:lengthByts+n] 176 }() 177 178 requiredLength := lengthByts + lengthP 179 if cap(rr.byts) < requiredLength { 180 newSlice := make([]byte, requiredLength) 181 copy(newSlice, rr.byts) 182 rr.byts = newSlice 183 } else { 184 rr.byts = rr.byts[:requiredLength] 185 } 186 187 if n, err = rr.reader.Read(rr.byts[lengthByts:]); err != nil { 188 if rr.isEof = errors.Is(err, io.EOF); rr.isEof { 189 err = nil 190 } 191 } 192 193 return 194 }