// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bundler

import (
	"bytes"
	"time"
	"unicode/utf8"

	"go.chromium.org/luci/logdog/api/logpb"
)

const (
	// posixNewline is the POSIX/Unix line delimiter.
	posixNewline = "\n"
	// windowsNewline is the Windows line delimiter ("\r\n").
	windowsNewline = "\r\n"
)

var (
	// posixNewlineBytes is posixNewline as a byte slice, precomputed once so the
	// per-line delimiter scan doesn't allocate.
	posixNewlineBytes = []byte(posixNewline)
)

// textParser is a parser implementation for the LogDog TEXT stream type.
//
// It splits buffered stream data into newline-delimited lines, handling both
// POSIX ("\n") and Windows ("\r\n") delimiters, and packs them into
// logpb.LogEntry Text content.
type textParser struct {
	baseParser

	// sequence is the sequence number assigned to the next emitted LogEntry.
	// It is advanced by the number of complete (delimiter-terminated) lines in
	// each entry; partial (split) lines do not advance it.
	sequence int64

	// buf is scratch space holding the bytes of the line currently being
	// assembled. It is Reset and reused on every loop iteration.
	buf bytes.Buffer
}

// Compile-time assertion that *textParser satisfies the parser interface.
var _ parser = (*textParser)(nil)

// nextEntry builds the next text LogEntry from buffered data, subject to the
// supplied constraints.
//
// It repeatedly extracts one line per loop iteration until the size limit
// (c.limit) is reached, the buffered data is exhausted, or a chunk with a
// different timestamp is encountered (each LogEntry carries a single
// timestamp, presumably that of its first chunk — see firstChunkTime).
//
// A line without a trailing newline is only emitted when c.allowSplit is set,
// and in that case it is truncated so it never ends mid-UTF-8-rune.
//
// Returns (nil, nil) when no complete line could be produced under the
// current constraints. The returned error is always nil in this
// implementation; the signature matches the parser interface.
func (p *textParser) nextEntry(c *constraints) (*logpb.LogEntry, error) {
	limit := int64(c.limit)
	ts := time.Time{}
	txt := logpb.Text{}
	// lineCount tracks only fully-delimited lines; it becomes the sequence
	// advance for this entry.
	lineCount := 0
	for limit > 0 {
		// NOTE(review): ViewLimit appears to return a non-consuming reader over
		// at most `limit` buffered bytes (from baseParser, not visible here) —
		// confirm against baseParser.
		br := p.ViewLimit(limit)
		if br.Remaining() == 0 {
			// Exceeded either limit or available buffer data.
			break
		}

		// Use the timestamp of the first data chunk.
		if len(txt.Lines) == 0 {
			ts, _ = p.firstChunkTime()
		} else if ct, _ := p.firstChunkTime(); !ct.Equal(ts) {
			// New timestamp, so need new LogEntry.
			break
		}

		// Find the index of our delimiter.
		//
		// We do this using a cross-platform approach that works on POSIX systems,
		// Mac (>=OSX), and Windows: we scan for "\n", then look backwards to see if
		// it was preceded by "\r" (for Windows-style newlines, "\r\n").
		idx := br.Index(posixNewlineBytes)

		newline := ""
		if idx >= 0 {
			// Restrict the reader to the bytes before the "\n".
			br = br.CloneLimit(idx)
			newline = posixNewline
		} else if !c.allowSplit {
			// No delimiter within our limit, and we're not allowed to split, so we're
			// done.
			break
		}

		// Load the exportable data into our buffer.
		p.buf.Reset()
		p.buf.ReadFrom(br)

		// Does our exportable buffer end with "\r"? If so, treat it as a possible
		// Windows newline sequence.
		if p.buf.Len() > 0 && p.buf.Bytes()[p.buf.Len()-1] == byte('\r') {
			split := false
			if newline != "" {
				// "\n" => "\r\n": the "\r" belongs to the delimiter, not the line.
				newline = windowsNewline
				split = true
			} else {
				// No "\n" was found, so this trailing "\r" may be the first half of a
				// "\r\n" whose "\n" hasn't arrived yet. If we're closed and this is
				// the last byte in the stream, it is a dangling "\r" and we should
				// include it. Otherwise, leave it for the next round.
				split = !(c.closed && int64(p.buf.Len()) == p.Len())
			}

			if split {
				// Drop the trailing "\r" from the line content. Note that the
				// Consume/limit accounting below still covers it, because len(newline)
				// grows by one ("\r\n") in the delimiter case, and in the hold-back
				// case the "\r" simply stays buffered for the next iteration.
				p.buf.Truncate(p.buf.Len() - 1)
			}
		}

		// partial: no delimiter was found, so this is a split (continuation) line.
		partial := (idx < 0)
		if !partial {
			// Only complete lines advance the sequence number.
			lineCount++
		}

		// If we didn't have a delimiter, make sure we don't terminate in the middle
		// of a UTF8 character.
		if partial {
			// Walk the buffer rune by rune; lidx ends up as the byte offset just
			// past the last cleanly-decoded rune. Anything after it (a truncated
			// multi-byte sequence at the split point) is left buffered for the
			// next entry.
			count := 0
			lidx := -1
			b := p.buf.Bytes()
			for len(b) > 0 {
				r, sz := utf8.DecodeRune(b)
				count += sz
				if r != utf8.RuneError {
					lidx = count
				}
				b = b[sz:]
			}
			if lidx < 0 {
				// Nothing decodable at all — emit nothing this round.
				break
			}
			p.buf.Truncate(lidx)
		}

		txt.Lines = append(txt.Lines, &logpb.Text_Line{
			Value:     append([]byte(nil), p.buf.Bytes()...), // Make a copy.
			Delimiter: newline,
		})
		// Advance the underlying stream past the line content plus its delimiter,
		// and charge the same amount against this entry's size limit.
		p.Consume(int64(p.buf.Len() + len(newline)))
		limit -= int64(p.buf.Len() + len(newline))
	}

	// No lines were produced under the current constraints: yield no entry.
	if len(txt.Lines) == 0 {
		return nil, nil
	}
	le := p.baseLogEntry(ts)
	le.Sequence = uint64(p.sequence)
	le.Content = &logpb.LogEntry_Text{Text: &txt}

	// Advance the sequence by the number of complete lines emitted.
	p.sequence += int64(lineCount)
	return le, nil
}