go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/client/butler/bundler/textParser.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bundler
    16  
    17  import (
    18  	"bytes"
    19  	"time"
    20  	"unicode/utf8"
    21  
    22  	"go.chromium.org/luci/logdog/api/logpb"
    23  )
    24  
    25  const (
    26  	posixNewline   = "\n"
    27  	windowsNewline = "\r\n"
    28  )
    29  
    30  var (
    31  	posixNewlineBytes = []byte(posixNewline)
    32  )
    33  
// textParser is a parser implementation for the LogDog TEXT stream type.
//
// It carves buffered stream data into newline-delimited lines, producing one
// logpb.Text_Line per complete line (or per partial line, when splitting is
// permitted by the caller's constraints).
type textParser struct {
	baseParser

	// sequence is the sequence number assigned to the next emitted LogEntry.
	// It advances by the number of complete (delimiter-terminated) lines in
	// each entry that nextEntry produces.
	sequence int64
	// buf is a scratch buffer reused across nextEntry iterations to assemble
	// each line's exported content.
	buf bytes.Buffer
}

// Compile-time assertion that *textParser satisfies the parser interface.
var _ parser = (*textParser)(nil)
    43  
// nextEntry builds the next TEXT-stream LogEntry from buffered data, subject
// to the supplied constraints.
//
// It repeatedly extracts newline-delimited lines from the parser's buffer
// until one of the following stops the loop:
//   - the byte limit (c.limit) is exhausted,
//   - the buffered data runs out,
//   - data with a different timestamp is reached (an entry carries a single
//     timestamp, so a new entry must be started), or
//   - no delimiter is found within the limit and splitting is not allowed.
//
// Returns (nil, nil) when no line data could be extracted this round.
func (p *textParser) nextEntry(c *constraints) (*logpb.LogEntry, error) {
	limit := int64(c.limit)
	ts := time.Time{}   // timestamp of the first chunk contributing to this entry.
	txt := logpb.Text{} // accumulated lines for this entry.
	lineCount := 0      // complete (delimiter-terminated) lines; advances p.sequence.
	for limit > 0 {
		br := p.ViewLimit(limit)
		if br.Remaining() == 0 {
			// Exceeded either limit or available buffer data.
			break
		}

		// Use the timestamp of the first data chunk.
		if len(txt.Lines) == 0 {
			ts, _ = p.firstChunkTime()
		} else if ct, _ := p.firstChunkTime(); !ct.Equal(ts) {
			// New timestamp, so need new LogEntry.
			break
		}

		// Find the index of our delimiter.
		//
		// We do this using a cross-platform approach that works on POSIX systems,
		// Mac (>=OSX), and Windows: we scan for "\n", then look backwards to see if
		// it was preceded by "\r" (for Windows-style newlines, "\r\n").
		idx := br.Index(posixNewlineBytes)

		newline := ""
		if idx >= 0 {
			// Restrict the reader to just this line's content; the delimiter
			// itself is excluded and accounted for separately via `newline`.
			br = br.CloneLimit(idx)
			newline = posixNewline
		} else if !c.allowSplit {
			// No delimiter within our limit, and we're not allowed to split, so we're
			// done.
			break
		}

		// Load the exportable data into our buffer.
		p.buf.Reset()
		p.buf.ReadFrom(br)

		// Does our exportable buffer end with "\r"? If so, treat it as a possible
		// Windows newline sequence.
		if p.buf.Len() > 0 && p.buf.Bytes()[p.buf.Len()-1] == byte('\r') {
			split := false
			if newline != "" {
				// "\n" => "\r\n"
				newline = windowsNewline
				split = true
			} else {
				// If we're closed and this is the last byte in the stream, it is a
				// dangling "\r" and we should include it. Otherwise, leave it for the
				// next round.
				split = !(c.closed && int64(p.buf.Len()) == p.Len())
			}

			if split {
				// Drop the trailing "\r" from the exported line content. When a
				// delimiter was found, that "\r" is consumed below as part of the
				// two-byte windowsNewline; otherwise it stays buffered for next time.
				p.buf.Truncate(p.buf.Len() - 1)
			}
		}

		partial := (idx < 0)
		if !partial {
			lineCount++
		}

		// If we didn't have a delimiter, make sure we don't terminate in the middle
		// of a UTF8 character.
		if partial {
			count := 0
			lidx := -1 // byte offset just past the last successfully-decoded rune.
			b := p.buf.Bytes()
			for len(b) > 0 {
				r, sz := utf8.DecodeRune(b)
				count += sz
				if r != utf8.RuneError {
					lidx = count
				}
				b = b[sz:]
			}
			if lidx < 0 {
				// No complete rune fits within the limit; emit nothing this round
				// and wait for more data (or a larger limit).
				break
			}
			p.buf.Truncate(lidx)
		}

		txt.Lines = append(txt.Lines, &logpb.Text_Line{
			Value:     append([]byte(nil), p.buf.Bytes()...), // Make a copy.
			Delimiter: newline,
		})
		// Consume the exported content plus its delimiter from the stream, and
		// charge the same amount against this entry's byte limit.
		p.Consume(int64(p.buf.Len() + len(newline)))
		limit -= int64(p.buf.Len() + len(newline))
	}

	if len(txt.Lines) == 0 {
		return nil, nil
	}
	le := p.baseLogEntry(ts)
	le.Sequence = uint64(p.sequence)
	le.Content = &logpb.LogEntry_Text{Text: &txt}

	// Advance the sequence by the number of complete lines emitted; a trailing
	// partial line does not advance it.
	p.sequence += int64(lineCount)
	return le, nil
}