github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/operator/builtin/input/file/line_splitter.go

package file

import (
	"bufio"
	"bytes"
	"regexp"

	"golang.org/x/text/encoding"
)

// NewLineStartSplitFunc creates a bufio.SplitFunc that splits an incoming stream into
// tokens that start with a match to the regex pattern provided
func NewLineStartSplitFunc(re *regexp.Regexp) bufio.SplitFunc {
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		firstLoc := re.FindIndex(data)
		if firstLoc == nil {
			return 0, nil, nil // read more data and try again
		}
		firstMatchStart := firstLoc[0]
		firstMatchEnd := firstLoc[1]

		if firstMatchStart != 0 {
			// the beginning of the file does not match the start pattern, so return a token up to the first match so we don't lose data
			advance = firstMatchStart
			token = data[0:firstMatchStart]
			return
		}

		if firstMatchEnd == len(data) {
			// the first match goes to the end of the buffer, so don't look for a second match
			return 0, nil, nil
		}

		secondLocOffset := firstMatchEnd + 1
		secondLoc := re.FindIndex(data[secondLocOffset:])
		if secondLoc == nil {
			return 0, nil, nil // read more data and try again
		}
		secondMatchStart := secondLoc[0] + secondLocOffset

		advance = secondMatchStart                     // start scanning at the beginning of the second match
		token = data[firstMatchStart:secondMatchStart] // the token begins at the first match, and ends at the beginning of the second match
		err = nil
		return
	}
}

// NewLineEndSplitFunc creates a bufio.SplitFunc that splits an incoming stream into
// tokens that end with a match to the regex pattern provided
func NewLineEndSplitFunc(re *regexp.Regexp) bufio.SplitFunc {
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		loc := re.FindIndex(data)
		if loc == nil {
			return 0, nil, nil // read more data and try again
		}

		// If the match goes up to the end of the current buffer, do another
		// read until we can capture the entire match
		if loc[1] == len(data)-1 && !atEOF {
			return 0, nil, nil
		}

		advance = loc[1]
		token = data[:loc[1]]
		err = nil
		return
	}
}

// NewNewlineSplitFunc splits log lines by newline, just as bufio.ScanLines, but
// never returns a token that uses EOF as a terminator
func NewNewlineSplitFunc(encoding encoding.Encoding) (bufio.SplitFunc, error) {
	newline, err := encodedNewline(encoding)
	if err != nil {
		return nil, err
	}

	carriageReturn, err := encodedCarriageReturn(encoding)
	if err != nil {
		return nil, err
	}

	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		if atEOF && len(data) == 0 {
			return 0, nil, nil
		}

		if i := bytes.Index(data, newline); i >= 0 {
			// We have a full newline-terminated line.
			return i + len(newline), bytes.TrimSuffix(data[:i], carriageReturn), nil
		}

		// Request more data.
		return 0, nil, nil
	}, nil
}

// encodedNewline returns the byte sequence for '\n' in the given encoding
func encodedNewline(encoding encoding.Encoding) ([]byte, error) {
	out := make([]byte, 10)
	nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\n'}, true)
	return out[:nDst], err
}

// encodedCarriageReturn returns the byte sequence for '\r' in the given encoding
func encodedCarriageReturn(encoding encoding.Encoding) ([]byte, error) {
	out := make([]byte, 10)
	nDst, _, err := encoding.NewEncoder().Transform(out, []byte{'\r'}, true)
	return out[:nDst], err
}
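
// The usage sketch below is an editor's illustration and is not part of the
// original source. It shows how one of the split functions above is typically
// installed on a bufio.Scanner; the function name and the regex pattern are
// hypothetical.
func exampleLineStartScan(data []byte) ([]string, error) {
	// hypothetical entry-start pattern: an ISO-style date at the head of each log entry
	re := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)

	scanner := bufio.NewScanner(bytes.NewReader(data))
	scanner.Split(NewLineStartSplitFunc(re))

	var entries []string
	for scanner.Scan() {
		// each emitted token is one (possibly multiline) entry beginning at a pattern match
		entries = append(entries, scanner.Text())
	}
	// note: consistent with the split functions above, the trailing entry is not
	// emitted at EOF, since the split func never uses EOF as a terminator
	return entries, scanner.Err()
}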