github.com/chipaca/snappy@v0.0.0-20210104084008-1f06296fe8ad/strutil/matchcounter.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package strutil 21 22 import ( 23 "bytes" 24 "regexp" 25 ) 26 27 // A MatchCounter is a discarding io.Writer that retains up to N 28 // matches to its Regexp before just counting matches. 29 // 30 // It does not work with regexps that cross newlines; in fact it will 31 // probably not work if the data written isn't line-oriented. 32 // 33 // If Regexp is not set (or nil), it matches whole non-empty lines. 
// MatchCounter swallows everything written to it, remembering only the
// first N matches and a running total of all matches.
//
// Data is examined in newline-terminated pieces; whatever follows the
// last newline of a Write is held back until a later Write completes
// that line.
type MatchCounter struct {
	// Regexp is the pattern searched for in the written data. When it
	// is nil, every non-empty line counts as one match.
	Regexp *regexp.Regexp
	// N caps the number of matches retained; a negative value retains
	// every match.
	N int

	// count is the total number of matches seen so far.
	count int
	// matches holds the retained matches in the order they were seen.
	matches []string
	// partial buffers a trailing fragment not yet ended by a newline.
	partial bytes.Buffer
}

// Write feeds p to the counter. It always reports len(p) bytes written
// and a nil error.
func (w *MatchCounter) Write(p []byte) (int, error) {
	written := len(p)

	if w.partial.Len() > 0 {
		nl := bytes.IndexByte(p, '\n')
		if nl < 0 {
			// The pending line is still unfinished; keep buffering.
			w.partial.Write(p)
			return written, nil
		}
		head, rest := p[:nl+1], p[nl+1:]
		w.partial.Write(head)
		w.check(w.partial.Bytes())
		p = rest
	}
	w.partial.Reset()

	// Split p just after its last newline: everything before the split
	// is made of complete lines, everything after is a fragment that
	// must wait for more data.
	split := bytes.LastIndexByte(p, '\n') + 1
	complete, fragment := p[:split], p[split:]
	w.partial.Write(fragment)
	if len(complete) > 0 {
		// Only scan when at least one full line is present; scanning an
		// empty slice could still count a match for a regexp that
		// matches the empty string.
		w.check(complete)
	}
	return written, nil
}

// full reports whether the retention limit N has been reached.
func (w *MatchCounter) full() bool {
	return w.N >= 0 && len(w.matches) >= w.N
}

// check scans p, which consists of newline-terminated lines, updating
// the match total and retaining matches while there is room for them.
func (w *MatchCounter) check(p []byte) {
	if w.Regexp != nil {
		found := w.Regexp.FindAll(p, -1)
		w.count += len(found)
		for _, m := range found {
			if w.full() {
				break
			}
			w.matches = append(w.matches, string(m))
		}
		return
	}

	// No regexp: each non-empty line is a match.
	for {
		nl := bytes.IndexByte(p, '\n')
		if nl < 0 {
			return
		}
		line := p[:nl]
		p = p[nl+1:]
		if len(line) == 0 {
			// Empty lines are skipped and not counted.
			continue
		}
		if !w.full() {
			w.matches = append(w.matches, string(line))
		}
		w.count++
	}
}

// Matches returns the retained matches along with the total number of
// matches counted so far.
func (w *MatchCounter) Matches() ([]string, int) {
	return w.matches, w.count
}