github.com/chipaca/snappy@v0.0.0-20210104084008-1f06296fe8ad/strutil/matchcounter.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2018 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package strutil
    21  
    22  import (
    23  	"bytes"
    24  	"regexp"
    25  )
    26  
    27  // A MatchCounter is a discarding io.Writer that retains up to N
    28  // matches to its Regexp before just counting matches.
    29  //
    30  // It does not work with regexps that cross newlines; in fact it will
    31  // probably not work if the data written isn't line-orineted.
    32  //
    33  // If Regexp is not set (or nil), it matches whole non-empty lines.
    34  type MatchCounter struct {
    35  	// Regexp to use to find matches in the stream
    36  	Regexp *regexp.Regexp
    37  	// Maximum number of matches to keep; if < 0, keep all matches
    38  	N int
    39  
    40  	count   int
    41  	matches []string
    42  	partial bytes.Buffer
    43  }
    44  
    45  func (w *MatchCounter) Write(p []byte) (int, error) {
    46  	n := len(p)
    47  	if w.partial.Len() > 0 {
    48  		idx := bytes.IndexByte(p, '\n')
    49  		if idx < 0 {
    50  			// no newline yet, carry on accumulating
    51  			w.partial.Write(p)
    52  			return n, nil
    53  		}
    54  		idx++
    55  		w.partial.Write(p[:idx])
    56  		w.check(w.partial.Bytes())
    57  		p = p[idx:]
    58  	}
    59  	w.partial.Reset()
    60  	idx := bytes.LastIndexByte(p, '\n')
    61  	if idx < 0 {
    62  		w.partial.Write(p)
    63  		return n, nil
    64  	}
    65  	idx++
    66  	w.partial.Write(p[idx:])
    67  	w.check(p[:idx])
    68  	return n, nil
    69  }
    70  
    71  func (w *MatchCounter) check(p []byte) {
    72  	if w.Regexp == nil {
    73  		for {
    74  			idx := bytes.IndexByte(p, '\n')
    75  			if idx < 0 {
    76  				return
    77  			}
    78  			if idx == 0 {
    79  				// empty line
    80  				p = p[1:]
    81  				continue
    82  			}
    83  			if w.N < 0 || len(w.matches) < w.N {
    84  				w.matches = append(w.matches, string(p[:idx]))
    85  			}
    86  			w.count++
    87  			p = p[idx+1:]
    88  		}
    89  	}
    90  	matches := w.Regexp.FindAll(p, -1)
    91  	for _, match := range matches {
    92  		if w.N >= 0 && len(w.matches) >= w.N {
    93  			break
    94  		}
    95  		w.matches = append(w.matches, string(match))
    96  	}
    97  	w.count += len(matches)
    98  }
    99  
   100  // Matches returns the first few matches, and the total number of matches seen.
   101  func (w *MatchCounter) Matches() ([]string, int) {
   102  	return w.matches, w.count
   103  }