
     1  // Copyright 2013-2017 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  // Wc counts lines, words, runes, syntactically-invalid UTF codes and bytes.
     6  //
     7  // Synopsis:
     8  //     wc [OPTIONS...] [FILES]...
     9  //
    10  // Description:
    11  //     Wc counts lines, words, runes, syntactically-invalid UTF codes and bytes
    12  //     in the named files, or in the standard input if no file is named. A word
    13  //     is a maximal string of characters delimited by spaces, tabs or newlines.
    14  //     The count of runes includes invalid codes. If the optional argument is
    15  //     present, just the specified counts (lines, words, runes, broken UTF
    16  //     codes or bytes) are selected by the letters l, w, r, b, or c. Otherwise,
    17  //     lines, words and bytes (-lwc) are reported.
    18  //
    19  // Options:
    20  //     -l: count lines
    21  //     -w: count words
    22  //     -r: count runes
    23  //     -b: count broken UTF codes
    24  //     -c: count bytes
    25  //
    26  // Bugs:
    27  //     This wc differs from Plan 9's wc somewhat in word count (BSD's wc differs
    28  //     even more significantly):
    29  //
    30  //     $ unicode 0x0-0x10ffff | 9 wc -w
    31  //     2228221
    32  //     $ unicode 0x0-0x10ffff | gowc -w
    33  //     2228198
    34  //     $ unicode 0x0-0x10ffff | bsdwc -w
    35  //     2293628
    36  //
    37  //     This wc differs from Plan 9's wc significantly in bad rune count:
    38  //
    39  //     $ unicode 0x0-0x10ffff | gowc -b
    40  //     6144
    41  //     $ unicode 0x0-0x10ffff | 9 wc -b
    42  //     1966080
    43  package main
    45  import (
    46  	"bufio"
    47  	"bytes"
    48  	"flag"
    49  	"fmt"
    50  	"io"
    51  	"os"
    52  	"strings"
    53  	"unicode/utf8"
    54  )
    56  var lines = flag.Bool("l", false, "count lines")
    57  var words = flag.Bool("w", false, "count words")
    58  var runes = flag.Bool("r", false, "count runes")
    59  var broken = flag.Bool("b", false, "count broken")
    60  var chars = flag.Bool("c", false, "count bytes (include partial UTF)")
    62  type cnt struct {
    63  	nline, nword, nrune, nbadr, nchar int64
    64  }
    66  // A modified version of utf8.Valid()
    67  func invalidCount(p []byte) (n int64) {
    68  	i := 0
    69  	for i < len(p) {
    70  		if p[i] < utf8.RuneSelf {
    71  			i++
    72  		} else {
    73  			_, size := utf8.DecodeRune(p[i:])
    74  			if size == 1 {
    75  				// All valid runes of size 1 (those
    76  				// below RuneSelf) were handled above.
    77  				// This muse be a RuneError.
    78  				n++
    79  			}
    80  			i += size
    81  		}
    82  	}
    83  	return
    84  }
    86  func count(in io.Reader, fname string) (c cnt) {
    87  	b := bufio.NewReaderSize(in, 8192)
    89  	counted := false
    90  	for !counted {
    91  		line, err := b.ReadBytes('\n')
    92  		if err != nil {
    93  			if err == io.EOF {
    94  				counted = true
    95  			} else {
    96  				fmt.Fprintf(os.Stderr, "error %s: %v", fname, err)
    97  				return cnt{} // no partial counts; should perhaps quit altogether?
    98  			}
    99  		}
   100  		if !counted {
   101  			c.nline++
   102  		}
   103  		c.nword += int64(len(bytes.Fields(line)))
   104  		c.nrune += int64(utf8.RuneCount(line))
   105  		c.nchar += int64(len(line))
   106  		c.nbadr += invalidCount(line)
   107  	}
   108  	return
   109  }
   111  func report(c cnt, fname string) {
   112  	fields := []string{}
   113  	if *lines {
   114  		fields = append(fields, fmt.Sprintf("%d", c.nline))
   115  	}
   116  	if *words {
   117  		fields = append(fields, fmt.Sprintf("%d", c.nword))
   118  	}
   119  	if *runes {
   120  		fields = append(fields, fmt.Sprintf("%d", c.nrune))
   121  	}
   122  	if *broken {
   123  		fields = append(fields, fmt.Sprintf("%d", c.nbadr))
   124  	}
   125  	if *chars {
   126  		fields = append(fields, fmt.Sprintf("%d", c.nchar))
   127  	}
   128  	if fname != "" {
   129  		fields = append(fields, fname)
   130  	}
   132  	fmt.Println(strings.Join(fields, " "))
   133  }
   135  func main() {
   136  	var totals cnt
   138  	flag.Parse()
   140  	if !(*lines || *words || *runes || *broken || *chars) {
   141  		*lines, *words, *chars = true, true, true
   142  	}
   144  	if flag.NArg() == 0 {
   145  		cnt := count(os.Stdin, "")
   146  		report(cnt, "")
   147  		return
   148  	}
   150  	for _, v := range flag.Args() {
   151  		f, err := os.Open(v)
   152  		if err != nil {
   153  			fmt.Fprintf(os.Stderr, "error opening %s: %v\n", v, err)
   154  			os.Exit(1)
   155  		}
   156  		cnt := count(f, v)
   157  		totals.nline += cnt.nline
   158  		totals.nword += cnt.nword
   159  		totals.nrune += cnt.nrune
   160  		totals.nbadr += cnt.nbadr
   161  		totals.nchar += cnt.nchar
   162  		report(cnt, v)
   163  	}
   164  	if flag.NArg() > 1 {
   165  		report(totals, "total")
   166  	}
   167  }