github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/cmds/exp/field/field.go

github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/cmds/exp/field/field.go (about)

     1  // Copyright 2017-2018 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // The `field` command reads newline-separated lines of data from either
     6  // the standard input or the specified files. It splits those lines into
     7  // a list of fields, separated by a specifiable regular expression. It
     8  // then prints all or a subset of those fields to the standard output.
     9  //
    10  // The list of output fields is specified using a grammar given in the
    11  // parsing code, below.
    12  //
    13  // Options '-F' and '-O' control the input and output separators,
    14  // respectively. The NUL character can be used as an output separator if
    15  // the '-0' is given. The '-e' and '-E' flags control whether empty
    16  // fields are collapsed in the input; '-e' unconditionally preserves such
    17  // fields, '-E' discards them. If neither is specified, a heuristic is
    18  // applied to guess: if the input specifier is more than one character in
    19  // length, we discard empty fields, otherwise we preserve them.
    20  package main
    21  
    22  import (
    23  	"bufio"
    24  	"flag"
    25  	"fmt"
    26  	"os"
    27  	"regexp"
    28  	"strconv"
    29  	"unicode"
    30  	"unicode/utf8"
    31  )
    32  
    33  type frange struct {
    34  	begin int
    35  	end   int
    36  }
    37  
    38  const lastField = 0x7FFFFFFF
    39  const cmd = "field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] <field list> [file...]"
    40  
    41  var (
    42  	flags struct {
    43  		nuloutsep     bool
    44  		preserveEmpty bool
    45  		discardEmpty  bool
    46  		insep         string
    47  		outsep        string
    48  	}
    49  )
    50  
    51  func init() {
    52  	defUsage := flag.Usage
    53  	flag.Usage = func() {
    54  		os.Args[0] = cmd
    55  		defUsage()
    56  	}
    57  	flag.BoolVar(&flags.nuloutsep, "0", false, "use the NUL character ('\\0') as output separator")
    58  	flag.BoolVar(&flags.preserveEmpty, "e", false, "preseve empty input fields")
    59  	flag.BoolVar(&flags.discardEmpty, "E", false, "discard empty input fields")
    60  	flag.StringVar(&flags.insep, "F", "[ \t\v\r]+", "Input separator characters (regular expression)")
    61  	flag.StringVar(&flags.outsep, "O", " ", "Output separater (string)")
    62  }
    63  
    64  func main() {
    65  	flag.Parse()
    66  
    67  	fstate := make(map[string]bool)
    68  	flag.Visit(func(f *flag.Flag) { fstate[f.Name] = true })
    69  	if fstate["e"] && fstate["E"] {
    70  		fatal("flag conflict: -e and -E are mutually exclusive")
    71  	}
    72  	if fstate["0"] && fstate["O"] {
    73  		fatal("flag conflict: -O and -0 are mutually exclusive")
    74  	}
    75  
    76  	collapse := shouldcollapse(flags.insep)
    77  	delim, err := regexp.Compile(flags.insep)
    78  	if err != nil {
    79  		fatal("Delimiter regexp failed to parse: %v", err)
    80  	}
    81  
    82  	if flag.NArg() == 0 {
    83  		fatal("Range specifier missing")
    84  	}
    85  	rv := parseranges(flag.Arg(0))
    86  
    87  	if flag.NArg() == 1 {
    88  		process(os.Stdin, rv, delim, flags.outsep, collapse)
    89  		return
    90  	}
    91  	for i := 1; i < flag.NArg(); i++ {
    92  		filename := flag.Arg(i)
    93  		if filename == "-" {
    94  			process(os.Stdin, rv, delim, flags.outsep, collapse)
    95  			continue
    96  		}
    97  		file, err := os.Open(filename)
    98  		if err != nil {
    99  			fmt.Fprintf(os.Stderr, "Cannot open file %q: %v\n", filename, err)
   100  			continue
   101  		}
   102  		process(file, rv, delim, flags.outsep, collapse)
   103  		file.Close()
   104  	}
   105  }
   106  
   107  func shouldcollapse(s string) bool {
   108  	if flags.preserveEmpty {
   109  		return false
   110  	}
   111  	if flags.discardEmpty {
   112  		return true
   113  	}
   114  	l := utf8.RuneCountInString(s)
   115  	r, _ := utf8.DecodeRuneInString(s)
   116  	return l > 1 && (l != 2 || r != '\\')
   117  }
   118  
   119  // The field selection syntax is:
   120  //
   121  // ranges := range [[delim] range]
   122  // range := field | NUM '-' [field]
   123  // field := NUM | NF
   124  // delim := ws+ | '|' | ','
   125  // ws := c such that `isspace(c)` is true.
   126  // NF := 'NF' | 'N'
   127  // (Numbers can be negative)
   128  
   129  func parseranges(input string) []frange {
   130  	var rs []frange
   131  	lex := &lexer{input: input}
   132  	if input == "" {
   133  		fatal("Empty field range")
   134  	}
   135  	lex.next()
   136  	for {
   137  		if lex.peektype() == tokSpace {
   138  			lex.next()
   139  		}
   140  		r := parserange(lex)
   141  		rs = append(rs, r)
   142  		typ := lex.peektype()
   143  		if typ == tokEOF {
   144  			break
   145  		}
   146  		if !isdelim(typ) {
   147  			fatal("Syntax error in field list, tok = %s", lex.peektok())
   148  		}
   149  		lex.next()
   150  	}
   151  	return rs
   152  }
   153  
   154  func parserange(lex *lexer) frange {
   155  	r := frange{begin: lastField, end: lastField}
   156  	if lex.peektype() == tokEOF {
   157  		fatal("EOF at start of range")
   158  	}
   159  	fnum, typ := parsefield(lex)
   160  	r.begin = fnum
   161  	r.end = fnum
   162  	if typ == tokNF {
   163  		return r
   164  	}
   165  	typ = lex.peektype()
   166  	if typ != tokDash {
   167  		return r
   168  	}
   169  	lex.next()
   170  	r.end = lastField
   171  	typ = lex.peektype()
   172  	if typ != tokEOF && !isdelim(typ) {
   173  		r.end, _ = parsefield(lex)
   174  	}
   175  	return r
   176  }
   177  
   178  func parsefield(lex *lexer) (int, toktype) {
   179  	typ := lex.peektype()
   180  	if typ == tokNF {
   181  		lex.next()
   182  		return lastField, tokNF
   183  	}
   184  	return parsenum(lex), tokNum
   185  }
   186  
   187  func parsenum(lex *lexer) int {
   188  	tok, typ := lex.next()
   189  	if typ == tokEOF {
   190  		fatal("EOF in number parser")
   191  	}
   192  	if typ == tokNum {
   193  		num, _ := strconv.Atoi(tok)
   194  		return num
   195  	}
   196  	if typ != tokDash {
   197  		fatal("number parser error: unexpected token '%v'", tok)
   198  	}
   199  	tok, typ = lex.next()
   200  	if typ == tokEOF {
   201  		fatal("negative number parse error: unexpected EOF")
   202  	}
   203  	if typ != tokNum {
   204  		fatal("number parser error: bad lexical token '%v'", tok)
   205  	}
   206  	num, _ := strconv.Atoi(tok)
   207  	return -num
   208  }
   209  
   210  func isdelim(typ toktype) bool {
   211  	return typ == tokComma || typ == tokPipe || typ == tokSpace
   212  }
   213  
   214  type toktype int
   215  
   216  const (
   217  	tokError toktype = iota
   218  	tokEOF
   219  	tokComma
   220  	tokPipe
   221  	tokDash
   222  	tokNum
   223  	tokSpace
   224  	tokNF
   225  
   226  	eof = -1
   227  )
   228  
   229  type lexer struct {
   230  	input string
   231  	tok   string
   232  	typ   toktype
   233  	start int
   234  	pos   int
   235  	width int
   236  }
   237  
   238  func (lex *lexer) peek() (string, toktype) {
   239  	return lex.tok, lex.typ
   240  }
   241  
   242  func (lex *lexer) peektype() toktype {
   243  	return lex.typ
   244  }
   245  
   246  func (lex *lexer) peektok() string {
   247  	return lex.tok
   248  }
   249  
   250  func (lex *lexer) next() (string, toktype) {
   251  	tok, typ := lex.peek()
   252  	lex.tok, lex.typ = lex.scan()
   253  	return tok, typ
   254  }
   255  
   256  func (lex *lexer) scan() (string, toktype) {
   257  	switch r := lex.nextrune(); {
   258  	case r == eof:
   259  		return "", tokEOF
   260  	case r == ',':
   261  		return lex.token(), tokComma
   262  	case r == '|':
   263  		return lex.token(), tokPipe
   264  	case r == '-':
   265  		return lex.token(), tokDash
   266  	case r == 'N':
   267  		lex.consume()
   268  		r = lex.nextrune()
   269  		if r == 'F' {
   270  			lex.consume()
   271  		}
   272  		lex.ignore()
   273  		return lex.token(), tokNF
   274  	case unicode.IsDigit(r):
   275  		for r := lex.nextrune(); unicode.IsDigit(r); r = lex.nextrune() {
   276  			lex.consume()
   277  		}
   278  		lex.ignore()
   279  		return lex.token(), tokNum
   280  	case unicode.IsSpace(r):
   281  		for r := lex.nextrune(); unicode.IsSpace(r); r = lex.nextrune() {
   282  			lex.consume()
   283  		}
   284  		lex.ignore()
   285  		return lex.token(), tokSpace
   286  	default:
   287  		fatal("Lexical error at character '%v'", r)
   288  	}
   289  	return "", tokError
   290  }
   291  
   292  func (lex *lexer) nextrune() (r rune) {
   293  	if lex.pos >= len(lex.input) {
   294  		lex.width = 0
   295  		return eof
   296  	}
   297  	r, lex.width = utf8.DecodeRuneInString(lex.input[lex.pos:])
   298  	return r
   299  }
   300  
   301  func (lex *lexer) consume() {
   302  	lex.pos += lex.width
   303  	lex.width = 0
   304  }
   305  
   306  func (lex *lexer) ignore() {
   307  	lex.width = 0
   308  }
   309  
   310  func (lex *lexer) token() string {
   311  	lex.consume()
   312  	tok := lex.input[lex.start:lex.pos]
   313  	lex.start = lex.pos
   314  	return tok
   315  }
   316  
   317  func process(file *os.File, rv []frange, delim *regexp.Regexp, outsep string, collapse bool) {
   318  	scanner := bufio.NewScanner(file)
   319  	for scanner.Scan() {
   320  		prefix := ""
   321  		printed := false
   322  		line := scanner.Text()
   323  		fields := split(line, delim, collapse)
   324  		for _, r := range rv {
   325  			begin, end := r.begin, r.end
   326  			switch {
   327  			case begin == 0:
   328  				pprefix(prefix)
   329  				prefix = outsep
   330  				fmt.Print(line)
   331  				printed = true
   332  			case begin == lastField:
   333  				begin = len(fields) - 1
   334  			case begin < 0:
   335  				begin += len(fields)
   336  			default:
   337  				begin--
   338  			}
   339  			if end < 0 {
   340  				end += len(fields) + 1
   341  			}
   342  			if begin < 0 || end < 0 || end < begin || len(fields) < begin {
   343  				continue
   344  			}
   345  			for i := begin; i < end && i < len(fields); i++ {
   346  				pprefix(prefix)
   347  				prefix = outsep
   348  				fmt.Print(fields[i])
   349  				printed = true
   350  			}
   351  		}
   352  		if printed || !collapse {
   353  			fmt.Println()
   354  		}
   355  	}
   356  	err := scanner.Err()
   357  	if err != nil {
   358  		fmt.Fprintln(os.Stderr, err)
   359  	}
   360  }
   361  
   362  func split(s string, delim *regexp.Regexp, collapse bool) []string {
   363  	sv := delim.Split(s, -1)
   364  	if !collapse {
   365  		return sv
   366  	}
   367  	rv := []string{}
   368  	for _, s := range sv {
   369  		if s != "" {
   370  			rv = append(rv, s)
   371  		}
   372  	}
   373  	return rv
   374  }
   375  
   376  func pprefix(prefix string) {
   377  	if prefix == "" {
   378  		return
   379  	}
   380  	if flags.nuloutsep {
   381  		fmt.Print("\x00")
   382  	} else {
   383  		fmt.Print(prefix)
   384  	}
   385  }
   386  
   387  func fatal(format string, a ...interface{}) {
   388  	fmt.Fprintf(os.Stderr, format+"\n", a...)
   389  	flag.Usage()
   390  	os.Exit(1)
   391  }