github.com/rck/u-root@v0.0.0-20180106144920-7eb602e381bb/cmds/field/field.go

// The `field` command reads newline-separated lines of data from either
// the standard input or the specified files. It splits those lines into
// a list of fields, separated by a specifiable regular expression. It
// then prints all or a subset of those fields to the standard output.
//
// The list of output fields is specified using a grammar given in the
// parsing code, below.
//
// Options '-F' and '-O' control the input and output separators,
// respectively. The NUL character can be used as an output separator if
// the '-0' option is given. The '-e' and '-E' options control whether empty
// fields are collapsed in the input: '-e' unconditionally preserves such
// fields and '-E' discards them. If neither is specified, a heuristic is
// applied to guess: if the input separator is more than one character
// long (and is not a single backslash-escaped character), empty fields
// are discarded; otherwise they are preserved.
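//
// For example (illustrative invocations; the input files are placeholders):
//
//	ls -l | field 1,NF             # permissions and name of each entry
//	field -F : 1,6 /etc/passwd     # user name and home directory
//	field -F , -O '|' 2- data.csv  # fields 2 to end, '|'-separated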
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"regexp"
	"strconv"
	"unicode"
	"unicode/utf8"
)

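// frange is one field range from the command-line field list. begin and end
// are inclusive, 1-based field numbers; lastField stands for the final field
// of a line, and negative values count backward from the end of the line.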
type frange struct {
	begin int
	end   int
}

const lastField = 0x7FFFFFFF
const cmd = "field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] <field list> [file...]"

var (
	flags struct {
		nuloutsep     bool
		preserveEmpty bool
		discardEmpty  bool
		insep         string
		outsep        string
	}
)

func init() {
	defUsage := flag.Usage
	flag.Usage = func() {
		os.Args[0] = cmd
		defUsage()
	}
	flag.BoolVar(&flags.nuloutsep, "0", false, "use the NUL character ('\\0') as output separator")
	flag.BoolVar(&flags.preserveEmpty, "e", false, "preserve empty input fields")
	flag.BoolVar(&flags.discardEmpty, "E", false, "discard empty input fields")
	flag.StringVar(&flags.insep, "F", "[ \t\v\r]+", "Input separator characters (regular expression)")
	flag.StringVar(&flags.outsep, "O", " ", "Output separator (string)")
}

func main() {
	flag.Parse()

	fstate := make(map[string]bool)
	flag.Visit(func(f *flag.Flag) { fstate[f.Name] = true })
	if fstate["e"] && fstate["E"] {
		fatal("flag conflict: -e and -E are mutually exclusive")
	}
	if fstate["0"] && fstate["O"] {
		fatal("flag conflict: -O and -0 are mutually exclusive")
	}

	collapse := shouldcollapse(flags.insep)
	delim, err := regexp.Compile(flags.insep)
	if err != nil {
		fatal("Delimiter regexp failed to parse: %v", err)
	}

	if flag.NArg() == 0 {
		fatal("Range specifier missing")
	}
	rv := parseranges(flag.Arg(0))

	if flag.NArg() == 1 {
		process(os.Stdin, rv, delim, flags.outsep, collapse)
		return
	}
	for i := 1; i < flag.NArg(); i++ {
		filename := flag.Arg(i)
		if filename == "-" {
			process(os.Stdin, rv, delim, flags.outsep, collapse)
			continue
		}
		file, err := os.Open(filename)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Cannot open file %q: %v\n", filename, err)
			continue
		}
		process(file, rv, delim, flags.outsep, collapse)
		file.Close()
	}
}

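// shouldcollapse decides whether empty input fields are discarded. The -e and
// -E flags force the answer; otherwise empty fields are discarded when the
// input separator is longer than a single (possibly backslash-escaped)
// character, e.g. "[ \t]+", and preserved for single-character separators
// such as ":" or "\t".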
func shouldcollapse(s string) bool {
	if flags.preserveEmpty {
		return false
	}
	if flags.discardEmpty {
		return true
	}
	l := utf8.RuneCountInString(s)
	r, _ := utf8.DecodeRuneInString(s)
	return l > 1 && (l != 2 || r != '\\')
}

// The field selection syntax is:
//
// ranges := range [[delim] range]
// range := field | NUM '-' [field]
// field := NUM | NF
// delim := ws+ | '|' | ','
// ws := c such that unicode.IsSpace(c) is true
// NF := 'NF' | 'N'
// (Numbers can be negative.)
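//
// For example (illustrative): "3" selects the third field, "2-4" selects
// fields two through four, "2-" selects every field from the second one to
// the end of the line, "NF" selects the last field, "-2" selects the
// second-to-last field, and "1,3 5" selects fields one, three, and five.
// A field number of 0 selects the entire input line.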

func parseranges(input string) []frange {
	var rs []frange
	lex := &lexer{input: input}
	if input == "" {
		fatal("Empty field range")
	}
	lex.next()
	for {
		if lex.peektype() == tokSpace {
			lex.next()
		}
		r := parserange(lex)
		rs = append(rs, r)
		typ := lex.peektype()
		if typ == tokEOF {
			break
		}
		if !isdelim(typ) {
			fatal("Syntax error in field list, tok = %s", lex.peektok())
		}
		lex.next()
	}
	return rs
}

func parserange(lex *lexer) frange {
	r := frange{begin: lastField, end: lastField}
	if lex.peektype() == tokEOF {
		fatal("EOF at start of range")
	}
	fnum, typ := parsefield(lex)
	r.begin = fnum
	r.end = fnum
	if typ == tokNF {
		return r
	}
	typ = lex.peektype()
	if typ != tokDash {
		return r
	}
	lex.next()
	r.end = lastField
	typ = lex.peektype()
	if typ != tokEOF && !isdelim(typ) {
		r.end, _ = parsefield(lex)
	}
	return r
}

func parsefield(lex *lexer) (int, toktype) {
	typ := lex.peektype()
	if typ == tokNF {
		lex.next()
		return lastField, tokNF
	}
	return parsenum(lex), tokNum
}

func parsenum(lex *lexer) int {
	tok, typ := lex.next()
	if typ == tokEOF {
		fatal("EOF in number parser")
	}
	if typ == tokNum {
		num, _ := strconv.Atoi(tok)
		return num
	}
	if typ != tokDash {
		fatal("number parser error: unexpected token '%v'", tok)
	}
	tok, typ = lex.next()
	if typ == tokEOF {
		fatal("negative number parse error: unexpected EOF")
	}
	if typ != tokNum {
		fatal("number parser error: bad lexical token '%v'", tok)
	}
	num, _ := strconv.Atoi(tok)
	return -num
}

func isdelim(typ toktype) bool {
	return typ == tokComma || typ == tokPipe || typ == tokSpace
}

type toktype int

const (
	tokError toktype = iota
	tokEOF
	tokComma
	tokPipe
	tokDash
	tokNum
	tokSpace
	tokNF

	eof = -1
)

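// lexer tokenizes a field-list string. tok and typ hold the most recently
// scanned (peeked) token, while start, pos, and width track the scanner's
// position within input.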
type lexer struct {
	input string
	tok   string
	typ   toktype
	start int
	pos   int
	width int
}

func (lex *lexer) peek() (string, toktype) {
	return lex.tok, lex.typ
}

func (lex *lexer) peektype() toktype {
	return lex.typ
}

func (lex *lexer) peektok() string {
	return lex.tok
}

func (lex *lexer) next() (string, toktype) {
	tok, typ := lex.peek()
	lex.tok, lex.typ = lex.scan()
	return tok, typ
}

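// scan reads the next token from the input and returns its text and type.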
func (lex *lexer) scan() (string, toktype) {
	switch r := lex.nextrune(); {
	case r == eof:
		return "", tokEOF
	case r == ',':
		return lex.token(), tokComma
	case r == '|':
		return lex.token(), tokPipe
	case r == '-':
		return lex.token(), tokDash
	case r == 'N':
		lex.consume()
		r = lex.nextrune()
		if r == 'F' {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNF
	case unicode.IsDigit(r):
		for r := lex.nextrune(); unicode.IsDigit(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNum
	case unicode.IsSpace(r):
		for r := lex.nextrune(); unicode.IsSpace(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokSpace
	default:
		fatal("Lexical error at character %q", r)
	}
	return "", tokError
}

func (lex *lexer) nextrune() (r rune) {
	if lex.pos >= len(lex.input) {
		lex.width = 0
		return eof
	}
	r, lex.width = utf8.DecodeRuneInString(lex.input[lex.pos:])
	return r
}

func (lex *lexer) consume() {
	lex.pos += lex.width
	lex.width = 0
}

func (lex *lexer) ignore() {
	lex.width = 0
}

func (lex *lexer) token() string {
	lex.consume()
	tok := lex.input[lex.start:lex.pos]
	lex.start = lex.pos
	return tok
}

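// process reads file line by line, splits each line into fields using delim,
// and prints the fields selected by rv, joined by outsep. Field numbers are
// 1-based; field 0 means the whole line, lastField means the final field, and
// negative numbers count backward from the end of the line.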
func process(file *os.File, rv []frange, delim *regexp.Regexp, outsep string, collapse bool) {
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		prefix := ""
		printed := false
		line := scanner.Text()
		fields := split(line, delim, collapse)
		for _, r := range rv {
			begin, end := r.begin, r.end
			switch {
			case begin == 0:
				pprefix(prefix)
				prefix = outsep
				fmt.Print(line)
				printed = true
			case begin == lastField:
				begin = len(fields) - 1
			case begin < 0:
				begin += len(fields)
			default:
				begin--
			}
			if end < 0 {
				end += len(fields) + 1
			}
			if begin < 0 || end < 0 || end < begin || len(fields) < begin {
				continue
			}
			for i := begin; i < end && i < len(fields); i++ {
				pprefix(prefix)
				prefix = outsep
				fmt.Print(fields[i])
				printed = true
			}
		}
		if printed || !collapse {
			fmt.Println()
		}
	}
	err := scanner.Err()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

func split(s string, delim *regexp.Regexp, collapse bool) []string {
	sv := delim.Split(s, -1)
	if !collapse {
		return sv
	}
	rv := []string{}
	for _, s := range sv {
		if s != "" {
			rv = append(rv, s)
		}
	}
	return rv
}

func pprefix(prefix string) {
	if prefix == "" {
		return
	}
	if flags.nuloutsep {
		fmt.Print("\x00")
	} else {
		fmt.Print(prefix)
	}
}

func fatal(format string, a ...interface{}) {
	fmt.Fprintf(os.Stderr, format+"\n", a...)
	flag.Usage()
	os.Exit(1)
}