// Copyright 2017-2018 the u-root Authors. All rights reserved
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The `field` command reads newline-separated lines of data from either
// the standard input or the specified files. It splits those lines into
// a list of fields, separated by a specifiable regular expression. It
// then prints all or a subset of those fields to the standard output.
//
// The list of output fields is specified using a grammar given in the
// parsing code, below.
//
// Options '-F' and '-O' control the input and output separators,
// respectively. The NUL character can be used as an output separator if
// the '-0' option is given. The '-e' and '-E' options control whether
// empty fields are collapsed in the input; '-e' unconditionally preserves
// such fields, '-E' discards them. If neither is specified, a heuristic
// is applied to guess: if the input specifier is more than one character
// in length, we discard empty fields, otherwise we preserve them.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"regexp"
	"strconv"
	"unicode"
	"unicode/utf8"
)

type frange struct {
	begin int
	end   int
}

const lastField = 0x7FFFFFFF
const cmd = "field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] <field list> [file...]"

var (
	flags struct {
		nuloutsep     bool
		preserveEmpty bool
		discardEmpty  bool
		insep         string
		outsep        string
	}
)

func init() {
	defUsage := flag.Usage
	flag.Usage = func() {
		os.Args[0] = cmd
		defUsage()
	}
	flag.BoolVar(&flags.nuloutsep, "0", false, "use the NUL character ('\\0') as output separator")
	flag.BoolVar(&flags.preserveEmpty, "e", false, "preserve empty input fields")
	flag.BoolVar(&flags.discardEmpty, "E", false, "discard empty input fields")
	flag.StringVar(&flags.insep, "F", "[ \t\v\r]+", "Input separator characters (regular expression)")
	flag.StringVar(&flags.outsep, "O", " ", "Output separator (string)")
}

func main() {
	flag.Parse()

	fstate := make(map[string]bool)
	flag.Visit(func(f *flag.Flag) { fstate[f.Name] = true })
	if fstate["e"] && fstate["E"] {
		fatal("flag conflict: -e and -E are mutually exclusive")
	}
	if fstate["0"] && fstate["O"] {
		fatal("flag conflict: -O and -0 are mutually exclusive")
	}

	collapse := shouldcollapse(flags.insep)
	delim, err := regexp.Compile(flags.insep)
	if err != nil {
		fatal("Delimiter regexp failed to parse: %v", err)
	}

	if flag.NArg() == 0 {
		fatal("Range specifier missing")
	}
	rv := parseranges(flag.Arg(0))

	if flag.NArg() == 1 {
		process(os.Stdin, rv, delim, flags.outsep, collapse)
		return
	}
	for i := 1; i < flag.NArg(); i++ {
		filename := flag.Arg(i)
		if filename == "-" {
			process(os.Stdin, rv, delim, flags.outsep, collapse)
			continue
		}
		file, err := os.Open(filename)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Cannot open file %q: %v\n", filename, err)
			continue
		}
		process(file, rv, delim, flags.outsep, collapse)
		file.Close()
	}
}

func shouldcollapse(s string) bool {
	if flags.preserveEmpty {
		return false
	}
	if flags.discardEmpty {
		return true
	}
	l := utf8.RuneCountInString(s)
	r, _ := utf8.DecodeRuneInString(s)
	return l > 1 && (l != 2 || r != '\\')
}

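// The following is an illustrative sketch, not part of the original
// command: the hypothetical helper collapseExamples shows how the
// heuristic in shouldcollapse treats a few '-F' separator expressions
// when neither '-e' nor '-E' is given. Single-character separators (and
// a lone escape such as "\\t") preserve empty fields; longer expressions
// discard them.
func collapseExamples() {
	for _, sep := range []string{" ", "\\t", "[ \t]+", ","} {
		// With the default flag values, shouldcollapse falls through to
		// the length heuristic: true means empty fields are discarded.
		fmt.Printf("%-10q collapse=%v\n", sep, shouldcollapse(sep))
	}
}
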
// The field selection syntax is:
//
// ranges := range [[delim] range]
// range  := field | NUM '-' [field]
// field  := NUM | NF
// delim  := ws+ | '|' | ','
// ws     := c such that `isspace(c)` is true.
// NF     := 'NF' | 'N'
// (Numbers can be negative)

func parseranges(input string) []frange {
	var rs []frange
	lex := &lexer{input: input}
	if input == "" {
		fatal("Empty field range")
	}
	lex.next()
	for {
		if lex.peektype() == tokSpace {
			lex.next()
		}
		r := parserange(lex)
		rs = append(rs, r)
		typ := lex.peektype()
		if typ == tokEOF {
			break
		}
		if !isdelim(typ) {
			fatal("Syntax error in field list, tok = %s", lex.peektok())
		}
		lex.next()
	}
	return rs
}

func parserange(lex *lexer) frange {
	r := frange{begin: lastField, end: lastField}
	if lex.peektype() == tokEOF {
		fatal("EOF at start of range")
	}
	fnum, typ := parsefield(lex)
	r.begin = fnum
	r.end = fnum
	if typ == tokNF {
		return r
	}
	typ = lex.peektype()
	if typ != tokDash {
		return r
	}
	lex.next()
	r.end = lastField
	typ = lex.peektype()
	if typ != tokEOF && !isdelim(typ) {
		r.end, _ = parsefield(lex)
	}
	return r
}

func parsefield(lex *lexer) (int, toktype) {
	typ := lex.peektype()
	if typ == tokNF {
		lex.next()
		return lastField, tokNF
	}
	return parsenum(lex), tokNum
}

func parsenum(lex *lexer) int {
	tok, typ := lex.next()
	if typ == tokEOF {
		fatal("EOF in number parser")
	}
	if typ == tokNum {
		num, _ := strconv.Atoi(tok)
		return num
	}
	if typ != tokDash {
		fatal("number parser error: unexpected token '%v'", tok)
	}
	tok, typ = lex.next()
	if typ == tokEOF {
		fatal("negative number parse error: unexpected EOF")
	}
	if typ != tokNum {
		fatal("number parser error: bad lexical token '%v'", tok)
	}
	num, _ := strconv.Atoi(tok)
	return -num
}

func isdelim(typ toktype) bool {
	return typ == tokComma || typ == tokPipe || typ == tokSpace
}

type toktype int

const (
	tokError toktype = iota
	tokEOF
	tokComma
	tokPipe
	tokDash
	tokNum
	tokSpace
	tokNF

	eof = -1
)

type lexer struct {
	input string
	tok   string
	typ   toktype
	start int
	pos   int
	width int
}

func (lex *lexer) peek() (string, toktype) {
	return lex.tok, lex.typ
}

func (lex *lexer) peektype() toktype {
	return lex.typ
}

func (lex *lexer) peektok() string {
	return lex.tok
}

func (lex *lexer) next() (string, toktype) {
	tok, typ := lex.peek()
	lex.tok, lex.typ = lex.scan()
	return tok, typ
}

func (lex *lexer) scan() (string, toktype) {
	switch r := lex.nextrune(); {
	case r == eof:
		return "", tokEOF
	case r == ',':
		return lex.token(), tokComma
	case r == '|':
		return lex.token(), tokPipe
	case r == '-':
		return lex.token(), tokDash
	case r == 'N':
		lex.consume()
		r = lex.nextrune()
		if r == 'F' {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNF
	case unicode.IsDigit(r):
		for r := lex.nextrune(); unicode.IsDigit(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNum
	case unicode.IsSpace(r):
		for r := lex.nextrune(); unicode.IsSpace(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokSpace
	default:
		fatal("Lexical error at character '%v'", r)
	}
	return "", tokError
}

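// Illustrative sketch, not part of the original command: the hypothetical
// helper parserangesExample shows what the grammar above produces for a
// typical field list. "1-3" becomes frange{begin: 1, end: 3}, and "NF"
// becomes a range whose begin and end are both lastField, i.e. the final
// field of each input line.
func parserangesExample() {
	for _, r := range parseranges("1-3,NF") {
		fmt.Printf("begin=%d end=%d\n", r.begin, r.end)
	}
}
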
func (lex *lexer) nextrune() (r rune) {
	if lex.pos >= len(lex.input) {
		lex.width = 0
		return eof
	}
	r, lex.width = utf8.DecodeRuneInString(lex.input[lex.pos:])
	return r
}

func (lex *lexer) consume() {
	lex.pos += lex.width
	lex.width = 0
}

func (lex *lexer) ignore() {
	lex.width = 0
}

func (lex *lexer) token() string {
	lex.consume()
	tok := lex.input[lex.start:lex.pos]
	lex.start = lex.pos
	return tok
}

func process(file *os.File, rv []frange, delim *regexp.Regexp, outsep string, collapse bool) {
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		prefix := ""
		printed := false
		line := scanner.Text()
		fields := split(line, delim, collapse)
		for _, r := range rv {
			begin, end := r.begin, r.end
			switch {
			case begin == 0:
				pprefix(prefix)
				prefix = outsep
				fmt.Print(line)
				printed = true
			case begin == lastField:
				begin = len(fields) - 1
			case begin < 0:
				begin += len(fields)
			default:
				begin--
			}
			if end < 0 {
				end += len(fields) + 1
			}
			if begin < 0 || end < 0 || end < begin || len(fields) < begin {
				continue
			}
			for i := begin; i < end && i < len(fields); i++ {
				pprefix(prefix)
				prefix = outsep
				fmt.Print(fields[i])
				printed = true
			}
		}
		if printed || !collapse {
			fmt.Println()
		}
	}
	err := scanner.Err()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

func split(s string, delim *regexp.Regexp, collapse bool) []string {
	sv := delim.Split(s, -1)
	if !collapse {
		return sv
	}
	rv := []string{}
	for _, s := range sv {
		if s != "" {
			rv = append(rv, s)
		}
	}
	return rv
}

func pprefix(prefix string) {
	if prefix == "" {
		return
	}
	if flags.nuloutsep {
		fmt.Print("\x00")
	} else {
		fmt.Print(prefix)
	}
}

func fatal(format string, a ...interface{}) {
	fmt.Fprintf(os.Stderr, format+"\n", a...)
	flag.Usage()
	os.Exit(1)
}
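
// Illustrative sketch, not part of the original command: the hypothetical
// helper splitExample shows the effect of the collapse argument on split.
// With collapse=false the doubled comma yields an empty middle field;
// with collapse=true that empty field is discarded.
func splitExample() {
	delim := regexp.MustCompile(",")
	fmt.Println(split("a,,b", delim, false)) // [a  b]
	fmt.Println(split("a,,b", delim, true))  // [a b]
}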