github.com/rck/u-root@v0.0.0-20180106144920-7eb602e381bb/cmds/field/field.go

// The `field` command reads newline-separated lines of data from either
// the standard input or the specified files. It splits those lines into
// a list of fields, separated by a specifiable regular expression. It
// then prints all or a subset of those fields to the standard output.
//
// The list of output fields is specified using a grammar given in the
// parsing code, below.
//
// Options '-F' and '-O' control the input and output separators,
// respectively. The NUL character can be used as an output separator if
// the '-0' option is given. The '-e' and '-E' options control whether empty
// fields are collapsed in the input; '-e' unconditionally preserves such
// fields, '-E' discards them. If neither is specified, a heuristic is
// applied to guess: if the input separator is more than one character in
// length, we discard empty fields, otherwise we preserve them.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"regexp"
	"strconv"
	"unicode"
	"unicode/utf8"
)

type frange struct {
	begin int
	end   int
}

const lastField = 0x7FFFFFFF
const cmd = "field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] <field list> [file...]"

var (
	flags struct {
		nuloutsep     bool
		preserveEmpty bool
		discardEmpty  bool
		insep         string
		outsep        string
	}
)

func init() {
	defUsage := flag.Usage
	flag.Usage = func() {
		os.Args[0] = cmd
		defUsage()
	}
	flag.BoolVar(&flags.nuloutsep, "0", false, "use the NUL character ('\\0') as output separator")
	flag.BoolVar(&flags.preserveEmpty, "e", false, "preserve empty input fields")
	flag.BoolVar(&flags.discardEmpty, "E", false, "discard empty input fields")
	flag.StringVar(&flags.insep, "F", "[ \t\v\r]+", "Input separator characters (regular expression)")
	flag.StringVar(&flags.outsep, "O", " ", "Output separator (string)")
}

func main() {
	flag.Parse()

	fstate := make(map[string]bool)
	flag.Visit(func(f *flag.Flag) { fstate[f.Name] = true })
	if fstate["e"] && fstate["E"] {
		fatal("flag conflict: -e and -E are mutually exclusive")
	}
	if fstate["0"] && fstate["O"] {
		fatal("flag conflict: -O and -0 are mutually exclusive")
	}

	collapse := shouldcollapse(flags.insep)
	delim, err := regexp.Compile(flags.insep)
	if err != nil {
		fatal("Delimiter regexp failed to parse: %v", err)
	}

	if flag.NArg() == 0 {
		fatal("Range specifier missing")
	}
	rv := parseranges(flag.Arg(0))

	if flag.NArg() == 1 {
		process(os.Stdin, rv, delim, flags.outsep, collapse)
		return
	}
	for i := 1; i < flag.NArg(); i++ {
		filename := flag.Arg(i)
		if filename == "-" {
			process(os.Stdin, rv, delim, flags.outsep, collapse)
			continue
		}
		file, err := os.Open(filename)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Cannot open file %q: %v\n", filename, err)
			continue
		}
		process(file, rv, delim, flags.outsep, collapse)
		file.Close()
	}
}

func shouldcollapse(s string) bool {
	if flags.preserveEmpty {
		return false
	}
	if flags.discardEmpty {
		return true
	}
	l := utf8.RuneCountInString(s)
	r, _ := utf8.DecodeRuneInString(s)
	return l > 1 && (l != 2 || r != '\\')
}
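// collapseExamples is an illustrative sketch, not part of the original
// command: it shows how the heuristic in shouldcollapse treats a few
// separators when neither -e nor -E has been given (the defaults).
func collapseExamples() {
	// A multi-rune regexp such as the default separator collapses empty
	// fields; a single rune (",") or a backslash escape ("\\t") preserves them.
	for _, sep := range []string{"[ \t\v\r]+", ",", "\\t"} {
		fmt.Printf("%-14q collapse=%v\n", sep, shouldcollapse(sep))
	}
}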
// The field selection syntax is:
//
//	ranges := range [[delim] range]
//	range  := field | NUM '-' [field]
//	field  := NUM | NF
//	delim  := ws+ | '|' | ','
//	ws     := c such that `isspace(c)` is true
//	NF     := 'NF' | 'N'
//
// (Numbers can be negative.)

func parseranges(input string) []frange {
	var rs []frange
	lex := &lexer{input: input}
	if input == "" {
		fatal("Empty field range")
	}
	lex.next()
	for {
		if lex.peektype() == tokSpace {
			lex.next()
		}
		r := parserange(lex)
		rs = append(rs, r)
		typ := lex.peektype()
		if typ == tokEOF {
			break
		}
		if !isdelim(typ) {
			fatal("Syntax error in field list, tok = %s", lex.peektok())
		}
		lex.next()
	}
	return rs
}

func parserange(lex *lexer) frange {
	r := frange{begin: lastField, end: lastField}
	if lex.peektype() == tokEOF {
		fatal("EOF at start of range")
	}
	fnum, typ := parsefield(lex)
	r.begin = fnum
	r.end = fnum
	if typ == tokNF {
		return r
	}
	typ = lex.peektype()
	if typ != tokDash {
		return r
	}
	lex.next()
	r.end = lastField
	typ = lex.peektype()
	if typ != tokEOF && !isdelim(typ) {
		r.end, _ = parsefield(lex)
	}
	return r
}

func parsefield(lex *lexer) (int, toktype) {
	typ := lex.peektype()
	if typ == tokNF {
		lex.next()
		return lastField, tokNF
	}
	return parsenum(lex), tokNum
}

func parsenum(lex *lexer) int {
	tok, typ := lex.next()
	if typ == tokEOF {
		fatal("EOF in number parser")
	}
	if typ == tokNum {
		num, _ := strconv.Atoi(tok)
		return num
	}
	if typ != tokDash {
		fatal("number parser error: unexpected token '%v'", tok)
	}
	tok, typ = lex.next()
	if typ == tokEOF {
		fatal("negative number parse error: unexpected EOF")
	}
	if typ != tokNum {
		fatal("number parser error: bad lexical token '%v'", tok)
	}
	num, _ := strconv.Atoi(tok)
	return -num
}

func isdelim(typ toktype) bool {
	return typ == tokComma || typ == tokPipe || typ == tokSpace
}

type toktype int

const (
	tokError toktype = iota
	tokEOF
	tokComma
	tokPipe
	tokDash
	tokNum
	tokSpace
	tokNF

	eof = -1
)

type lexer struct {
	input string
	tok   string
	typ   toktype
	start int
	pos   int
	width int
}

func (lex *lexer) peek() (string, toktype) {
	return lex.tok, lex.typ
}

func (lex *lexer) peektype() toktype {
	return lex.typ
}

func (lex *lexer) peektok() string {
	return lex.tok
}

func (lex *lexer) next() (string, toktype) {
	tok, typ := lex.peek()
	lex.tok, lex.typ = lex.scan()
	return tok, typ
}

func (lex *lexer) scan() (string, toktype) {
	switch r := lex.nextrune(); {
	case r == eof:
		return "", tokEOF
	case r == ',':
		return lex.token(), tokComma
	case r == '|':
		return lex.token(), tokPipe
	case r == '-':
		return lex.token(), tokDash
	case r == 'N':
		lex.consume()
		r = lex.nextrune()
		if r == 'F' {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNF
	case unicode.IsDigit(r):
		for r := lex.nextrune(); unicode.IsDigit(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokNum
	case unicode.IsSpace(r):
		for r := lex.nextrune(); unicode.IsSpace(r); r = lex.nextrune() {
			lex.consume()
		}
		lex.ignore()
		return lex.token(), tokSpace
	default:
		fatal("Lexical error at character '%v'", r)
	}
	return "", tokError
}
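// parserangesExample is an illustrative sketch, not part of the original
// command: it shows how a field-list expression in the grammar above is
// decoded into frange values; lastField marks an NF or open-ended bound.
func parserangesExample() {
	// "2-4,NF" selects fields two through four and then the last field
	// (field numbers are made zero-based later, in process).
	for _, r := range parseranges("2-4,NF") {
		fmt.Printf("begin=%d end=%d\n", r.begin, r.end)
	}
}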
func (lex *lexer) nextrune() (r rune) {
	if lex.pos >= len(lex.input) {
		lex.width = 0
		return eof
	}
	r, lex.width = utf8.DecodeRuneInString(lex.input[lex.pos:])
	return r
}

func (lex *lexer) consume() {
	lex.pos += lex.width
	lex.width = 0
}

func (lex *lexer) ignore() {
	lex.width = 0
}

func (lex *lexer) token() string {
	lex.consume()
	tok := lex.input[lex.start:lex.pos]
	lex.start = lex.pos
	return tok
}

func process(file *os.File, rv []frange, delim *regexp.Regexp, outsep string, collapse bool) {
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		prefix := ""
		printed := false
		line := scanner.Text()
		fields := split(line, delim, collapse)
		for _, r := range rv {
			begin, end := r.begin, r.end
			switch {
			case begin == 0:
				pprefix(prefix)
				prefix = outsep
				fmt.Print(line)
				printed = true
			case begin == lastField:
				begin = len(fields) - 1
			case begin < 0:
				begin += len(fields)
			default:
				begin--
			}
			if end < 0 {
				end += len(fields) + 1
			}
			if begin < 0 || end < 0 || end < begin || len(fields) < begin {
				continue
			}
			for i := begin; i < end && i < len(fields); i++ {
				pprefix(prefix)
				prefix = outsep
				fmt.Print(fields[i])
				printed = true
			}
		}
		if printed || !collapse {
			fmt.Println()
		}
	}
	err := scanner.Err()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

func split(s string, delim *regexp.Regexp, collapse bool) []string {
	sv := delim.Split(s, -1)
	if !collapse {
		return sv
	}
	rv := []string{}
	for _, s := range sv {
		if s != "" {
			rv = append(rv, s)
		}
	}
	return rv
}

func pprefix(prefix string) {
	if prefix == "" {
		return
	}
	if flags.nuloutsep {
		fmt.Print("\x00")
	} else {
		fmt.Print(prefix)
	}
}

func fatal(format string, a ...interface{}) {
	fmt.Fprintf(os.Stderr, format+"\n", a...)
	flag.Usage()
	os.Exit(1)
}
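// splitExample is an illustrative sketch, not part of the original command:
// it contrasts a raw regexp split with the collapsed form returned by split
// when empty fields are discarded.
func splitExample() {
	delim := regexp.MustCompile(",")
	line := ",alpha,,beta,"
	fmt.Println(split(line, delim, false)) // [ alpha  beta ] (empties kept)
	fmt.Println(split(line, delim, true))  // [alpha beta]    (empties dropped)
}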