/*
 * The information in this document is subject to change
 * without notice and should not be construed as a commitment
 * by Digital Equipment Corporation or by DECUS.
 *
 * Neither Digital Equipment Corporation, DECUS, nor the authors
 * assume any responsibility for the use or reliability of this
 * document or the described software.
 *
 *      Copyright (C) 1980, DECUS
 *
 * General permission to copy or modify, but not for profit, is
 * hereby granted, provided that the above copyright notice is
 * included and reference made to the fact that reproduction
 * privileges were granted by DECUS.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * grep
 *
 * Runs on the Decus compiler or on vms, On vms, define as:
 *      grep :== "$disk:[account]grep"      (native)
 *      grep :== "$disk:[account]grep grep" (Decus)
 * See below for more information.
 */

/*
 * Help text printed by "grep ?" and by the "-?" flag.  Both tables are
 * NULL-terminated lists of lines and are consumed by help().
 */
char *documentation[] = {
   "grep searches a file for a given pattern. Execute by",
   " grep [flags] regular_expression file_list\n",
   "Flags are single characters preceeded by '-':",
   " -c Only a count of matching lines is printed",
   " -f Print file name for matching lines switch, see below",
   " -n Each line is preceeded by its line number",
   " -v Only print non-matching lines\n",
   "The file_list is a list of files (wildcards are acceptable on RSX modes).",
   "\nThe file name is normally printed if there is a file given.",
   "The -f flag reverses this action (print name no file, not if more).\n",
   NULL };

char *patdoc[] = {
   "The regular_expression defines the pattern to search for. Upper- and",
   "lower-case are always ignored. Blank lines never match. The expression",
   "should be quoted to prevent file-name translation.",
   "x An ordinary character (not mentioned below) matches that character.",
   "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
   "'^' A circumflex at the beginning of an expression matches the",
   " beginning of a line.",
   "'$' A dollar-sign at the end of an expression matches the end of a line.",
   "'.' A period matches any character except \"new-line\".",
   "':a' A colon matches a class of characters described by the following",
   "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
   "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
   "': ' other control characters, such as new-line.",
   "'*' An expression followed by an asterisk matches zero or more",
   " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
   " \"foo\", etc.",
   "'+' An expression followed by a plus sign matches one or more",
   " occurrances of that expression: \"fo+\" matches \"fo\", etc.",
   "'-' An expression followed by a minus sign optionally matches",
   " the expression.",
   "'[]' A string enclosed in square brackets matches any character in",
   " that string, but no others. If the first character in the",
   " string is a circumflex, the expression matches any character",
   " except \"new-line\" and the characters in the string. For",
   " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
   " matches \"abc\" but not \"axb\". A range of characters may be",
   " specified by two characters separated by \"-\". Note that,",
   " [a-z] matches alphabetics, while [z-a] never matches.",
   "The concatenation of regular expressions is a regular expression.",
   NULL };

#define LMAX    512             /* Size of the input line buffer     */
#define PMAX    256             /* Size of the compiled pattern      */

/*
 * Opcodes of the compiled pattern held in pbuf[].
 *
 *   CHAR c                     literal (lower-cased) character c
 *   CLASS/NCLASS n e1 e2 ...   n counts itself plus the entry bytes;
 *                              an entry is a character or RANGE lo hi
 *   STAR/PLUS/MINUS x ENDPAT   occurrence operator applied to element x
 *   ENDPAT                     end of an operand or of the whole pattern
 */
#define CHAR    1
#define BOL     2
#define EOL     3
#define ANY     4
#define CLASS   5
#define NCLASS  6
#define STAR    7
#define PLUS    8
#define MINUS   9
#define ALPHA   10
#define DIGIT   11
#define NALPHA  12
#define PUNCT   13
#define RANGE   14
#define ENDPAT  15

int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;

char *pp, lbuf[LMAX], pbuf[PMAX];

/*
 * Every function is declared before use: several of them are called
 * ahead of their definition, and an implicit "int f()" declaration would
 * conflict with the real return types.
 */
void file(char *s);
void cant(char *s);
void help(char **hp);
void usage(char *s);
void compile(char *source);
char *cclass(char *source, char *src);
void store(int op);
void badpat(char *message, char *source, char *stop);
void grep(FILE *fp, char *fn);
int match(void);
char *pmatch(char *line, char *pattern);
void error(char *s);


/*** Display a file name *******************************/
void file(char *s)
{
   printf("File %s:\n", s);
}

/*** Report unopenable file ****************************/
void cant(char *s)
{
   fprintf(stderr, "%s: cannot open\n", s);
}

/*** Give good help ************************************/
/* Prints every string of the NULL-terminated list hp on its own line. */
void help(char **hp)
{
   char **dp;

   for (dp = hp; *dp; ++dp)
      printf("%s\n", *dp);
}

/*** Display usage summary *****************************/
/* Prints the message s and the usage line on stderr, then exits with 1. */
void usage(char *s)
{
   fprintf(stderr, "?GREP-E-%s\n", s);
   fprintf(stderr,
      "Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n");
   exit(1);
}

/*** Compile the pattern into global pbuf[] ************/
/*
 * Translates the textual pattern in source into the opcode stream
 * described next to the opcode definitions.  Characters are lower-cased
 * as they are stored.  A malformed pattern is reported through badpat(),
 * which does not return.
 */
void compile(char *source)
{
   char *s;                     /* Source string pointer               */
   char *lp;                    /* Start of the last pattern element   */
   int c;                       /* Current character                   */
   int prev;                    /* Opcode of the last element, 0: none */
   char *spp;                   /* Save end of pattern                 */

   s = source;
   if (debug)
      printf("Pattern = \"%s\"\n", s);
   pp = pbuf;
   lp = pbuf;
   prev = 0;
   /* Read through unsigned char so that c is always valid for tolower(). */
   while ((c = (unsigned char)*s++) != 0) {
      /*
       * STAR, PLUS and MINUS are special.
       */
      if (c == '*' || c == '+' || c == '-') {
         /*
          * The operator needs an element to apply to.  The opcode of
          * the previous element is tracked explicitly: the byte before
          * pp is operand data (or ENDPAT after an operator) and cannot
          * be relied upon to identify it.
          */
         if (prev == 0 ||
             prev == BOL ||
             prev == EOL ||
             prev == STAR ||
             prev == PLUS ||
             prev == MINUS)
            badpat("Illegal occurrance op.", source, s);
         store(ENDPAT);
         store(ENDPAT);         /* Second one only makes room       */
         spp = pp;              /* Save pattern end                 */
         while (--pp > lp)      /* Move pattern down                */
            *pp = pp[-1];       /* one byte                         */
         prev = (c == '*') ? STAR :
                (c == '-') ? MINUS : PLUS;
         *pp = (char)prev;      /* Operator goes before its operand */
         pp = spp;              /* Restore pattern end              */
         continue;
      }
      /*
       * All the rest.
       */
      lp = pp;                  /* Remember start */
      switch (c) {

      case '^':
         store(BOL);
         break;

      case '$':
         store(EOL);
         break;

      case '.':
         store(ANY);
         break;

      case '[':
         s = cclass(source, s);
         break;

      case ':':
         if (*s == '\0')
            badpat("No : type", source, s);
         c = (unsigned char)*s++;
         switch (tolower(c)) {

         case 'a':
            store(ALPHA);
            break;

         case 'd':
            store(DIGIT);
            break;

         case 'n':
            store(NALPHA);
            break;

         case ' ':
            store(PUNCT);
            break;

         default:
            badpat("Unknown : type", source, s);
         }
         break;

      case '\\':
         if (*s)
            c = (unsigned char)*s++;
         /* fallthrough: the quoted character is stored as a literal */

      default:
         store(CHAR);
         store(tolower(c));
      }
      prev = *lp;               /* Opcode that starts this element */
   }
   store(ENDPAT);
   store(0);                    /* Terminate string */
   if (debug) {
      for (lp = pbuf; lp < pp;) {
         if ((c = (*lp++ & 0377)) < ' ')
            printf("\\%o ", c);
         else printf("%c ", c);
      }
      printf("\n");
   }
}

/*** Compile a class (within []) ***********************/
/*
 * source is the pattern start (used for error messages only), src points
 * just after the opening '['.  Returns the position just after the
 * closing ']'.  An unterminated, over-long or empty class is reported
 * through badpat(), which does not return.
 */
char *cclass(char *source, char *src)
{
   char *s;                     /* Source pointer             */
   char *cp;                    /* Position of the count byte */
   int c;                       /* Current character          */
   int o;                       /* Class opcode               */

   s = src;
   o = CLASS;
   if (*s == '^') {
      ++s;
      o = NCLASS;
   }
   store(o);
   cp = pp;
   store(0);                    /* Byte count, patched below */
   while ((c = (unsigned char)*s++) != 0 && c != ']') {
      if (c == '\\') {          /* Store quoted char */
         if ((c = (unsigned char)*s++) == '\0') /* Gotta get something */
            badpat("Class terminates badly", source, s);
         else store(tolower(c));
      }
      else if (c == '-' &&
               (pp - cp) > 1 && *s != ']' && *s != '\0') {
         c = pp[-1];            /* Range start        */
         pp[-1] = RANGE;        /* Range signal       */
         store(c);              /* Re-store start     */
         c = (unsigned char)*s++; /* Get end char and */
         store(tolower(c));     /* Store it           */
      }
      else {
         store(tolower(c));     /* Store normal char  */
      }
   }
   if (c != ']')
      badpat("Unterminated class", source, s);
   if ((c = (int)(pp - cp)) >= 256)
      badpat("Class too large", source, s);
   /* The count includes the count byte itself, so 1 means "no entries". */
   if (c <= 1)
      badpat("Empty class", source, s);
   *cp = (char)c;
   return(s);
}

/*** Store an entry in the pattern buffer **************/
/* Appends op at pp; a full pbuf[] is fatal. */
void store(int op)
{
   if (pp >= &pbuf[PMAX])
      error("Pattern too complex\n");
   *pp++ = (char)op;
}

/*** Report a bad pattern specification ****************/
/*
 * message is the error text, source the pattern start and stop the
 * position just after the offending character.  Does not return.
 */
void badpat(char *message, char *source, char *stop)
{
   fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n",
      (int)(stop - source), stop[-1]);
   error("?GREP-E-Bad pattern\n");
}

/*** Scan the file for the pattern in pbuf[] ***********/
/*
 * fp is the stream to read; fn is its name for the file-name header, or
 * NULL when no header should ever be printed.  Selected lines are
 * printed, or only their count when cflag is set.
 */
void grep(FILE *fp, char *fn)
{
   int lno, count, m;
   size_t len;

   lno = 0;
   count = 0;
   while (fgets(lbuf, LMAX, fp)) {
      ++lno;
      /*
       * fgets() keeps the line terminator.  Drop it: the matcher treats
       * the terminating NUL as end of line, and the line is printed
       * with its own "\n" below.
       */
      len = strlen(lbuf);
      if (len > 0 && lbuf[len - 1] == '\n')
         lbuf[len - 1] = '\0';
      m = match();
      if ((m && !vflag) || (!m && vflag)) {
         ++count;
         if (!cflag) {
            if (fflag && fn) {
               file(fn);
               fn = 0;          /* Print the header only once */
            }
            if (nflag)
               printf("%d\t", lno);
            printf("%s\n", lbuf);
         }
      }
   }
   if (cflag) {
      if (fflag && fn)
         file(fn);
      printf("%d\n", count);
   }
}

/*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
/* Tries the pattern at every position of lbuf; an empty line never matches. */
int match(void)
{
   char *l;                     /* Line pointer */

   for (l = lbuf; *l; ++l) {
      if (pmatch(l, pbuf))
         return(1);
   }
   return(0);
}

/*
 * Returns the position following the operand of an occurrence operator
 * and the ENDPAT that closes it; p points at the operand's opcode.
 * The operand is stepped over by its layout rather than by scanning for
 * an ENDPAT byte, because a literal character or a class count byte may
 * have the same value as ENDPAT.
 */
static char *skip_operand(char *p)
{
   switch (*p++) {

   case CHAR:
      ++p;                      /* The literal character          */
      break;

   case CLASS:
   case NCLASS:
      p += *p & 0377;           /* Count byte plus all entries    */
      break;

   default:                     /* Single-byte element            */
      break;
   }
   return(p + 1);               /* Step over the closing ENDPAT   */
}

/*** Match partial line with pattern *******************/
/*
 * line is the (partial) line to match, pattern the (partial) compiled
 * pattern.  Returns the position in the line just after the matched
 * text, or NULL when the pattern does not match at this position.
 */
char *pmatch(char *line, char *pattern)
{
   char *l;                     /* Current line pointer           */
   char *p;                     /* Current pattern pointer        */
   char c;                      /* Current character              */
   char *e;                     /* End for STAR and PLUS match    */
   int op;                      /* Pattern operation              */
   int n;                       /* Class counter                  */
   char *are;                   /* Start of STAR match            */

   l = line;
   if (debug > 1)
      printf("pmatch(\"%s\")\n", line);
   p = pattern;
   while ((op = *p++) != ENDPAT) {
      if (debug > 1)
         printf("byte[%d] = 0%o, '%c', op = 0%o\n",
            (int)(l - line), (unsigned)(unsigned char)*l, *l,
            (unsigned)op);
      switch (op) {

      case CHAR:
         /* Both sides as unsigned char: the pattern byte was stored
            from a tolower() result. */
         if (tolower((unsigned char)*l++) != (unsigned char)*p++)
            return(0);
         break;

      case BOL:
         if (l != lbuf)
            return(0);
         break;

      case EOL:
         if (*l != '\0')
            return(0);
         break;

      case ANY:
         if (*l++ == '\0')
            return(0);
         break;

      case DIGIT:
         if ((c = *l++) < '0' || (c > '9'))
            return(0);
         break;

      case ALPHA:
         c = (char)tolower((unsigned char)*l++);
         if (c < 'a' || c > 'z')
            return(0);
         break;

      case NALPHA:
         c = (char)tolower((unsigned char)*l++);
         if (c >= 'a' && c <= 'z')
            break;
         else if (c < '0' || c > '9')
            return(0);
         break;

      case PUNCT:
         c = *l++;
         /* Unsigned compare so bytes above 127 are not taken for
            control characters where char is signed. */
         if (c == 0 || (unsigned char)c > ' ')
            return(0);
         break;

      case CLASS:
      case NCLASS:
         /* The end of the line is not a character: without this a
            negated class would match the NUL and run past it. */
         if (*l == '\0')
            return(0);
         c = (char)tolower((unsigned char)*l++);
         n = *p++ & 0377;       /* Count byte plus entries */
         do {
            if (*p == RANGE) {
               p += 3;
               n -= 2;
               if (c >= p[-2] && c <= p[-1])
                  break;
            }
            else if (c == *p++)
               break;
         } while (--n > 1);
         /* n > 1 here means an entry matched. */
         if ((op == CLASS) == (n <= 1))
            return(0);
         if (op == CLASS)
            p += n - 2;         /* Skip the entries not examined */
         break;

      case MINUS:
         e = pmatch(l, p);      /* Look for a match     */
         p = skip_operand(p);   /* Skip over pattern    */
         if (e)                 /* Got a match?         */
            l = e;              /* Yes, update string   */
         break;                 /* Always succeeds      */

      case PLUS:                /* One or more ...      */
         if ((l = pmatch(l, p)) == 0)
            return(0);          /* Gotta have a match   */
         /* fallthrough: the remainder behaves like STAR */
      case STAR:                /* Zero or more ...     */
         are = l;               /* Remember line start  */
         while (*l && (e = pmatch(l, p)) != 0)
            l = e;              /* Get longest match    */
         p = skip_operand(p);   /* Skip over pattern    */
         for (;;) {             /* Try to match rest    */
            if ((e = pmatch(l, p)) != 0)
               return(e);
            if (l == are)       /* Never step before the start */
               break;
            --l;                /* Nope, try earlier    */
         }
         return(0);             /* Nothing else worked  */

      default:
         printf("Bad op code %d\n", op);
         error("Cannot happen -- match\n");
      }
   }
   return(l);
}

/*** Report an error ***********************************/
/* Writes s to stderr and exits with status 1. */
void error(char *s)
{
   fprintf(stderr, "%s", s);
   exit(1);
}

/*** Main program - parse arguments & grep *************/
/*
 * Arguments starting with '-' are flag groups, the first other argument
 * is the pattern and the remaining ones are files.  Consumed arguments
 * are replaced by NULL in argv so that the file loop skips them.  With
 * no file argument, stdin is searched.
 */
int main(int argc, char **argv)
{
   char *p;
   int c, i;
   int gotpattern;

   FILE *f;

   if (argc <= 1)
      usage("No arguments");
   if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
      help(documentation);
      help(patdoc);
      return 0;
   }
   nfile = argc-1;
   gotpattern = 0;
   for (i=1; i < argc; ++i) {
      p = argv[i];
      if (*p == '-') {
         ++p;
         while ((c = (unsigned char)*p++) != 0) {
            switch (tolower(c)) {

            case '?':
               help(documentation);
               break;

            case 'c':
               ++cflag;
               break;

            case 'd':
               ++debug;
               break;

            case 'f':
               ++fflag;
               break;

            case 'n':
               ++nflag;
               break;

            case 'v':
               ++vflag;
               break;

            default:
               usage("Unknown flag");
            }
         }
         argv[i] = 0;
         --nfile;
      } else if (!gotpattern) {
         compile(p);
         argv[i] = 0;
         ++gotpattern;
         --nfile;
      }
   }
   if (!gotpattern)
      usage("No pattern");
   if (nfile == 0)
      grep(stdin, 0);
   else {
      /* Files were given: -f now switches the file-name header off. */
      fflag = fflag ^ (nfile > 0);
      for (i=1; i < argc; ++i) {
         if ((p = argv[i]) != 0) {
            if ((f=fopen(p, "r")) == NULL)
               cant(p);
            else {
               grep(f, p);
               fclose(f);
            }
         }
      }
   }
   return 0;
}

/* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/