modernc.org/cc@v1.0.1/testdata/tcc-0.9.26/tests/tests2/46_grep.c (about) 1 /* 2 * The information in this document is subject to change 3 * without notice and should not be construed as a commitment 4 * by Digital Equipment Corporation or by DECUS. 5 * 6 * Neither Digital Equipment Corporation, DECUS, nor the authors 7 * assume any responsibility for the use or reliability of this 8 * document or the described software. 9 * 10 * Copyright (C) 1980, DECUS 11 * 12 * General permission to copy or modify, but not for profit, is 13 * hereby granted, provided that the above copyright notice is 14 * included and reference made to the fact that reproduction 15 * privileges were granted by DECUS. 16 */ 17 18 #include <ctype.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 22 int match(); 23 void badpat(char *message, char *source, char *stop); 24 void error(char *s); 25 void store(int op); 26 27 /* 28 * grep 29 * 30 * Runs on the Decus compiler or on vms, On vms, define as: 31 * grep :== "$disk:[account]grep" (native) 32 * grep :== "$disk:[account]grep grep" (Decus) 33 * See below for more information. 34 */ 35 36 char *documentation[] = { 37 "grep searches a file for a given pattern. Execute by", 38 " grep [flags] regular_expression file_list\n", 39 "Flags are single characters preceeded by '-':", 40 " -c Only a count of matching lines is printed", 41 " -f Print file name for matching lines switch, see below", 42 " -n Each line is preceeded by its line number", 43 " -v Only print non-matching lines\n", 44 "The file_list is a list of files (wildcards are acceptable on RSX modes).", 45 "\nThe file name is normally printed if there is a file given.", 46 "The -f flag reverses this action (print name no file, not if more).\n", 47 0 }; 48 49 char *patdoc[] = { 50 "The regular_expression defines the pattern to search for. Upper- and", 51 "lower-case are always ignored. Blank lines never match. The expression", 52 "should be quoted to prevent file-name translation.", 53 "x An ordinary character (not mentioned below) matches that character.", 54 "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.", 55 "'^' A circumflex at the beginning of an expression matches the", 56 " beginning of a line.", 57 "'$' A dollar-sign at the end of an expression matches the end of a line.", 58 "'.' A period matches any character except \"new-line\".", 59 "':a' A colon matches a class of characters described by the following", 60 "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,", 61 "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and", 62 "': ' other control characters, such as new-line.", 63 "'*' An expression followed by an asterisk matches zero or more", 64 " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"", 65 " \"foo\", etc.", 66 "'+' An expression followed by a plus sign matches one or more", 67 " occurrances of that expression: \"fo+\" matches \"fo\", etc.", 68 "'-' An expression followed by a minus sign optionally matches", 69 " the expression.", 70 "'[]' A string enclosed in square brackets matches any character in", 71 " that string, but no others. If the first character in the", 72 " string is a circumflex, the expression matches any character", 73 " except \"new-line\" and the characters in the string. For", 74 " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"", 75 " matches \"abc\" but not \"axb\". A range of characters may be", 76 " specified by two characters separated by \"-\". Note that,", 77 " [a-z] matches alphabetics, while [z-a] never matches.", 78 "The concatenation of regular expressions is a regular expression.", 79 0}; 80 81 #define LMAX 512 82 #define PMAX 256 83 84 #define CHAR 1 85 #define BOL 2 86 #define EOL 3 87 #define ANY 4 88 #define CLASS 5 89 #define NCLASS 6 90 #define STAR 7 91 #define PLUS 8 92 #define MINUS 9 93 #define ALPHA 10 94 #define DIGIT 11 95 #define NALPHA 12 96 #define PUNCT 13 97 #define RANGE 14 98 #define ENDPAT 15 99 100 int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0; 101 102 char *pp, lbuf[LMAX], pbuf[PMAX]; 103 104 char *cclass(); 105 char *pmatch(); 106 107 108 /*** Display a file name *******************************/ 109 void file(char *s) 110 { 111 printf("File %s:\n", s); 112 } 113 114 /*** Report unopenable file ****************************/ 115 void cant(char *s) 116 { 117 fprintf(stderr, "%s: cannot open\n", s); 118 } 119 120 /*** Give good help ************************************/ 121 void help(char **hp) 122 { 123 char **dp; 124 125 for (dp = hp; *dp; ++dp) 126 printf("%s\n", *dp); 127 } 128 129 /*** Display usage summary *****************************/ 130 void usage(char *s) 131 { 132 fprintf(stderr, "?GREP-E-%s\n", s); 133 fprintf(stderr, 134 "Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n"); 135 exit(1); 136 } 137 138 /*** Compile the pattern into global pbuf[] ************/ 139 void compile(char *source) 140 { 141 char *s; /* Source string pointer */ 142 char *lp; /* Last pattern pointer */ 143 int c; /* Current character */ 144 int o; /* Temp */ 145 char *spp; /* Save beginning of pattern */ 146 147 s = source; 148 if (debug) 149 printf("Pattern = \"%s\"\n", s); 150 pp = pbuf; 151 while (c = *s++) { 152 /* 153 * STAR, PLUS and MINUS are special. 154 */ 155 if (c == '*' || c == '+' || c == '-') { 156 if (pp == pbuf || 157 (o=pp[-1]) == BOL || 158 o == EOL || 159 o == STAR || 160 o == PLUS || 161 o == MINUS) 162 badpat("Illegal occurrance op.", source, s); 163 store(ENDPAT); 164 store(ENDPAT); 165 spp = pp; /* Save pattern end */ 166 while (--pp > lp) /* Move pattern down */ 167 *pp = pp[-1]; /* one byte */ 168 *pp = (c == '*') ? STAR : 169 (c == '-') ? MINUS : PLUS; 170 pp = spp; /* Restore pattern end */ 171 continue; 172 } 173 /* 174 * All the rest. 175 */ 176 lp = pp; /* Remember start */ 177 switch(c) { 178 179 case '^': 180 store(BOL); 181 break; 182 183 case '$': 184 store(EOL); 185 break; 186 187 case '.': 188 store(ANY); 189 break; 190 191 case '[': 192 s = cclass(source, s); 193 break; 194 195 case ':': 196 if (*s) { 197 switch(tolower(c = *s++)) { 198 199 case 'a': 200 case 'A': 201 store(ALPHA); 202 break; 203 204 case 'd': 205 case 'D': 206 store(DIGIT); 207 break; 208 209 case 'n': 210 case 'N': 211 store(NALPHA); 212 break; 213 214 case ' ': 215 store(PUNCT); 216 break; 217 218 default: 219 badpat("Unknown : type", source, s); 220 221 } 222 break; 223 } 224 else badpat("No : type", source, s); 225 226 case '\\': 227 if (*s) 228 c = *s++; 229 230 default: 231 store(CHAR); 232 store(tolower(c)); 233 } 234 } 235 store(ENDPAT); 236 store(0); /* Terminate string */ 237 if (debug) { 238 for (lp = pbuf; lp < pp;) { 239 if ((c = (*lp++ & 0377)) < ' ') 240 printf("\\%o ", c); 241 else printf("%c ", c); 242 } 243 printf("\n"); 244 } 245 } 246 247 /*** Compile a class (within []) ***********************/ 248 char *cclass(char *source, char *src) 249 /* char *source; // Pattern start -- for error msg. */ 250 /* char *src; // Class start */ 251 { 252 char *s; /* Source pointer */ 253 char *cp; /* Pattern start */ 254 int c; /* Current character */ 255 int o; /* Temp */ 256 257 s = src; 258 o = CLASS; 259 if (*s == '^') { 260 ++s; 261 o = NCLASS; 262 } 263 store(o); 264 cp = pp; 265 store(0); /* Byte count */ 266 while ((c = *s++) && c!=']') { 267 if (c == '\\') { /* Store quoted char */ 268 if ((c = *s++) == '\0') /* Gotta get something */ 269 badpat("Class terminates badly", source, s); 270 else store(tolower(c)); 271 } 272 else if (c == '-' && 273 (pp - cp) > 1 && *s != ']' && *s != '\0') { 274 c = pp[-1]; /* Range start */ 275 pp[-1] = RANGE; /* Range signal */ 276 store(c); /* Re-store start */ 277 c = *s++; /* Get end char and*/ 278 store(tolower(c)); /* Store it */ 279 } 280 else { 281 store(tolower(c)); /* Store normal char */ 282 } 283 } 284 if (c != ']') 285 badpat("Unterminated class", source, s); 286 if ((c = (pp - cp)) >= 256) 287 badpat("Class too large", source, s); 288 if (c == 0) 289 badpat("Empty class", source, s); 290 *cp = c; 291 return(s); 292 } 293 294 /*** Store an entry in the pattern buffer **************/ 295 void store(int op) 296 { 297 if (pp >= &pbuf[PMAX]) 298 error("Pattern too complex\n"); 299 *pp++ = op; 300 } 301 302 /*** Report a bad pattern specification ****************/ 303 void badpat(char *message, char *source, char *stop) 304 /* char *message; // Error message */ 305 /* char *source; // Pattern start */ 306 /* char *stop; // Pattern end */ 307 { 308 fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source); 309 fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n", 310 stop-source, stop[-1]); 311 error("?GREP-E-Bad pattern\n"); 312 } 313 314 /*** Scan the file for the pattern in pbuf[] ***********/ 315 void grep(FILE *fp, char *fn) 316 /* FILE *fp; // File to process */ 317 /* char *fn; // File name (for -f option) */ 318 { 319 int lno, count, m; 320 321 lno = 0; 322 count = 0; 323 while (fgets(lbuf, LMAX, fp)) { 324 ++lno; 325 m = match(); 326 if ((m && !vflag) || (!m && vflag)) { 327 ++count; 328 if (!cflag) { 329 if (fflag && fn) { 330 file(fn); 331 fn = 0; 332 } 333 if (nflag) 334 printf("%d\t", lno); 335 printf("%s\n", lbuf); 336 } 337 } 338 } 339 if (cflag) { 340 if (fflag && fn) 341 file(fn); 342 printf("%d\n", count); 343 } 344 } 345 346 /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/ 347 int match() 348 { 349 char *l; /* Line pointer */ 350 351 for (l = lbuf; *l; ++l) { 352 if (pmatch(l, pbuf)) 353 return(1); 354 } 355 return(0); 356 } 357 358 /*** Match partial line with pattern *******************/ 359 char *pmatch(char *line, char *pattern) 360 /* char *line; // (partial) line to match */ 361 /* char *pattern; // (partial) pattern to match */ 362 { 363 char *l; /* Current line pointer */ 364 char *p; /* Current pattern pointer */ 365 char c; /* Current character */ 366 char *e; /* End for STAR and PLUS match */ 367 int op; /* Pattern operation */ 368 int n; /* Class counter */ 369 char *are; /* Start of STAR match */ 370 371 l = line; 372 if (debug > 1) 373 printf("pmatch(\"%s\")\n", line); 374 p = pattern; 375 while ((op = *p++) != ENDPAT) { 376 if (debug > 1) 377 printf("byte[%d] = 0%o, '%c', op = 0%o\n", 378 l-line, *l, *l, op); 379 switch(op) { 380 381 case CHAR: 382 if (tolower(*l++) != *p++) 383 return(0); 384 break; 385 386 case BOL: 387 if (l != lbuf) 388 return(0); 389 break; 390 391 case EOL: 392 if (*l != '\0') 393 return(0); 394 break; 395 396 case ANY: 397 if (*l++ == '\0') 398 return(0); 399 break; 400 401 case DIGIT: 402 if ((c = *l++) < '0' || (c > '9')) 403 return(0); 404 break; 405 406 case ALPHA: 407 c = tolower(*l++); 408 if (c < 'a' || c > 'z') 409 return(0); 410 break; 411 412 case NALPHA: 413 c = tolower(*l++); 414 if (c >= 'a' && c <= 'z') 415 break; 416 else if (c < '0' || c > '9') 417 return(0); 418 break; 419 420 case PUNCT: 421 c = *l++; 422 if (c == 0 || c > ' ') 423 return(0); 424 break; 425 426 case CLASS: 427 case NCLASS: 428 c = tolower(*l++); 429 n = *p++ & 0377; 430 do { 431 if (*p == RANGE) { 432 p += 3; 433 n -= 2; 434 if (c >= p[-2] && c <= p[-1]) 435 break; 436 } 437 else if (c == *p++) 438 break; 439 } while (--n > 1); 440 if ((op == CLASS) == (n <= 1)) 441 return(0); 442 if (op == CLASS) 443 p += n - 2; 444 break; 445 446 case MINUS: 447 e = pmatch(l, p); /* Look for a match */ 448 while (*p++ != ENDPAT); /* Skip over pattern */ 449 if (e) /* Got a match? */ 450 l = e; /* Yes, update string */ 451 break; /* Always succeeds */ 452 453 case PLUS: /* One or more ... */ 454 if ((l = pmatch(l, p)) == 0) 455 return(0); /* Gotta have a match */ 456 case STAR: /* Zero or more ... */ 457 are = l; /* Remember line start */ 458 while (*l && (e = pmatch(l, p))) 459 l = e; /* Get longest match */ 460 while (*p++ != ENDPAT); /* Skip over pattern */ 461 while (l >= are) { /* Try to match rest */ 462 if (e = pmatch(l, p)) 463 return(e); 464 --l; /* Nope, try earlier */ 465 } 466 return(0); /* Nothing else worked */ 467 468 default: 469 printf("Bad op code %d\n", op); 470 error("Cannot happen -- match\n"); 471 } 472 } 473 return(l); 474 } 475 476 /*** Report an error ***********************************/ 477 void error(char *s) 478 { 479 fprintf(stderr, "%s", s); 480 exit(1); 481 } 482 483 /*** Main program - parse arguments & grep *************/ 484 int main(int argc, char **argv) 485 { 486 char *p; 487 int c, i; 488 int gotpattern; 489 490 FILE *f; 491 492 if (argc <= 1) 493 usage("No arguments"); 494 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) { 495 help(documentation); 496 help(patdoc); 497 return 0; 498 } 499 nfile = argc-1; 500 gotpattern = 0; 501 for (i=1; i < argc; ++i) { 502 p = argv[i]; 503 if (*p == '-') { 504 ++p; 505 while (c = *p++) { 506 switch(tolower(c)) { 507 508 case '?': 509 help(documentation); 510 break; 511 512 case 'C': 513 case 'c': 514 ++cflag; 515 break; 516 517 case 'D': 518 case 'd': 519 ++debug; 520 break; 521 522 case 'F': 523 case 'f': 524 ++fflag; 525 break; 526 527 case 'n': 528 case 'N': 529 ++nflag; 530 break; 531 532 case 'v': 533 case 'V': 534 ++vflag; 535 break; 536 537 default: 538 usage("Unknown flag"); 539 } 540 } 541 argv[i] = 0; 542 --nfile; 543 } else if (!gotpattern) { 544 compile(p); 545 argv[i] = 0; 546 ++gotpattern; 547 --nfile; 548 } 549 } 550 if (!gotpattern) 551 usage("No pattern"); 552 if (nfile == 0) 553 grep(stdin, 0); 554 else { 555 fflag = fflag ^ (nfile > 0); 556 for (i=1; i < argc; ++i) { 557 if (p = argv[i]) { 558 if ((f=fopen(p, "r")) == NULL) 559 cant(p); 560 else { 561 grep(f, p); 562 fclose(f); 563 } 564 } 565 } 566 } 567 return 0; 568 } 569 570 /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/