/* Source: modernc.org/ccgo/v3@v3.16.14/lib/testdata/tcc-0.9.27/tests/tests2/46_grep.c */
/*
 * The information in this document is subject to change
 * without notice and should not be construed as a commitment
 * by Digital Equipment Corporation or by DECUS.
 *
 * Neither Digital Equipment Corporation, DECUS, nor the authors
 * assume any responsibility for the use or reliability of this
 * document or the described software.
 *
 *      Copyright (C) 1980, DECUS
 *
 * General permission to copy or modify, but not for profit, is
 * hereby granted, provided that the above copyright notice is
 * included and reference made to the fact that reproduction
 * privileges were granted by DECUS.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>      // tolower()

/*
 * grep
 *
 * Runs on the Decus compiler or on vms, On vms, define as:
 *      grep :== "$disk:[account]grep"      (native)
 *      grep :== "$disk:[account]grep grep" (Decus)
 * See below for more information.
 */

/* General help text, printed line by line by help(); NULL-terminated. */
char *documentation[] = {
   "grep searches a file for a given pattern. Execute by",
   " grep [flags] regular_expression file_list\n",
   "Flags are single characters preceded by '-':",
   " -c Only a count of matching lines is printed",
   " -f Print file name for matching lines switch, see below",
   " -n Each line is preceded by its line number",
   " -v Only print non-matching lines\n",
   "The file_list is a list of files (wildcards are acceptable on RSX modes).",
   "\nThe file name is normally printed if there is a file given.",
   "The -f flag reverses this action (print name no file, not if more).\n",
   NULL };

/* Pattern syntax help text, printed by help(); NULL-terminated. */
char *patdoc[] = {
   "The regular_expression defines the pattern to search for. Upper- and",
   "lower-case are always ignored. Blank lines never match. The expression",
   "should be quoted to prevent file-name translation.",
   "x An ordinary character (not mentioned below) matches that character.",
   "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
   "'^' A circumflex at the beginning of an expression matches the",
   " beginning of a line.",
   "'$' A dollar-sign at the end of an expression matches the end of a line.",
   "'.' A period matches any character except \"new-line\".",
   "':a' A colon matches a class of characters described by the following",
   "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
   "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
   "': ' other control characters, such as new-line.",
   "'*' An expression followed by an asterisk matches zero or more",
   " occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
   " \"foo\", etc.",
   "'+' An expression followed by a plus sign matches one or more",
   " occurrences of that expression: \"fo+\" matches \"fo\", etc.",
   "'-' An expression followed by a minus sign optionally matches",
   " the expression.",
   "'[]' A string enclosed in square brackets matches any character in",
   " that string, but no others. If the first character in the",
   " string is a circumflex, the expression matches any character",
   " except \"new-line\" and the characters in the string. For",
   " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
   " matches \"abc\" but not \"axb\". A range of characters may be",
   " specified by two characters separated by \"-\". Note that,",
   " [a-z] matches alphabetics, while [z-a] never matches.",
   "The concatenation of regular expressions is a regular expression.",
   NULL};

#define LMAX    512     /* Size of the line buffer lbuf[]      */
#define PMAX    256     /* Size of the pattern buffer pbuf[]   */

/* Opcodes stored in the compiled pattern. */
#define CHAR    1       /* Followed by the (lower-cased) character        */
#define BOL     2
#define EOL     3
#define ANY     4
#define CLASS   5       /* Followed by a count byte and the class members */
#define NCLASS  6       /* Same layout as CLASS, negated                  */
#define STAR    7       /* Followed by one element and an ENDPAT          */
#define PLUS    8       /* Followed by one element and an ENDPAT          */
#define MINUS   9       /* Followed by one element and an ENDPAT          */
#define ALPHA   10
#define DIGIT   11
#define NALPHA  12
#define PUNCT   13
#define RANGE   14      /* Inside a class: followed by low and high chars */
#define ENDPAT  15

int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;

char *pp, lbuf[LMAX], pbuf[PMAX];

char *cclass(char *source, char *src);
char *pmatch(char *line, char *pattern);
void store(int);
void error(char *);
void badpat(char *, char *, char *);
int match(void);


/*
 * Lower-case a character taken from a (possibly signed) char.
 * tolower() is only defined for unsigned char values and EOF, so the
 * argument is converted first; the result is narrowed back to char so
 * that it compares equal to bytes held in lbuf[] and pbuf[].
 */
static char fold(int c)
{
   return (char)tolower((unsigned char)c);
}

/*** Display a file name *******************************/
void file(char *s)
{
   printf("File %s:\n", s);
}

/*** Report unopenable file ****************************/
void cant(char *s)
{
   fprintf(stderr, "%s: cannot open\n", s);
}

/*** Give good help ************************************/
/* Prints every string of the NULL-terminated array hp on its own line. */
void help(char **hp)
{
   char **dp;

   for (dp = hp; *dp != NULL; ++dp)
      printf("%s\n", *dp);
}

/*** Display usage summary *****************************/
/* Prints the message s and a usage line on stderr, then exits with 1. */
void usage(char *s)
{
   fprintf(stderr, "?GREP-E-%s\n", s);
   fprintf(stderr,
      "Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n");
   exit(1);
}

/*** Compile the pattern into global pbuf[] ************/
/*
 * Translates the pattern text in source into the opcode stream in
 * pbuf[], leaving pp just past the stored bytes.  Any syntax error is
 * reported through badpat(), which does not return.
 */
void compile(char *source)
{
   char *s = source;    /* Source string pointer          */
   char *lp = pbuf;     /* Start of last pattern element  */
   char *spp;           /* Save end of pattern            */
   int c;               /* Current character              */

   if (debug)
      printf("Pattern = \"%s\"\n", s);
   pp = pbuf;
   while ((c = *s++) != '\0') {
      /*
       * STAR, PLUS and MINUS are special.
       */
      if (c == '*' || c == '+' || c == '-') {
         /*
          * Inspect the opcode that starts the previous element.  The
          * last stored byte cannot be used for this: after an
          * occurrence operator it is always ENDPAT, which would let
          * stacked operators such as "a**" through.
          */
         if (pp == pbuf ||
             *lp == BOL ||
             *lp == EOL ||
             *lp == STAR ||
             *lp == PLUS ||
             *lp == MINUS)
            badpat("Illegal occurrence op.", source, s);
         store(ENDPAT);
         store(ENDPAT);
         spp = pp;                      /* Save pattern end     */
         while (--pp > lp)              /* Move pattern down    */
            *pp = pp[-1];               /* one byte             */
         *pp = (c == '*') ? STAR :
               (c == '-') ? MINUS : PLUS;
         pp = spp;                      /* Restore pattern end  */
         continue;
      }
      /*
       * All the rest.
       */
      lp = pp;                          /* Remember start       */
      switch (c) {

      case '^':
         store(BOL);
         break;

      case '$':
         store(EOL);
         break;

      case '.':
         store(ANY);
         break;

      case '[':
         s = cclass(source, s);
         break;

      case ':':
         if (*s == '\0')
            badpat("No : type", source, s);     /* does not return */
         c = *s++;
         switch (fold(c)) {

         case 'a':
            store(ALPHA);
            break;

         case 'd':
            store(DIGIT);
            break;

         case 'n':
            store(NALPHA);
            break;

         case ' ':
            store(PUNCT);
            break;

         default:
            badpat("Unknown : type", source, s);

         }
         break;

      case '\\':
         if (*s)
            c = *s++;
         /* fallthrough: the quoted character is an ordinary one */

      default:
         store(CHAR);
         store(fold(c));
      }
   }
   store(ENDPAT);
   store(0);                            /* Terminate string     */
   if (debug) {
      for (lp = pbuf; lp < pp;) {
         if ((c = (*lp++ & 0377)) < ' ')
            printf("\\%o ", (unsigned)c);
         else printf("%c ", c);
      }
      printf("\n");
   }
}

/*** Compile a class (within []) ***********************/
/*
 * source  Pattern start -- for error messages.
 * src     First character after the opening '['.
 *
 * Stores CLASS or NCLASS, a count byte and the class members, and
 * returns the source position just past the closing ']'.  The count
 * includes the count byte itself, so a class without members has a
 * count of 1.
 */
char *cclass(char *source, char *src)
{
   char *s = src;       /* Source pointer       */
   char *cp;            /* Count byte position  */
   int c;               /* Current character    */
   int o = CLASS;       /* Opcode to store      */

   if (*s == '^') {
      ++s;
      o = NCLASS;
   }
   store(o);
   cp = pp;
   store(0);                            /* Byte count           */
   while ((c = *s++) != '\0' && c != ']') {
      if (c == '\\') {                  /* Store quoted char    */
         if ((c = *s++) == '\0')        /* Gotta get something  */
            badpat("Class terminates badly", source, s);
         else store(fold(c));
      }
      else if (c == '-' &&
               (pp - cp) > 1 && *s != ']' && *s != '\0') {
         c = pp[-1];                    /* Range start          */
         pp[-1] = RANGE;                /* Range signal         */
         store(c);                      /* Re-store start       */
         c = *s++;                      /* Get end char and     */
         store(fold(c));                /* Store it             */
      }
      else {
         store(fold(c));                /* Store normal char    */
      }
   }
   if (c != ']')
      badpat("Unterminated class", source, s);
   if ((c = (int)(pp - cp)) >= 256)
      badpat("Class too large", source, s);
   if (c <= 1)                          /* Only the count byte  */
      badpat("Empty class", source, s);
   *cp = (char)c;
   return(s);
}

/*** Store an entry in the pattern buffer **************/
/* Appends op to pbuf[]; a full buffer is a fatal error. */
void store(int op)
{
   if (pp >= &pbuf[PMAX])
      error("Pattern too complex\n");
   *pp++ = op;
}

/*** Report a bad pattern specification ****************/
/*
 * message  Error message.
 * source   Pattern start.
 * stop     Position just past the offending character.
 *
 * Does not return: error() terminates the program.
 */
void badpat(char *message, char *source, char *stop)
{
   fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
         (long)(stop - source), stop[-1]);
   error("?GREP-E-Bad pattern\n");
}

/*** Scan the file for the pattern in pbuf[] ***********/
/*
 * fp  File to process.
 * fn  File name (for -f option), or NULL.
 *
 * fgets() keeps the line terminator in lbuf[], so a selected line that
 * ends in a new-line is printed followed by an empty line.
 */
void grep(FILE *fp, char *fn)
{
   int lno = 0;
   int count = 0;

   while (fgets(lbuf, LMAX, fp) != NULL) {
      int m;

      ++lno;
      m = match();
      if ((m != 0) == (vflag != 0))     /* Line not selected    */
         continue;
      ++count;
      if (cflag)                        /* Counting only        */
         continue;
      if (fflag && fn) {
         file(fn);
         fn = NULL;                     /* Print the name once  */
      }
      if (nflag)
         printf("%d\t", lno);
      printf("%s\n", lbuf);
   }
   if (cflag) {
      if (fflag && fn)
         file(fn);
      printf("%d\n", count);
   }
}

/*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
int match(void)
{
   char *l;             /* Line pointer */

   for (l = lbuf; *l; ++l) {
      if (pmatch(l, pbuf))
         return(1);
   }
   return(0);
}

/*
 * Return the position just past the ENDPAT that closes the sub-pattern
 * starting at p.  Operand bytes (the character after CHAR, the body of
 * a class) are stepped over by opcode so that an operand that happens
 * to equal ENDPAT is not mistaken for the terminator.
 */
static char *skip_subpattern(char *p)
{
   int op;

   while ((op = *p++) != ENDPAT) {
      if (op == CHAR)
         ++p;
      else if (op == CLASS || op == NCLASS)
         p += *p & 0377;        /* Count includes the count byte */
   }
   return p;
}

/*** Match partial line with pattern *******************/
/*
 * line     (partial) line to match.
 * pattern  (partial) pattern to match.
 *
 * Returns the position in the line just past the matched text, or NULL
 * when the pattern does not match at line.
 */
char *pmatch(char *line, char *pattern)
{
   char *l;             /* Current line pointer         */
   char *p;             /* Current pattern pointer      */
   char c;              /* Current character            */
   char *e;             /* End for STAR and PLUS match  */
   int op;              /* Pattern operation            */
   int n;               /* Class counter                */
   char *are;           /* Start of STAR match          */

   l = line;
   if (debug > 1)
      printf("pmatch(\"%s\")\n", line);
   p = pattern;
   while ((op = *p++) != ENDPAT) {
      if (debug > 1)
         printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
               (long)(l - line), (unsigned)*l, *l, (unsigned)op);
      switch (op) {

      case CHAR:
         if (fold(*l++) != *p++)
            return(NULL);
         break;

      case BOL:
         if (l != lbuf)
            return(NULL);
         break;

      case EOL:
         if (*l != '\0')
            return(NULL);
         break;

      case ANY:
         if (*l++ == '\0')
            return(NULL);
         break;

      case DIGIT:
         if ((c = *l++) < '0' || (c > '9'))
            return(NULL);
         break;

      case ALPHA:
         c = fold(*l++);
         if (c < 'a' || c > 'z')
            return(NULL);
         break;

      case NALPHA:
         c = fold(*l++);
         if (c >= 'a' && c <= 'z')
            break;
         else if (c < '0' || c > '9')
            return(NULL);
         break;

      case PUNCT:
         c = *l++;
         if (c == 0 || c > ' ')
            return(NULL);
         break;

      case CLASS:
      case NCLASS:
         /*
          * Never consume the terminator: a negated class would
          * otherwise accept it and move l past the end of the line.
          */
         if (*l == '\0')
            return(NULL);
         c = fold(*l++);
         n = *p++ & 0377;
         do {
            if (*p == RANGE) {
               p += 3;
               n -= 2;
               if (c >= p[-2] && c <= p[-1])
                  break;
            }
            else if (c == *p++)
               break;
         } while (--n > 1);
         /* n <= 1 means every member was tried without a hit. */
         if ((op == CLASS) == (n <= 1))
            return(NULL);
         if (op == CLASS)
            p += n - 2;                 /* Skip untested members */
         break;

      case MINUS:
         e = pmatch(l, p);              /* Look for a match     */
         p = skip_subpattern(p);        /* Skip over pattern    */
         if (e)                         /* Got a match?         */
            l = e;                      /* Yes, update string   */
         break;                         /* Always succeeds      */

      case PLUS:                        /* One or more ...      */
         if ((l = pmatch(l, p)) == NULL)
            return(NULL);               /* Gotta have a match   */
         /* fallthrough */
      case STAR:                        /* Zero or more ...     */
         are = l;                       /* Remember line start  */
         while (*l && (e = pmatch(l, p)) != NULL)
            l = e;                      /* Get longest match    */
         p = skip_subpattern(p);        /* Skip over pattern    */
         for (;;) {                     /* Try to match rest    */
            if ((e = pmatch(l, p)) != NULL)
               return(e);
            if (l == are)               /* Back at the start    */
               break;
            --l;                        /* Nope, try earlier    */
         }
         return(NULL);                  /* Nothing else worked  */

      default:
         printf("Bad op code %d\n", op);
         error("Cannot happen -- match\n");
      }
   }
   return(l);
}

/*** Report an error ***********************************/
/* Writes s to stderr and exits with status 1. */
void error(char *s)
{
   fprintf(stderr, "%s", s);
   exit(1);
}

/*** Main program - parse arguments & grep *************/
/*
 * Arguments starting with '-' are flag groups, the first other
 * argument is the pattern and the remaining ones are files.  Consumed
 * arguments are set to NULL in argv so that the second pass only sees
 * file names.  Without file arguments stdin is searched.
 */
int main(int argc, char **argv)
{
   char *p;
   int c, i;
   int gotpattern = 0;
   FILE *f;

   if (argc <= 1)
      usage("No arguments");
   if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
      help(documentation);
      help(patdoc);
      return 0;
   }
   nfile = argc-1;
   for (i=1; i < argc; ++i) {
      p = argv[i];
      if (*p == '-') {
         ++p;
         while ((c = *p++) != '\0') {
            switch (fold(c)) {          /* Flags ignore case    */

            case '?':
               help(documentation);
               break;

            case 'c':
               ++cflag;
               break;

            case 'd':
               ++debug;
               break;

            case 'f':
               ++fflag;
               break;

            case 'n':
               ++nflag;
               break;

            case 'v':
               ++vflag;
               break;

            default:
               usage("Unknown flag");
            }
         }
         argv[i] = NULL;
         --nfile;
      } else if (!gotpattern) {
         compile(p);
         argv[i] = NULL;
         ++gotpattern;
         --nfile;
      }
   }
   if (!gotpattern)
      usage("No pattern");
   if (nfile == 0)
      grep(stdin, NULL);
   else {
      fflag = fflag ^ (nfile > 0);
      for (i=1; i < argc; ++i) {
         if ((p = argv[i]) != NULL) {
            if ((f=fopen(p, "r")) == NULL)
               cant(p);
            else {
               grep(f, p);
               fclose(f);
            }
         }
      }
   }
   return 0;
}

/* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/