modernc.org/cc@v1.0.1/testdata/tcc-0.9.26/tests/tests2/46_grep.c (about)

     1  /*
     2   * The  information  in  this  document  is  subject  to  change
     3   * without  notice  and  should not be construed as a commitment
     4   * by Digital Equipment Corporation or by DECUS.
     5   *
     6   * Neither Digital Equipment Corporation, DECUS, nor the authors
     7   * assume any responsibility for the use or reliability of  this
     8   * document or the described software.
     9   *
    10   *      Copyright (C) 1980, DECUS
    11   *
    12   * General permission to copy or modify, but not for profit,  is
    13   * hereby  granted,  provided that the above copyright notice is
    14   * included and reference made to  the  fact  that  reproduction
    15   * privileges were granted by DECUS.
    16   */
    17  
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
    21  
    22  int match();
    23  void badpat(char *message, char *source, char *stop);
    24  void error(char *s);
    25  void store(int op);
    26  
    27  /*
    28   * grep
    29   *
 * Runs on the Decus compiler or on VMS.  On VMS, define as:
    31   *      grep :== "$disk:[account]grep"      (native)
    32   *      grep :== "$disk:[account]grep grep" (Decus)
    33   * See below for more information.
    34   */
    35  
    36  char    *documentation[] = {
    37     "grep searches a file for a given pattern.  Execute by",
    38     "   grep [flags] regular_expression file_list\n",
    39     "Flags are single characters preceeded by '-':",
    40     "   -c      Only a count of matching lines is printed",
    41     "   -f      Print file name for matching lines switch, see below",
    42     "   -n      Each line is preceeded by its line number",
    43     "   -v      Only print non-matching lines\n",
    44     "The file_list is a list of files (wildcards are acceptable on RSX modes).",
    45     "\nThe file name is normally printed if there is a file given.",
    46     "The -f flag reverses this action (print name no file, not if more).\n",
    47     0 };
    48  
    49  char    *patdoc[] = {
    50     "The regular_expression defines the pattern to search for.  Upper- and",
    51     "lower-case are always ignored.  Blank lines never match.  The expression",
    52     "should be quoted to prevent file-name translation.",
    53     "x      An ordinary character (not mentioned below) matches that character.",
    54     "'\\'    The backslash quotes any character.  \"\\$\" matches a dollar-sign.",
    55     "'^'    A circumflex at the beginning of an expression matches the",
    56     "       beginning of a line.",
    57     "'$'    A dollar-sign at the end of an expression matches the end of a line.",
    58     "'.'    A period matches any character except \"new-line\".",
    59     "':a'   A colon matches a class of characters described by the following",
    60     "':d'     character.  \":a\" matches any alphabetic, \":d\" matches digits,",
    61     "':n'     \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    62     "': '     other control characters, such as new-line.",
    63     "'*'    An expression followed by an asterisk matches zero or more",
    64     "       occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
    65     "       \"foo\", etc.",
    66     "'+'    An expression followed by a plus sign matches one or more",
    67     "       occurrances of that expression: \"fo+\" matches \"fo\", etc.",
    68     "'-'    An expression followed by a minus sign optionally matches",
    69     "       the expression.",
    70     "'[]'   A string enclosed in square brackets matches any character in",
    71     "       that string, but no others.  If the first character in the",
    72     "       string is a circumflex, the expression matches any character",
    73     "       except \"new-line\" and the characters in the string.  For",
    74     "       example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    75     "       matches \"abc\" but not \"axb\".  A range of characters may be",
    76     "       specified by two characters separated by \"-\".  Note that,",
    77     "       [a-z] matches alphabetics, while [z-a] never matches.",
    78     "The concatenation of regular expressions is a regular expression.",
    79     0};
    80  
    81  #define LMAX    512
    82  #define PMAX    256
    83  
    84  #define CHAR    1
    85  #define BOL     2
    86  #define EOL     3
    87  #define ANY     4
    88  #define CLASS   5
    89  #define NCLASS  6
    90  #define STAR    7
    91  #define PLUS    8
    92  #define MINUS   9
    93  #define ALPHA   10
    94  #define DIGIT   11
    95  #define NALPHA  12
    96  #define PUNCT   13
    97  #define RANGE   14
    98  #define ENDPAT  15
    99  
   100  int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
   101  
   102  char *pp, lbuf[LMAX], pbuf[PMAX];
   103  
   104  char *cclass();
   105  char *pmatch();
   106  
   107  
   108  /*** Display a file name *******************************/
   109  void file(char *s)
   110  {
   111     printf("File %s:\n", s);
   112  }
   113  
   114  /*** Report unopenable file ****************************/
   115  void cant(char *s)
   116  {
   117     fprintf(stderr, "%s: cannot open\n", s);
   118  }
   119  
   120  /*** Give good help ************************************/
   121  void help(char **hp)
   122  {
   123     char   **dp;
   124  
   125     for (dp = hp; *dp; ++dp)
   126        printf("%s\n", *dp);
   127  }
   128  
   129  /*** Display usage summary *****************************/
   130  void usage(char *s)
   131  {
   132     fprintf(stderr, "?GREP-E-%s\n", s);
   133     fprintf(stderr,
   134           "Usage: grep [-cfnv] pattern [file ...].  grep ? for help\n");
   135     exit(1);
   136  }
   137  
   138  /*** Compile the pattern into global pbuf[] ************/
   139  void compile(char *source)
   140  {
   141     char  *s;         /* Source string pointer     */
   142     char  *lp;        /* Last pattern pointer      */
   143     int   c;          /* Current character         */
   144     int            o;          /* Temp                      */
   145     char           *spp;       /* Save beginning of pattern */
   146  
   147     s = source;
   148     if (debug)
   149        printf("Pattern = \"%s\"\n", s);
   150     pp = pbuf;
   151     while (c = *s++) {
   152        /*
   153         * STAR, PLUS and MINUS are special.
   154         */
   155        if (c == '*' || c == '+' || c == '-') {
   156           if (pp == pbuf ||
   157                 (o=pp[-1]) == BOL ||
   158                 o == EOL ||
   159                 o == STAR ||
   160                 o == PLUS ||
   161                 o == MINUS)
   162              badpat("Illegal occurrance op.", source, s);
   163           store(ENDPAT);
   164           store(ENDPAT);
   165           spp = pp;               /* Save pattern end     */
   166           while (--pp > lp)       /* Move pattern down    */
   167              *pp = pp[-1];        /* one byte             */
   168           *pp =   (c == '*') ? STAR :
   169              (c == '-') ? MINUS : PLUS;
   170           pp = spp;               /* Restore pattern end  */
   171           continue;
   172        }
   173        /*
   174         * All the rest.
   175         */
   176        lp = pp;         /* Remember start       */
   177        switch(c) {
   178  
   179           case '^':
   180              store(BOL);
   181              break;
   182  
   183           case '$':
   184              store(EOL);
   185              break;
   186  
   187           case '.':
   188              store(ANY);
   189              break;
   190  
   191           case '[':
   192              s = cclass(source, s);
   193              break;
   194  
   195           case ':':
   196              if (*s) {
   197                 switch(tolower(c = *s++)) {
   198  
   199                    case 'a':
   200                    case 'A':
   201                       store(ALPHA);
   202                       break;
   203  
   204                    case 'd':
   205                    case 'D':
   206                       store(DIGIT);
   207                       break;
   208  
   209                    case 'n':
   210                    case 'N':
   211                       store(NALPHA);
   212                       break;
   213  
   214                    case ' ':
   215                       store(PUNCT);
   216                       break;
   217  
   218                    default:
   219                       badpat("Unknown : type", source, s);
   220  
   221                 }
   222                 break;
   223              }
   224              else    badpat("No : type", source, s);
   225  
   226           case '\\':
   227              if (*s)
   228                 c = *s++;
   229  
   230           default:
   231              store(CHAR);
   232              store(tolower(c));
   233        }
   234     }
   235     store(ENDPAT);
   236     store(0);                /* Terminate string     */
   237     if (debug) {
   238        for (lp = pbuf; lp < pp;) {
   239           if ((c = (*lp++ & 0377)) < ' ')
   240              printf("\\%o ", c);
   241           else    printf("%c ", c);
   242        }
   243        printf("\n");
   244     }
   245  }
   246  
   247  /*** Compile a class (within []) ***********************/
   248  char *cclass(char *source, char *src)
   249     /* char       *source;   // Pattern start -- for error msg. */
   250     /* char       *src;      // Class start */
   251  {
   252     char   *s;        /* Source pointer    */
   253     char   *cp;       /* Pattern start     */
   254     int    c;         /* Current character */
   255     int             o;         /* Temp              */
   256  
   257     s = src;
   258     o = CLASS;
   259     if (*s == '^') {
   260        ++s;
   261        o = NCLASS;
   262     }
   263     store(o);
   264     cp = pp;
   265     store(0);                          /* Byte count      */
   266     while ((c = *s++) && c!=']') {
   267        if (c == '\\') {                /* Store quoted char    */
   268           if ((c = *s++) == '\0')      /* Gotta get something  */
   269              badpat("Class terminates badly", source, s);
   270           else    store(tolower(c));
   271        }
   272        else if (c == '-' &&
   273              (pp - cp) > 1 && *s != ']' && *s != '\0') {
   274           c = pp[-1];             /* Range start     */
   275           pp[-1] = RANGE;         /* Range signal    */
   276           store(c);               /* Re-store start  */
   277           c = *s++;               /* Get end char and*/
   278           store(tolower(c));      /* Store it        */
   279        }
   280        else {
   281           store(tolower(c));      /* Store normal char */
   282        }
   283     }
   284     if (c != ']')
   285        badpat("Unterminated class", source, s);
   286     if ((c = (pp - cp)) >= 256)
   287        badpat("Class too large", source, s);
   288     if (c == 0)
   289        badpat("Empty class", source, s);
   290     *cp = c;
   291     return(s);
   292  }
   293  
   294  /*** Store an entry in the pattern buffer **************/
   295  void store(int op)
   296  {
   297     if (pp >= &pbuf[PMAX])
   298        error("Pattern too complex\n");
   299     *pp++ = op;
   300  }
   301  
   302  /*** Report a bad pattern specification ****************/
   303  void badpat(char *message, char *source, char *stop)
   304     /* char  *message;       // Error message */
   305     /* char  *source;        // Pattern start */
   306     /* char  *stop;          // Pattern end   */
   307  {
   308     fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   309     fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n",
   310           stop-source, stop[-1]);
   311     error("?GREP-E-Bad pattern\n");
   312  }
   313  
   314  /*** Scan the file for the pattern in pbuf[] ***********/
   315  void grep(FILE *fp, char *fn)
   316     /* FILE       *fp;       // File to process            */
   317     /* char       *fn;       // File name (for -f option)  */
   318  {
   319     int lno, count, m;
   320  
   321     lno = 0;
   322     count = 0;
   323     while (fgets(lbuf, LMAX, fp)) {
   324        ++lno;
   325        m = match();
   326        if ((m && !vflag) || (!m && vflag)) {
   327           ++count;
   328           if (!cflag) {
   329              if (fflag && fn) {
   330                 file(fn);
   331                 fn = 0;
   332              }
   333              if (nflag)
   334                 printf("%d\t", lno);
   335              printf("%s\n", lbuf);
   336           }
   337        }
   338     }
   339     if (cflag) {
   340        if (fflag && fn)
   341           file(fn);
   342        printf("%d\n", count);
   343     }
   344  }
   345  
   346  /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
   347  int match()
   348  {
   349     char   *l;        /* Line pointer       */
   350  
   351     for (l = lbuf; *l; ++l) {
   352        if (pmatch(l, pbuf))
   353           return(1);
   354     }
   355     return(0);
   356  }
   357  
   358  /*** Match partial line with pattern *******************/
   359  char *pmatch(char *line, char *pattern)
   360     /* char               *line;     // (partial) line to match      */
   361     /* char               *pattern;  // (partial) pattern to match   */
   362  {
   363     char   *l;        /* Current line pointer         */
   364     char   *p;        /* Current pattern pointer      */
   365     char   c;         /* Current character            */
   366     char            *e;        /* End for STAR and PLUS match  */
   367     int             op;        /* Pattern operation            */
   368     int             n;         /* Class counter                */
   369     char            *are;      /* Start of STAR match          */
   370  
   371     l = line;
   372     if (debug > 1)
   373        printf("pmatch(\"%s\")\n", line);
   374     p = pattern;
   375     while ((op = *p++) != ENDPAT) {
   376        if (debug > 1)
   377           printf("byte[%d] = 0%o, '%c', op = 0%o\n",
   378                 l-line, *l, *l, op);
   379        switch(op) {
   380  
   381           case CHAR:
   382              if (tolower(*l++) != *p++)
   383                 return(0);
   384              break;
   385  
   386           case BOL:
   387              if (l != lbuf)
   388                 return(0);
   389              break;
   390  
   391           case EOL:
   392              if (*l != '\0')
   393                 return(0);
   394              break;
   395  
   396           case ANY:
   397              if (*l++ == '\0')
   398                 return(0);
   399              break;
   400  
   401           case DIGIT:
   402              if ((c = *l++) < '0' || (c > '9'))
   403                 return(0);
   404              break;
   405  
   406           case ALPHA:
   407              c = tolower(*l++);
   408              if (c < 'a' || c > 'z')
   409                 return(0);
   410              break;
   411  
   412           case NALPHA:
   413              c = tolower(*l++);
   414              if (c >= 'a' && c <= 'z')
   415                 break;
   416              else if (c < '0' || c > '9')
   417                 return(0);
   418              break;
   419  
   420           case PUNCT:
   421              c = *l++;
   422              if (c == 0 || c > ' ')
   423                 return(0);
   424              break;
   425  
   426           case CLASS:
   427           case NCLASS:
   428              c = tolower(*l++);
   429              n = *p++ & 0377;
   430              do {
   431                 if (*p == RANGE) {
   432                    p += 3;
   433                    n -= 2;
   434                    if (c >= p[-2] && c <= p[-1])
   435                       break;
   436                 }
   437                 else if (c == *p++)
   438                    break;
   439              } while (--n > 1);
   440              if ((op == CLASS) == (n <= 1))
   441                 return(0);
   442              if (op == CLASS)
   443                 p += n - 2;
   444              break;
   445  
   446           case MINUS:
   447              e = pmatch(l, p);       /* Look for a match    */
   448              while (*p++ != ENDPAT); /* Skip over pattern   */
   449              if (e)                  /* Got a match?        */
   450                 l = e;               /* Yes, update string  */
   451              break;                  /* Always succeeds     */
   452  
   453           case PLUS:                 /* One or more ...     */
   454              if ((l = pmatch(l, p)) == 0)
   455                 return(0);           /* Gotta have a match  */
   456           case STAR:                 /* Zero or more ...    */
   457              are = l;                /* Remember line start */
   458              while (*l && (e = pmatch(l, p)))
   459                 l = e;               /* Get longest match   */
   460              while (*p++ != ENDPAT); /* Skip over pattern   */
   461              while (l >= are) {      /* Try to match rest   */
   462                 if (e = pmatch(l, p))
   463                    return(e);
   464                 --l;                 /* Nope, try earlier   */
   465              }
   466              return(0);              /* Nothing else worked */
   467  
   468           default:
   469              printf("Bad op code %d\n", op);
   470              error("Cannot happen -- match\n");
   471        }
   472     }
   473     return(l);
   474  }
   475  
   476  /*** Report an error ***********************************/
   477  void error(char *s)
   478  {
   479     fprintf(stderr, "%s", s);
   480     exit(1);
   481  }
   482  
   483  /*** Main program - parse arguments & grep *************/
   484  int main(int argc, char **argv)
   485  {
   486     char   *p;
   487     int    c, i;
   488     int             gotpattern;
   489  
   490     FILE            *f;
   491  
   492     if (argc <= 1)
   493        usage("No arguments");
   494     if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
   495        help(documentation);
   496        help(patdoc);
   497        return 0;
   498     }
   499     nfile = argc-1;
   500     gotpattern = 0;
   501     for (i=1; i < argc; ++i) {
   502        p = argv[i];
   503        if (*p == '-') {
   504           ++p;
   505           while (c = *p++) {
   506              switch(tolower(c)) {
   507  
   508                 case '?':
   509                    help(documentation);
   510                    break;
   511  
   512                 case 'C':
   513                 case 'c':
   514                    ++cflag;
   515                    break;
   516  
   517                 case 'D':
   518                 case 'd':
   519                    ++debug;
   520                    break;
   521  
   522                 case 'F':
   523                 case 'f':
   524                    ++fflag;
   525                    break;
   526  
   527                 case 'n':
   528                 case 'N':
   529                    ++nflag;
   530                    break;
   531  
   532                 case 'v':
   533                 case 'V':
   534                    ++vflag;
   535                    break;
   536  
   537                 default:
   538                    usage("Unknown flag");
   539              }
   540           }
   541           argv[i] = 0;
   542           --nfile;
   543        } else if (!gotpattern) {
   544           compile(p);
   545           argv[i] = 0;
   546           ++gotpattern;
   547           --nfile;
   548        }
   549     }
   550     if (!gotpattern)
   551        usage("No pattern");
   552     if (nfile == 0)
   553        grep(stdin, 0);
   554     else {
   555        fflag = fflag ^ (nfile > 0);
   556        for (i=1; i < argc; ++i) {
   557           if (p = argv[i]) {
   558              if ((f=fopen(p, "r")) == NULL)
   559                 cant(p);
   560              else {
   561                 grep(f, p);
   562                 fclose(f);
   563              }
   564           }
   565        }
   566     }
   567     return 0;
   568  }
   569  
   570  /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/