modernc.org/ccgo/v3@v3.16.14/lib/testdata/tcc-0.9.27/tests/tests2/46_grep.c (about)

     1  /*
     2   * The  information  in  this  document  is  subject  to  change
     3   * without  notice  and  should not be construed as a commitment
     4   * by Digital Equipment Corporation or by DECUS.
     5   *
     6   * Neither Digital Equipment Corporation, DECUS, nor the authors
     7   * assume any responsibility for the use or reliability of  this
     8   * document or the described software.
     9   *
    10   *      Copyright (C) 1980, DECUS
    11   *
    12   * General permission to copy or modify, but not for profit,  is
    13   * hereby  granted,  provided that the above copyright notice is
    14   * included and reference made to  the  fact  that  reproduction
    15   * privileges were granted by DECUS.
    16   */
    17  #include <stdio.h>
    18  #include <stdlib.h>
    19  #include <ctype.h>	// tolower()
    20  
    21  /*
    22   * grep
    23   *
    24   * Runs on the Decus compiler or on vms, On vms, define as:
    25   *      grep :== "$disk:[account]grep"      (native)
    26   *      grep :== "$disk:[account]grep grep" (Decus)
    27   * See below for more information.
    28   */
    29  
    30  char    *documentation[] = {
    31     "grep searches a file for a given pattern.  Execute by",
    32     "   grep [flags] regular_expression file_list\n",
    33     "Flags are single characters preceded by '-':",
    34     "   -c      Only a count of matching lines is printed",
    35     "   -f      Print file name for matching lines switch, see below",
    36     "   -n      Each line is preceded by its line number",
    37     "   -v      Only print non-matching lines\n",
    38     "The file_list is a list of files (wildcards are acceptable on RSX modes).",
    39     "\nThe file name is normally printed if there is a file given.",
    40     "The -f flag reverses this action (print name no file, not if more).\n",
    41     0 };
    42  
    43  char    *patdoc[] = {
    44     "The regular_expression defines the pattern to search for.  Upper- and",
    45     "lower-case are always ignored.  Blank lines never match.  The expression",
    46     "should be quoted to prevent file-name translation.",
    47     "x      An ordinary character (not mentioned below) matches that character.",
    48     "'\\'    The backslash quotes any character.  \"\\$\" matches a dollar-sign.",
    49     "'^'    A circumflex at the beginning of an expression matches the",
    50     "       beginning of a line.",
    51     "'$'    A dollar-sign at the end of an expression matches the end of a line.",
    52     "'.'    A period matches any character except \"new-line\".",
    53     "':a'   A colon matches a class of characters described by the following",
    54     "':d'     character.  \":a\" matches any alphabetic, \":d\" matches digits,",
    55     "':n'     \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    56     "': '     other control characters, such as new-line.",
    57     "'*'    An expression followed by an asterisk matches zero or more",
    58     "       occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
    59     "       \"foo\", etc.",
    60     "'+'    An expression followed by a plus sign matches one or more",
    61     "       occurrences of that expression: \"fo+\" matches \"fo\", etc.",
    62     "'-'    An expression followed by a minus sign optionally matches",
    63     "       the expression.",
    64     "'[]'   A string enclosed in square brackets matches any character in",
    65     "       that string, but no others.  If the first character in the",
    66     "       string is a circumflex, the expression matches any character",
    67     "       except \"new-line\" and the characters in the string.  For",
    68     "       example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    69     "       matches \"abc\" but not \"axb\".  A range of characters may be",
    70     "       specified by two characters separated by \"-\".  Note that,",
    71     "       [a-z] matches alphabetics, while [z-a] never matches.",
    72     "The concatenation of regular expressions is a regular expression.",
    73     0};
    74  
    75  #define LMAX    512
    76  #define PMAX    256
    77  
    78  #define CHAR    1
    79  #define BOL     2
    80  #define EOL     3
    81  #define ANY     4
    82  #define CLASS   5
    83  #define NCLASS  6
    84  #define STAR    7
    85  #define PLUS    8
    86  #define MINUS   9
    87  #define ALPHA   10
    88  #define DIGIT   11
    89  #define NALPHA  12
    90  #define PUNCT   13
    91  #define RANGE   14
    92  #define ENDPAT  15
    93  
    94  int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
    95  
    96  char *pp, lbuf[LMAX], pbuf[PMAX];
    97  
    98  char *cclass();
    99  char *pmatch();
   100  void store(int);
   101  void error(char *);
   102  void badpat(char *, char *, char *);
   103  int match(void);
   104  
   105  
   106  /*** Display a file name *******************************/
   107  void file(char *s)
   108  {
   109     printf("File %s:\n", s);
   110  }
   111  
   112  /*** Report unopenable file ****************************/
   113  void cant(char *s)
   114  {
   115     fprintf(stderr, "%s: cannot open\n", s);
   116  }
   117  
   118  /*** Give good help ************************************/
   119  void help(char **hp)
   120  {
   121     char   **dp;
   122  
   123     for (dp = hp; *dp; ++dp)
   124        printf("%s\n", *dp);
   125  }
   126  
   127  /*** Display usage summary *****************************/
   128  void usage(char *s)
   129  {
   130     fprintf(stderr, "?GREP-E-%s\n", s);
   131     fprintf(stderr,
   132           "Usage: grep [-cfnv] pattern [file ...].  grep ? for help\n");
   133     exit(1);
   134  }
   135  
   136  /*** Compile the pattern into global pbuf[] ************/
   137  void compile(char *source)
   138  {
   139     char  *s;         /* Source string pointer     */
   140     char  *lp;        /* Last pattern pointer      */
   141     int   c;          /* Current character         */
   142     int            o;          /* Temp                      */
   143     char           *spp;       /* Save beginning of pattern */
   144  
   145     s = source;
   146     if (debug)
   147        printf("Pattern = \"%s\"\n", s);
   148     pp = pbuf;
   149     while (c = *s++) {
   150        /*
   151         * STAR, PLUS and MINUS are special.
   152         */
   153        if (c == '*' || c == '+' || c == '-') {
   154           if (pp == pbuf ||
   155                 (o=pp[-1]) == BOL ||
   156                 o == EOL ||
   157                 o == STAR ||
   158                 o == PLUS ||
   159                 o == MINUS)
   160              badpat("Illegal occurrence op.", source, s);
   161           store(ENDPAT);
   162           store(ENDPAT);
   163           spp = pp;               /* Save pattern end     */
   164           while (--pp > lp)       /* Move pattern down    */
   165              *pp = pp[-1];        /* one byte             */
   166           *pp =   (c == '*') ? STAR :
   167              (c == '-') ? MINUS : PLUS;
   168           pp = spp;               /* Restore pattern end  */
   169           continue;
   170        }
   171        /*
   172         * All the rest.
   173         */
   174        lp = pp;         /* Remember start       */
   175        switch(c) {
   176  
   177           case '^':
   178              store(BOL);
   179              break;
   180  
   181           case '$':
   182              store(EOL);
   183              break;
   184  
   185           case '.':
   186              store(ANY);
   187              break;
   188  
   189           case '[':
   190              s = cclass(source, s);
   191              break;
   192  
   193           case ':':
   194              if (*s) {
   195                 switch(tolower(c = *s++)) {
   196  
   197                    case 'a':
   198                    case 'A':
   199                       store(ALPHA);
   200                       break;
   201  
   202                    case 'd':
   203                    case 'D':
   204                       store(DIGIT);
   205                       break;
   206  
   207                    case 'n':
   208                    case 'N':
   209                       store(NALPHA);
   210                       break;
   211  
   212                    case ' ':
   213                       store(PUNCT);
   214                       break;
   215  
   216                    default:
   217                       badpat("Unknown : type", source, s);
   218  
   219                 }
   220                 break;
   221              }
   222              else    badpat("No : type", source, s);
   223  
   224           case '\\':
   225              if (*s)
   226                 c = *s++;
   227  
   228           default:
   229              store(CHAR);
   230              store(tolower(c));
   231        }
   232     }
   233     store(ENDPAT);
   234     store(0);                /* Terminate string     */
   235     if (debug) {
   236        for (lp = pbuf; lp < pp;) {
   237           if ((c = (*lp++ & 0377)) < ' ')
   238              printf("\\%o ", c);
   239           else    printf("%c ", c);
   240        }
   241        printf("\n");
   242     }
   243  }
   244  
   245  /*** Compile a class (within []) ***********************/
   246  char *cclass(char *source, char *src)
   247     /* char       *source;   // Pattern start -- for error msg. */
   248     /* char       *src;      // Class start */
   249  {
   250     char   *s;        /* Source pointer    */
   251     char   *cp;       /* Pattern start     */
   252     int    c;         /* Current character */
   253     int             o;         /* Temp              */
   254  
   255     s = src;
   256     o = CLASS;
   257     if (*s == '^') {
   258        ++s;
   259        o = NCLASS;
   260     }
   261     store(o);
   262     cp = pp;
   263     store(0);                          /* Byte count      */
   264     while ((c = *s++) && c!=']') {
   265        if (c == '\\') {                /* Store quoted char    */
   266           if ((c = *s++) == '\0')      /* Gotta get something  */
   267              badpat("Class terminates badly", source, s);
   268           else    store(tolower(c));
   269        }
   270        else if (c == '-' &&
   271              (pp - cp) > 1 && *s != ']' && *s != '\0') {
   272           c = pp[-1];             /* Range start     */
   273           pp[-1] = RANGE;         /* Range signal    */
   274           store(c);               /* Re-store start  */
   275           c = *s++;               /* Get end char and*/
   276           store(tolower(c));      /* Store it        */
   277        }
   278        else {
   279           store(tolower(c));      /* Store normal char */
   280        }
   281     }
   282     if (c != ']')
   283        badpat("Unterminated class", source, s);
   284     if ((c = (pp - cp)) >= 256)
   285        badpat("Class too large", source, s);
   286     if (c == 0)
   287        badpat("Empty class", source, s);
   288     *cp = c;
   289     return(s);
   290  }
   291  
   292  /*** Store an entry in the pattern buffer **************/
   293  void store(int op)
   294  {
   295     if (pp >= &pbuf[PMAX])
   296        error("Pattern too complex\n");
   297     *pp++ = op;
   298  }
   299  
   300  /*** Report a bad pattern specification ****************/
   301  void badpat(char *message, char *source, char *stop)
   302     /* char  *message;       // Error message */
   303     /* char  *source;        // Pattern start */
   304     /* char  *stop;          // Pattern end   */
   305  {
   306     fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   307     fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
   308           stop-source, stop[-1]);
   309     error("?GREP-E-Bad pattern\n");
   310  }
   311  
   312  /*** Scan the file for the pattern in pbuf[] ***********/
   313  void grep(FILE *fp, char *fn)
   314     /* FILE       *fp;       // File to process            */
   315     /* char       *fn;       // File name (for -f option)  */
   316  {
   317     int lno, count, m;
   318  
   319     lno = 0;
   320     count = 0;
   321     while (fgets(lbuf, LMAX, fp)) {
   322        ++lno;
   323        m = match();
   324        if ((m && !vflag) || (!m && vflag)) {
   325           ++count;
   326           if (!cflag) {
   327              if (fflag && fn) {
   328                 file(fn);
   329                 fn = 0;
   330              }
   331              if (nflag)
   332                 printf("%d\t", lno);
   333              printf("%s\n", lbuf);
   334           }
   335        }
   336     }
   337     if (cflag) {
   338        if (fflag && fn)
   339           file(fn);
   340        printf("%d\n", count);
   341     }
   342  }
   343  
   344  /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
   345  int match()
   346  {
   347     char   *l;        /* Line pointer       */
   348  
   349     for (l = lbuf; *l; ++l) {
   350        if (pmatch(l, pbuf))
   351           return(1);
   352     }
   353     return(0);
   354  }
   355  
   356  /*** Match partial line with pattern *******************/
   357  char *pmatch(char *line, char *pattern)
   358     /* char               *line;     // (partial) line to match      */
   359     /* char               *pattern;  // (partial) pattern to match   */
   360  {
   361     char   *l;        /* Current line pointer         */
   362     char   *p;        /* Current pattern pointer      */
   363     char   c;         /* Current character            */
   364     char            *e;        /* End for STAR and PLUS match  */
   365     int             op;        /* Pattern operation            */
   366     int             n;         /* Class counter                */
   367     char            *are;      /* Start of STAR match          */
   368  
   369     l = line;
   370     if (debug > 1)
   371        printf("pmatch(\"%s\")\n", line);
   372     p = pattern;
   373     while ((op = *p++) != ENDPAT) {
   374        if (debug > 1)
   375           printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
   376                 l-line, *l, *l, op);
   377        switch(op) {
   378  
   379           case CHAR:
   380              if (tolower(*l++) != *p++)
   381                 return(0);
   382              break;
   383  
   384           case BOL:
   385              if (l != lbuf)
   386                 return(0);
   387              break;
   388  
   389           case EOL:
   390              if (*l != '\0')
   391                 return(0);
   392              break;
   393  
   394           case ANY:
   395              if (*l++ == '\0')
   396                 return(0);
   397              break;
   398  
   399           case DIGIT:
   400              if ((c = *l++) < '0' || (c > '9'))
   401                 return(0);
   402              break;
   403  
   404           case ALPHA:
   405              c = tolower(*l++);
   406              if (c < 'a' || c > 'z')
   407                 return(0);
   408              break;
   409  
   410           case NALPHA:
   411              c = tolower(*l++);
   412              if (c >= 'a' && c <= 'z')
   413                 break;
   414              else if (c < '0' || c > '9')
   415                 return(0);
   416              break;
   417  
   418           case PUNCT:
   419              c = *l++;
   420              if (c == 0 || c > ' ')
   421                 return(0);
   422              break;
   423  
   424           case CLASS:
   425           case NCLASS:
   426              c = tolower(*l++);
   427              n = *p++ & 0377;
   428              do {
   429                 if (*p == RANGE) {
   430                    p += 3;
   431                    n -= 2;
   432                    if (c >= p[-2] && c <= p[-1])
   433                       break;
   434                 }
   435                 else if (c == *p++)
   436                    break;
   437              } while (--n > 1);
   438              if ((op == CLASS) == (n <= 1))
   439                 return(0);
   440              if (op == CLASS)
   441                 p += n - 2;
   442              break;
   443  
   444           case MINUS:
   445              e = pmatch(l, p);       /* Look for a match    */
   446              while (*p++ != ENDPAT); /* Skip over pattern   */
   447              if (e)                  /* Got a match?        */
   448                 l = e;               /* Yes, update string  */
   449              break;                  /* Always succeeds     */
   450  
   451           case PLUS:                 /* One or more ...     */
   452              if ((l = pmatch(l, p)) == 0)
   453                 return(0);           /* Gotta have a match  */
   454           case STAR:                 /* Zero or more ...    */
   455              are = l;                /* Remember line start */
   456              while (*l && (e = pmatch(l, p)))
   457                 l = e;               /* Get longest match   */
   458              while (*p++ != ENDPAT); /* Skip over pattern   */
   459              while (l >= are) {      /* Try to match rest   */
   460                 if (e = pmatch(l, p))
   461                    return(e);
   462                 --l;                 /* Nope, try earlier   */
   463              }
   464              return(0);              /* Nothing else worked */
   465  
   466           default:
   467              printf("Bad op code %d\n", op);
   468              error("Cannot happen -- match\n");
   469        }
   470     }
   471     return(l);
   472  }
   473  
   474  /*** Report an error ***********************************/
   475  void error(char *s)
   476  {
   477     fprintf(stderr, "%s", s);
   478     exit(1);
   479  }
   480  
   481  /*** Main program - parse arguments & grep *************/
   482  int main(int argc, char **argv)
   483  {
   484     char   *p;
   485     int    c, i;
   486     int             gotpattern;
   487  
   488     FILE            *f;
   489  
   490     if (argc <= 1)
   491        usage("No arguments");
   492     if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
   493        help(documentation);
   494        help(patdoc);
   495        return 0;
   496     }
   497     nfile = argc-1;
   498     gotpattern = 0;
   499     for (i=1; i < argc; ++i) {
   500        p = argv[i];
   501        if (*p == '-') {
   502           ++p;
   503           while (c = *p++) {
   504              switch(tolower(c)) {
   505  
   506                 case '?':
   507                    help(documentation);
   508                    break;
   509  
   510                 case 'C':
   511                 case 'c':
   512                    ++cflag;
   513                    break;
   514  
   515                 case 'D':
   516                 case 'd':
   517                    ++debug;
   518                    break;
   519  
   520                 case 'F':
   521                 case 'f':
   522                    ++fflag;
   523                    break;
   524  
   525                 case 'n':
   526                 case 'N':
   527                    ++nflag;
   528                    break;
   529  
   530                 case 'v':
   531                 case 'V':
   532                    ++vflag;
   533                    break;
   534  
   535                 default:
   536                    usage("Unknown flag");
   537              }
   538           }
   539           argv[i] = 0;
   540           --nfile;
   541        } else if (!gotpattern) {
   542           compile(p);
   543           argv[i] = 0;
   544           ++gotpattern;
   545           --nfile;
   546        }
   547     }
   548     if (!gotpattern)
   549        usage("No pattern");
   550     if (nfile == 0)
   551        grep(stdin, 0);
   552     else {
   553        fflag = fflag ^ (nfile > 0);
   554        for (i=1; i < argc; ++i) {
   555           if (p = argv[i]) {
   556              if ((f=fopen(p, "r")) == NULL)
   557                 cant(p);
   558              else {
   559                 grep(f, p);
   560                 fclose(f);
   561              }
   562           }
   563        }
   564     }
   565     return 0;
   566  }
   567  
   568  /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/