github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/src/depfile_parser.cc (about)

     1  /* Generated by re2c */
     2  // Copyright 2011 Google Inc. All Rights Reserved.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  #include "depfile_parser.h"
    17  #include "util.h"
    18  
    19  #include <algorithm>
    20  
    21  using namespace std;
    22  
    23  DepfileParser::DepfileParser(DepfileParserOptions options)
    24    : options_(options)
    25  {
    26  }
    27  
    28  // A note on backslashes in Makefiles, from reading the docs:
    29  // Backslash-newline is the line continuation character.
    30  // Backslash-# escapes a # (otherwise meaningful as a comment start).
    31  // Backslash-% escapes a % (otherwise meaningful as a special).
    32  // Finally, quoting the GNU manual, "Backslashes that are not in danger
    33  // of quoting ‘%’ characters go unmolested."
    34  // How do you end a line with a backslash?  The netbsd Make docs suggest
    35  // reading the result of a shell command echoing a backslash!
    36  //
    37  // Rather than implement all of above, we follow what GCC/Clang produces:
    38  // Backslashes escape a space or hash sign.
    39  // When a space is preceded by 2N+1 backslashes, it represents N backslashes
    40  // followed by space.
    41  // When a space is preceded by 2N backslashes, it represents 2N backslashes at
    42  // the end of a filename.
    43  // A hash sign is escaped by a single backslash. All other backslashes remain
    44  // unchanged.
        // In addition, "$$" is de-escaped to a single "$", and one backslash in front
        // of a colon is dropped unless that colon is followed by whitespace or the
        // end of the input.
    45  //
    46  // If anyone actually has depfiles that rely on the more complicated
    47  // behavior we can adjust this.
        //
        // Parse scans |content| as a depfile and records the file names it finds.
        // On each line, names up to and including the first one that ends in ':' are
        // treated as targets and appended to outs_; the names after it are treated
        // as inputs and appended to ins_. Names already recorded are not added again.
        //
        // content: the depfile text. It is rewritten in place while de-escaping, and
        //   the recorded StringPieces are built from pointers into its buffer.
        //   NOTE(review): presumably StringPiece does not copy, so |content| has to
        //   outlive ins_/outs_ -- confirm against StringPiece.
        // err: receives a message when false is returned.
        // Returns false if no name ending in ':' was seen at all, or if a name that
        // is already in ins_ appears as a target and new inputs follow it on the
        // same line; returns true otherwise.
    48  bool DepfileParser::Parse(string* content, string* err) {
    49    // in: current parser input point.
    50    // end: end of input.
    51    // parsing_targets: whether we are parsing targets or dependencies.
          // have_target: set once any name ending in ':' has been seen.
          // poisoned_input: set when a name already in ins_ shows up as a target;
          // any new input before the next newline is then rejected.
    52    char* in = &(*content)[0];
    53    char* end = in + content->size();
    54    bool have_target = false;
    55    bool parsing_targets = true;
    56    bool poisoned_input = false;
          // Each iteration of this loop scans one (possibly empty) file name.
    57    while (in < end) {
    58      bool have_newline = false;
    59      // out: current output point (typically same as in, but can fall behind
    60      // as we de-escape backslashes).
    61      char* out = in;
    62      // filename: start of the current parsed filename.
    63      char* filename = out;
            // Each pass runs the re2c-generated scanner once. A `continue` keeps
            // extending the current file name; a `break` ends it.
    64      for (;;) {
    65        // start: beginning of the current parsed span.
    66        const char* start = in;
              // yymarker: backtrack position, saved in yy13 and restored in yy19.
    67        char* yymarker = NULL;
    68        
    69      {
    70        unsigned char yych;
              // yybm: one entry per byte value. Bit 128 is set for bytes that can
              // extend a span of plain text (tested right below and in yy9).
    71        static const unsigned char yybm[] = {
    72            0,   0,   0,   0,   0,   0,   0,   0, 
    73            0,   0,   0,   0,   0,   0,   0,   0, 
    74            0,   0,   0,   0,   0,   0,   0,   0, 
    75            0,   0,   0,   0,   0,   0,   0,   0, 
    76            0, 128,   0,   0,   0, 128,   0,   0, 
    77          128, 128,   0, 128, 128, 128, 128, 128, 
    78          128, 128, 128, 128, 128, 128, 128, 128, 
    79          128, 128, 128,   0,   0, 128,   0,   0, 
    80          128, 128, 128, 128, 128, 128, 128, 128, 
    81          128, 128, 128, 128, 128, 128, 128, 128, 
    82          128, 128, 128, 128, 128, 128, 128, 128, 
    83          128, 128, 128, 128,   0, 128,   0, 128, 
    84            0, 128, 128, 128, 128, 128, 128, 128, 
    85          128, 128, 128, 128, 128, 128, 128, 128, 
    86          128, 128, 128, 128, 128, 128, 128, 128, 
    87          128, 128, 128, 128,   0, 128, 128,   0, 
    88          128, 128, 128, 128, 128, 128, 128, 128, 
    89          128, 128, 128, 128, 128, 128, 128, 128, 
    90          128, 128, 128, 128, 128, 128, 128, 128, 
    91          128, 128, 128, 128, 128, 128, 128, 128, 
    92          128, 128, 128, 128, 128, 128, 128, 128, 
    93          128, 128, 128, 128, 128, 128, 128, 128, 
    94          128, 128, 128, 128, 128, 128, 128, 128, 
    95          128, 128, 128, 128, 128, 128, 128, 128, 
    96          128, 128, 128, 128, 128, 128, 128, 128, 
    97          128, 128, 128, 128, 128, 128, 128, 128, 
    98          128, 128, 128, 128, 128, 128, 128, 128, 
    99          128, 128, 128, 128, 128, 128, 128, 128, 
   100          128, 128, 128, 128, 128, 128, 128, 128, 
   101          128, 128, 128, 128, 128, 128, 128, 128, 
   102          128, 128, 128, 128, 128, 128, 128, 128, 
   103          128, 128, 128, 128, 128, 128, 128, 128, 
   104        };
   105        yych = *in;
              // Plain-text byte: scan the whole run in yy9.
   106        if (yybm[0+yych] & 128) {
   107          goto yy9;
   108        }
              // Dispatch on the first byte of anything else: NUL falls through to
              // the bare break below, '\n' -> yy6, '\r' -> yy8, '$' -> yy12,
              // '\\' -> yy13, every other byte -> yy4.
   109        if (yych <= '\r') {
   110          if (yych <= '\t') {
   111            if (yych >= 0x01) goto yy4;
   112          } else {
   113            if (yych <= '\n') goto yy6;
   114            if (yych <= '\f') goto yy4;
   115            goto yy8;
   116          }
   117        } else {
   118          if (yych <= '$') {
   119            if (yych <= '#') goto yy4;
   120            goto yy12;
   121          } else {
   122            if (yych <= '?') goto yy4;
   123            if (yych <= '\\') goto yy13;
   124            goto yy4;
   125          }
   126        }
              // NUL byte: step past it and end the current file name.
   127        ++in;
   128        {
   129          break;
   130        }
   131  yy4:
   132        ++in;
   133  yy5:
   134        {
   135          // For any other character (e.g. whitespace), swallow it here,
   136          // allowing the outer logic to loop around again.
   137          break;
   138        }
   139  yy6:
   140        ++in;
   141        {
   142          // A newline ends the current file name and the current rule.
   143          have_newline = true;
   144          break;
   145        }
        // '\r': "\r\n" counts as a newline (yy6); a lone '\r' is swallowed (yy5).
   146  yy8:
   147        yych = *++in;
   148        if (yych == '\n') goto yy6;
   149        goto yy5;
        // Consume the longest run of plain-text bytes.
   150  yy9:
   151        yych = *++in;
   152        if (yybm[0+yych] & 128) {
   153          goto yy9;
   154        }
   155  yy11:
   156        {
   157          // Got a span of plain text.
   158          int len = (int)(in - start);
   159          // Need to shift it over if we're overwriting backslashes.
   160          if (out < start)
   161            memmove(out, start, len);
   162          out += len;
   163          continue;
   164        }
        // '$': "$$" is an escaped dollar (yy14); a lone '$' is swallowed (yy5).
   165  yy12:
   166        yych = *++in;
   167        if (yych == '$') goto yy14;
   168        goto yy5;
        // First backslash: remember the position right after it, then dispatch on
        // the next byte: NUL -> yy5, '\n' -> yy17, '\r' -> yy19, ' ' -> yy21,
        // '#' -> yy23, ':' -> yy25, '\\' -> yy27, every other byte -> yy16.
   169  yy13:
   170        yych = *(yymarker = ++in);
   171        if (yych <= ' ') {
   172          if (yych <= '\n') {
   173            if (yych <= 0x00) goto yy5;
   174            if (yych <= '\t') goto yy16;
   175            goto yy17;
   176          } else {
   177            if (yych == '\r') goto yy19;
   178            if (yych <= 0x1F) goto yy16;
   179            goto yy21;
   180          }
   181        } else {
   182          if (yych <= '9') {
   183            if (yych == '#') goto yy23;
   184            goto yy16;
   185          } else {
   186            if (yych <= ':') goto yy25;
   187            if (yych == '\\') goto yy27;
   188            goto yy16;
   189          }
   190        }
   191  yy14:
   192        ++in;
   193        {
   194          // De-escape dollar character.
   195          *out++ = '$';
   196          continue;
   197        }
        // Backslash(es) followed by an ordinary byte: take that byte as well and
        // emit the whole span unchanged as plain text.
   198  yy16:
   199        ++in;
   200        goto yy11;
   201  yy17:
   202        ++in;
   203        {
   204          // A line continuation ends the current file name.
   205          break;
   206        }
        // Backslash-CR: a line continuation only if LF follows. Otherwise back up
        // to just after the backslash and swallow the backslash alone.
   207  yy19:
   208        yych = *++in;
   209        if (yych == '\n') goto yy17;
   210        in = yymarker;
   211        goto yy5;
   212  yy21:
   213        ++in;
   214        {
   215          // 2N+1 backslashes plus space -> N backslashes plus space.
                // len counts the backslashes and the space, so n is N. When
                // out == start the backslashes are already in place, so only the
                // shifted case needs them written.
   216          int len = (int)(in - start);
   217          int n = len / 2 - 1;
   218          if (out < start)
   219            memset(out, '\\', n);
   220          out += n;
   221          *out++ = ' ';
   222          continue;
   223        }
   224  yy23:
   225        ++in;
   226        {
   227          // De-escape hash sign, but preserve other leading backslashes.
                // len counts the backslashes and the '#'; len - 2 backslashes remain.
   228          int len = (int)(in - start);
   229          if (len > 2 && out < start)
   230            memset(out, '\\', len - 2);
   231          out += len - 2;
   232          *out++ = '#';
   233          continue;
   234        }
        // Backslash(es) followed by ':'. Peek at the byte after the colon without
        // consuming it: NUL, tab, LF, CR or space -> yy28, anything else -> yy26.
   235  yy25:
   236        yych = *++in;
   237        if (yych <= '\f') {
   238          if (yych <= 0x00) goto yy28;
   239          if (yych <= 0x08) goto yy26;
   240          if (yych <= '\n') goto yy28;
   241        } else {
   242          if (yych <= '\r') goto yy28;
   243          if (yych == ' ') goto yy28;
   244        }
   245  yy26:
   246        {
   247          // De-escape colon sign, but preserve other leading backslashes.
   248          // Regular expression uses lookahead to make sure that no whitespace
   249          // nor EOF follows. In that case it'd be the : at the end of a target
   250          int len = (int)(in - start);
   251          if (len > 2 && out < start)
   252            memset(out, '\\', len - 2);
   253          out += len - 2;
   254          *out++ = ':';
   255          continue;
   256        }
        // An even number of backslashes has been read. NUL, '\n' and '\r' emit them
        // as plain text and leave that byte for the next scan (yy11); ' ' -> yy30,
        // '#' -> yy23, ':' -> yy25, '\\' -> yy32, every other byte -> yy16.
   257  yy27:
   258        yych = *++in;
   259        if (yych <= ' ') {
   260          if (yych <= '\n') {
   261            if (yych <= 0x00) goto yy11;
   262            if (yych <= '\t') goto yy16;
   263            goto yy11;
   264          } else {
   265            if (yych == '\r') goto yy11;
   266            if (yych <= 0x1F) goto yy16;
   267            goto yy30;
   268          }
   269        } else {
   270          if (yych <= '9') {
   271            if (yych == '#') goto yy23;
   272            goto yy16;
   273          } else {
   274            if (yych <= ':') goto yy25;
   275            if (yych == '\\') goto yy32;
   276            goto yy16;
   277          }
   278        }
   279  yy28:
   280        ++in;
   281        {
   282          // Backslash followed by : and whitespace.
   283          // It is therefore normal text and not an escaped colon
                // len excludes the whitespace (or NUL) byte that was just consumed.
   284          int len = (int)(in - start - 1);
   285          // Need to shift it over if we're overwriting backslashes.
   286          if (out < start)
   287            memmove(out, start, len);
   288          out += len;
   289          if (*(in - 1) == '\n')
   290            have_newline = true;
   291          break;
   292        }
   293  yy30:
   294        ++in;
   295        {
   296          // 2N backslashes plus space -> 2N backslashes, end of filename.
                // len counts the backslashes and the space; the space is dropped.
   297          int len = (int)(in - start);
   298          if (out < start)
   299            memset(out, '\\', len - 1);
   300          out += len - 1;
   301          break;
   302        }
        // An odd number (three or more) of backslashes has been read. Same dispatch
        // as yy27, except that ' ' -> yy21 and a further '\\' goes back to yy27.
   303  yy32:
   304        yych = *++in;
   305        if (yych <= ' ') {
   306          if (yych <= '\n') {
   307            if (yych <= 0x00) goto yy11;
   308            if (yych <= '\t') goto yy16;
   309            goto yy11;
   310          } else {
   311            if (yych == '\r') goto yy11;
   312            if (yych <= 0x1F) goto yy16;
   313            goto yy21;
   314          }
   315        } else {
   316          if (yych <= '9') {
   317            if (yych == '#') goto yy23;
   318            goto yy16;
   319          } else {
   320            if (yych <= ':') goto yy25;
   321            if (yych == '\\') goto yy27;
   322            goto yy16;
   323          }
   324        }
   325      }
   326  
   327      }
   328  
            // [filename, out) now holds the de-escaped name, which may be empty.
   329      int len = (int)(out - filename);
            // Captured before the colon check below, so the name that carries the
            // ':' is itself still classified as a target.
   330      const bool is_dependency = !parsing_targets;
   331      if (len > 0 && filename[len - 1] == ':') {
   332        len--;  // Strip off trailing colon, if any.
   333        parsing_targets = false;
   334        have_target = true;
   335      }
   336  
   337      if (len > 0) {
   338        StringPiece piece = StringPiece(filename, len);
   339        // If we've seen this as an input before, skip it.
   340        std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
   341        if (pos == ins_.end()) {
   342          if (is_dependency) {
   343            if (poisoned_input) {
   344              *err = "inputs may not also have inputs";
   345              return false;
   346            }
   347            // New input.
   348            ins_.push_back(piece);
   349          } else {
   350            // Check for a new output.
   351            if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
   352              outs_.push_back(piece);
   353          }
   354        } else if (!is_dependency) {
   355          // We've passed an input on the left side; reject new inputs.
   356          poisoned_input = true;
   357        }
   358      }
   359  
   360      if (have_newline) {
   361        // A newline ends a rule so the next filename will be a new target.
   362        parsing_targets = true;
   363        poisoned_input = false;
   364      }
   365    }
          // No name ending in ':' was ever seen, so the input is not a valid depfile.
   366    if (!have_target) {
   367      *err = "expected ':' in depfile";
   368      return false;
   369    }
   370    return true;
   371  }