github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/src/depfile_parser.in.cc (about)

     1  // Copyright 2011 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include "depfile_parser.h"
    16  #include "util.h"
    17  
    18  #include <algorithm>
    19  
    20  using namespace std;
    21  
    22  // Stores a copy of |options| in options_; no other setup is performed here.
    23  DepfileParser::DepfileParser(DepfileParserOptions options)
    24      : options_(options) {
    25  }
    26  
    27  // A note on backslashes in Makefiles, from reading the docs:
    28  // Backslash-newline is the line continuation character.
    29  // Backslash-# escapes a # (otherwise meaningful as a comment start).
    30  // Backslash-% escapes a % (otherwise meaningful as a special).
    31  // Finally, quoting the GNU manual, "Backslashes that are not in danger
    32  // of quoting ‘%’ characters go unmolested."
    33  // How do you end a line with a backslash?  The netbsd Make docs suggest
    34  // reading the result of a shell command echoing a backslash!
    35  //
    36  // Rather than implement all of above, we follow what GCC/Clang produces:
    37  // Backslashes escape a space or hash sign.
    38  // When a space is preceded by 2N+1 backslashes, it represents N backslashes
    39  // followed by space.
    40  // When a space is preceded by 2N backslashes, it represents 2N backslashes at
    41  // the end of a filename.
    42  // A hash sign is escaped by a single backslash. All other backslashes remain
    43  // unchanged.
    44  //
    45  // If anyone actually has depfiles that rely on the more complicated
    46  // behavior we can adjust this.
        //
        // Contract of Parse():
        // - |content| is scanned and rewritten in place: de-escaped filenames are
        //   written back into the same buffer, and the StringPieces pushed onto
        //   outs_ / ins_ are built from pointers into that buffer.
        //   NOTE(review): StringPiece is presumably a non-owning view, in which case
        //   |content| must outlive the parser's results -- confirm with callers.
        // - Names read while parsing targets (before a name ending in ':') go to
        //   outs_; names read afterwards go to ins_.  A newline starts a new rule.
        //   Each distinct name is stored at most once per list.
        // - Returns true on success.  Returns false and sets |*err| to
        //   "inputs may not also have inputs" when a new input follows a rule whose
        //   target was already recorded as an input, or to
        //   "expected ':' in depfile" when no name ending in ':' was seen at all
        //   (which includes empty input).
    47  bool DepfileParser::Parse(string* content, string* err) {
    48    // in: current parser input point.
    49    // end: end of input.
    50    // parsing_targets: whether we are parsing targets or dependencies.
    51    char* in = &(*content)[0];
    52    char* end = in + content->size();
          // have_target: set once any filename ending in ':' has been seen.
    53    bool have_target = false;
    54    bool parsing_targets = true;
          // poisoned_input: set when a target of the current rule was previously
          // recorded as an input; any new input in that rule is then an error.
    55    bool poisoned_input = false;
          // Each iteration of this loop scans at most one filename.
    56    while (in < end) {
    57      bool have_newline = false;
    58      // out: current output point (typically same as in, but can fall behind
    59      // as we de-escape backslashes).
    60      char* out = in;
    61      // filename: start of the current parsed filename.
    62      char* filename = out;
            // Inner loop: every `continue` in a rule below appends to the current
            // filename; every `break` ends it.
            // NOTE(review): yyfill is disabled below, so the scanner does no refill
            // or explicit bounds check; it presumably relies on the NUL terminating
            // |content| (matched by the `nul` rule) -- confirm against the re2c docs.
    63      for (;;) {
    64        // start: beginning of the current parsed span.
    65        const char* start = in;
              // Bound to re2c's YYMARKER by the configuration below.
    66        char* yymarker = NULL;
    67        /*!re2c
    68        re2c:define:YYCTYPE = "unsigned char";
    69        re2c:define:YYCURSOR = in;
    70        re2c:define:YYLIMIT = end;
    71        re2c:define:YYMARKER = yymarker;
    72  
    73        re2c:yyfill:enable = 0;
    74  
    75        re2c:indent:top = 2;
    76        re2c:indent:string = "  ";
    77  
    78        nul = "\000";
    79        newline = '\r'?'\n';
    80  
    81        '\\\\'* '\\ ' {
    82          // 2N+1 backslashes plus space -> N backslashes plus space.
    83          int len = (int)(in - start);
    84          int n = len / 2 - 1;
    85          if (out < start)
    86            memset(out, '\\', n);
    87          out += n;
    88          *out++ = ' ';
    89          continue;
    90        }
    91        '\\\\'+ ' ' {
    92          // 2N backslashes plus space -> 2N backslashes, end of filename.
    93          int len = (int)(in - start);
    94          if (out < start)
    95            memset(out, '\\', len - 1);
    96          out += len - 1;
    97          break;
    98        }
    99        '\\'+ '#' {
   100          // De-escape hash sign, but preserve other leading backslashes.
   101          int len = (int)(in - start);
   102          if (len > 2 && out < start)
   103            memset(out, '\\', len - 2);
   104          out += len - 2;
   105          *out++ = '#';
   106          continue;
   107        }
   108        '\\'+ ':' [\x00\x20\r\n\t] {
   109          // Backslash followed by : and whitespace.
   110          // It is therefore normal text and not an escaped colon
   111          int len = (int)(in - start - 1);
   112          // Need to shift it over if we're overwriting backslashes.
   113          if (out < start)
   114            memmove(out, start, len);
   115          out += len;
   116          if (*(in - 1) == '\n')
   117            have_newline = true;
   118          break;
   119        }
   120        '\\'+ ':' {
   121          // De-escape colon sign, but preserve other leading backslashes.
   122          // Regular expression uses lookahead to make sure that no whitespace
   123          // nor EOF follows. In that case it'd be the : at the end of a target
   124          int len = (int)(in - start);
   125          if (len > 2 && out < start)
   126            memset(out, '\\', len - 2);
   127          out += len - 2;
   128          *out++ = ':';
   129          continue;
   130        }
   131        '$$' {
   132          // De-escape dollar character.
   133          *out++ = '$';
   134          continue;
   135        }
   136        '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
   137          // Got a span of plain text.
   138          int len = (int)(in - start);
   139          // Need to shift it over if we're overwriting backslashes.
   140          if (out < start)
   141            memmove(out, start, len);
   142          out += len;
   143          continue;
   144        }
   145        nul {
   146          break;
   147        }
   148        '\\' newline {
   149          // A line continuation ends the current file name.
   150          break;
   151        }
   152        newline {
   153          // A newline ends the current file name and the current rule.
   154          have_newline = true;
   155          break;
   156        }
   157        [^] {
   158          // For any other character (e.g. whitespace), swallow it here,
   159          // allowing the outer logic to loop around again.
   160          break;
   161        }
   162        */
   163      }
   164  
            // [filename, out) now holds the de-escaped name (possibly empty).
   165      int len = (int)(out - filename);
            // Classify the name before the colon check below can flip
            // parsing_targets, so that "name:" itself is still treated as a target.
   166      const bool is_dependency = !parsing_targets;
   167      if (len > 0 && filename[len - 1] == ':') {
   168        len--;  // Strip off trailing colon, if any.
   169        parsing_targets = false;
   170        have_target = true;
   171      }
   172  
   173      if (len > 0) {
   174        StringPiece piece = StringPiece(filename, len);
   175        // If we've seen this as an input before, skip it.
   176        std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
   177        if (pos == ins_.end()) {
   178          if (is_dependency) {
   179            if (poisoned_input) {
   180              *err = "inputs may not also have inputs";
   181              return false;
   182            }
   183            // New input.
   184            ins_.push_back(piece);
   185          } else {
   186            // Check for a new output.
   187            if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
   188              outs_.push_back(piece);
   189          }
   190        } else if (!is_dependency) {
   191          // We've passed an input on the left side; reject new inputs.
   192          poisoned_input = true;
   193        }
   194      }
   195  
   196      if (have_newline) {
   197        // A newline ends a rule so the next filename will be a new target.
   198        parsing_targets = true;
   199        poisoned_input = false;
   200      }
   201    }
          // No name ending in ':' was ever seen, so nothing was parsed as a rule.
   202    if (!have_target) {
   203      *err = "expected ':' in depfile";
   204      return false;
   205    }
   206    return true;
   207  }