kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertions.cc (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "assertions.h"
    18  
    19  #include "absl/strings/str_cat.h"
    20  #include "absl/strings/string_view.h"
    21  #include "kythe/cxx/common/file_utils.h"
    22  #include "verifier.h"
    23  
    24  namespace kythe {
    25  namespace verifier {
    26  
    27  void EVar::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {
    28    if (AstNode* node = current()) {
    29      node->Dump(symbol_table, printer);
    30    } else {
    31      printer->Print("<null>");
    32    }
    33  }
    34  
    35  void Identifier::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {
    36    printer->Print(symbol_table.PrettyText(symbol_));
    37  }
    38  
    39  void Range::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {
    40    printer->Print("Range(");
    41    printer->Print(symbol_table.PrettyText(corpus_));
    42    printer->Print(",");
    43    printer->Print(symbol_table.PrettyText(root_));
    44    printer->Print(",");
    45    printer->Print(symbol_table.PrettyText(path_));
    46    printer->Print(",");
    47    printer->Print(std::to_string(begin_));
    48    printer->Print(",");
    49    printer->Print(std::to_string(end_));
    50    printer->Print(")");
    51  }
    52  
    53  void Tuple::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {
    54    printer->Print("(");
    55    for (size_t v = 0; v < element_count_; ++v) {
    56      elements_[v]->Dump(symbol_table, printer);
    57      if (v + 1 < element_count_) {
    58        printer->Print(", ");
    59      }
    60    }
    61    printer->Print(")");
    62  }
    63  
    64  void App::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {
    65    lhs_->Dump(symbol_table, printer);
    66    // rhs_ should be a Tuple, which outputs "(...)" around itself.
    67    rhs_->Dump(symbol_table, printer);
    68  }
    69  
    70  bool AssertionParser::ParseInlineRuleString(const std::string& content,
    71                                              const std::string& fake_filename,
    72                                              Symbol path, Symbol root,
    73                                              Symbol corpus,
    74                                              const RE2& goal_comment_regex) {
    75    path_ = path;
    76    root_ = root;
    77    corpus_ = corpus;
    78    had_errors_ = false;
    79    files_.push_back(fake_filename);
    80    ResetLine();
    81    ScanBeginString(goal_comment_regex, content, trace_lex_);
    82    yy::AssertionParserImpl parser(*this);
    83    parser.set_debug_level(trace_parse_);
    84    int result = parser.parse();
    85    ScanEnd(last_eof_, last_eof_ofs_);
    86    return result == 0 && !had_errors_;
    87  }
    88  
    89  bool AssertionParser::ParseInlineRuleFile(const std::string& filename,
    90                                            Symbol path, Symbol root,
    91                                            Symbol corpus,
    92                                            const RE2& goal_comment_regex) {
    93    path_ = path;
    94    root_ = root;
    95    corpus_ = corpus;
    96    files_.push_back(filename);
    97    had_errors_ = false;
    98    ResetLine();
    99    ScanBeginFile(goal_comment_regex, trace_lex_);
   100    yy::AssertionParserImpl parser(*this);
   101    parser.set_debug_level(trace_parse_);
   102    int result = parser.parse();
   103    ScanEnd(last_eof_, last_eof_ofs_);
   104    return result == 0 && !had_errors_;
   105  }
   106  
   107  void AssertionParser::Error(const yy::location& location,
   108                              const std::string& message) {
   109    // TODO(zarko): replace with a PrettyPrinter
   110    std::cerr << location << ": " << message << std::endl;
   111    had_errors_ = true;
   112  }
   113  
   114  void AssertionParser::Error(const std::string& message) {
   115    // TODO(zarko): replace with a PrettyPrinter
   116    std::cerr << "When trying " << file() << ": " << message << std::endl;
   117    had_errors_ = true;
   118  }
   119  
   120  bool AssertionParser::CheckForSingletonEVars() {
   121    bool old_had_errors = had_errors_;
   122    for (const auto& singleton : singleton_evars_) {
   123      Error(singleton.first->location(),
   124            "singleton variable " +
   125                verifier_.symbol_table()->text(singleton.second) +
   126                " used only here");
   127    }
   128    had_errors_ = old_had_errors;
   129    return !singleton_evars_.empty();
   130  }
   131  
   132  AssertionParser::AssertionParser(Verifier* verifier, bool trace_lex,
   133                                   bool trace_parse)
   134      : verifier_(*verifier),
   135        arena_(verifier->arena()),
   136        trace_lex_(trace_lex),
   137        trace_parse_(trace_parse) {
   138    groups_.push_back(GoalGroup{GoalGroup::kNoneMayFail});
   139  }
   140  
   141  bool AssertionParser::Unescape(const char* yytext, std::string* out) {
   142    if (out == nullptr || *yytext != '\"') {
   143      return false;
   144    }
   145    ++yytext;  // Skip initial ".
   146    out->clear();
   147    char current = *yytext++;  // yytext will always immediately follow `current`.
   148    for (; current != '\0' && current != '\"'; current = *yytext++) {
   149      if (current == '\\') {
   150        current = *yytext++;
   151        switch (current) {
   152          case '\"':
   153            out->push_back(current);
   154            break;
   155          case '\\':
   156            out->push_back(current);
   157            break;
   158          case 'n':
   159            out->push_back('\n');
   160            break;
   161          default:
   162            return false;
   163        }
   164      } else {
   165        out->push_back(current);
   166      }
   167    }
   168    return (current == '\"' && *yytext == '\0');
   169  }
   170  
   171  void AssertionParser::ResetLine() { line_.clear(); }
   172  
   173  void AssertionParser::PushLocationSpec(const std::string& for_token) {
   174    location_spec_stack_.emplace_back(LocationSpec{for_token, -1, false, true});
   175  }
   176  
   177  void AssertionParser::PushRelativeLocationSpec(const std::string& for_token,
   178                                                 const std::string& relative) {
   179    location_spec_stack_.emplace_back(
   180        LocationSpec{for_token, atoi(relative.c_str()), false, true});
   181  }
   182  
   183  void AssertionParser::PushAbsoluteLocationSpec(const std::string& for_token,
   184                                                 const std::string& absolute) {
   185    location_spec_stack_.emplace_back(
   186        LocationSpec{for_token, atoi(absolute.c_str()), true, true});
   187  }
   188  
   189  void AssertionParser::SetTopLocationSpecMatchNumber(const std::string& number) {
   190    if (!location_spec_stack_.empty()) {
   191      // number is "#"{blank}*{int}
   192      location_spec_stack_.back().must_be_unambiguous = false;
   193      location_spec_stack_.back().match_number = atoi(number.c_str() + 1);
   194    }
   195  }
   196  
   197  Identifier* AssertionParser::PathIdentifierFor(
   198      const yy::location& location, const std::string& path_frag,
   199      const std::string& default_root) {
   200    if (path_frag.empty()) {
   201      return verifier_.IdentifierFor(location, "/");
   202    }
   203    std::string sigil;
   204    if (path_frag[0] == '#' || path_frag[0] == '%') {
   205      sigil = path_frag[0];
   206      if (path_frag.size() == 1) {
   207        return verifier_.IdentifierFor(location, sigil);
   208      }
   209    }
   210    if (path_frag[sigil.size()] != '/') {
   211      return verifier_.IdentifierFor(
   212          location, sigil + default_root + path_frag.substr(sigil.size()));
   213    }
   214    return verifier_.IdentifierFor(location, path_frag);
   215  }
   216  
   217  AstNode* AssertionParser::CreateEqualityConstraint(const yy::location& location,
   218                                                     AstNode* lhs, AstNode* rhs) {
   219    return verifier_.MakePredicate(location, verifier_.eq_id(), {lhs, rhs});
   220  }
   221  
   222  AstNode* AssertionParser::CreateSimpleEdgeFact(const yy::location& location,
   223                                                 AstNode* edge_lhs,
   224                                                 const std::string& literal_kind,
   225                                                 AstNode* edge_rhs,
   226                                                 AstNode* ordinal) {
   227    if (ordinal) {
   228      return verifier_.MakePredicate(
   229          location, verifier_.fact_id(),
   230          {edge_lhs, PathIdentifierFor(location, literal_kind, "/kythe/edge/"),
   231           edge_rhs, verifier_.ordinal_id(), ordinal});
   232    } else {
   233      return verifier_.MakePredicate(
   234          location, verifier_.fact_id(),
   235          {edge_lhs, PathIdentifierFor(location, literal_kind, "/kythe/edge/"),
   236           edge_rhs, verifier_.root_id(), verifier_.empty_string_id()});
   237    }
   238  }
   239  
   240  AstNode* AssertionParser::CreateSimpleNodeFact(const yy::location& location,
   241                                                 AstNode* lhs,
   242                                                 const std::string& literal_key,
   243                                                 AstNode* value) {
   244    return verifier_.MakePredicate(
   245        location, verifier_.fact_id(),
   246        {lhs, verifier_.empty_string_id(), verifier_.empty_string_id(),
   247         PathIdentifierFor(location, literal_key, "/kythe/"), value});
   248  }
   249  
   250  AstNode* AssertionParser::CreateInspect(const yy::location& location,
   251                                          const std::string& inspect_id,
   252                                          AstNode* to_inspect) {
   253    if (EVar* evar = to_inspect->AsEVar()) {
   254      singleton_evars_.erase(evar);
   255      inspections_.emplace_back(inspect_id, evar, Inspection::Kind::EXPLICIT);
   256      return to_inspect;
   257    } else {
   258      Error(location, "Inspecting something that's not an EVar.");
   259      return to_inspect;
   260    }
   261  }
   262  
   263  AstNode* AssertionParser::CreateDontCare(const yy::location& location) {
   264    return new (verifier_.arena()) EVar(location);
   265  }
   266  
   267  AstNode* AssertionParser::CreateAtom(const yy::location& location,
   268                                       const std::string& for_token) {
   269    if (!for_token.empty() && for_token[0] == '_') {
   270      return CreateDontCare(location);
   271    } else if (!for_token.empty() && isupper(for_token[0])) {
   272      return CreateEVar(location, for_token);
   273    } else {
   274      return CreateIdentifier(location, for_token);
   275    }
   276  }
   277  
   278  Identifier* AssertionParser::CreateIdentifier(const yy::location& location,
   279                                                const std::string& for_text) {
   280    Symbol symbol = verifier_.symbol_table()->intern(for_text);
   281    const auto old_binding = identifier_context_.find(symbol);
   282    if (old_binding == identifier_context_.end()) {
   283      Identifier* new_id = new (verifier_.arena()) Identifier(location, symbol);
   284      identifier_context_.emplace(symbol, new_id);
   285      return new_id;
   286    } else {
   287      return old_binding->second;
   288    }
   289  }
   290  
   291  EVar* AssertionParser::CreateEVar(const yy::location& location,
   292                                    const std::string& for_token) {
   293    Symbol symbol = verifier_.symbol_table()->intern(for_token);
   294    const auto old_binding = evar_context_.find(symbol);
   295    if (old_binding == evar_context_.end()) {
   296      EVar* new_evar = new (verifier_.arena()) EVar(location);
   297      evar_context_.emplace(symbol, new_evar);
   298      if (default_inspect_) {
   299        inspections_.emplace_back(for_token, new_evar,
   300                                  Inspection::Kind::IMPLICIT);
   301      }
   302      singleton_evars_[new_evar] = symbol;
   303      return new_evar;
   304    } else {
   305      singleton_evars_.erase(old_binding->second);
   306      return old_binding->second;
   307    }
   308  }
   309  
   310  bool AssertionParser::ValidateTopLocationSpec(const yy::location& location,
   311                                                size_t* line_number,
   312                                                bool* use_line_number,
   313                                                bool* must_be_unambiguous,
   314                                                int* match_number) {
   315    if (location_spec_stack_.empty()) {
   316      Error(location, "No locations on location stack.");
   317      return verifier_.empty_string_id();
   318    }
   319    const auto& spec = location_spec_stack_.back();
   320    *must_be_unambiguous = spec.must_be_unambiguous;
   321    *match_number = spec.match_number;
   322    if (spec.line_offset == 0) {
   323      Error(location, "This line offset is invalid.");
   324      return verifier_.empty_string_id();
   325    } else if (spec.line_offset < 0) {
   326      *use_line_number = false;
   327      *line_number = 0;
   328      return true;
   329    }
   330    *use_line_number = true;
   331    *line_number = spec.is_absolute ? spec.line_offset
   332                                    : spec.line_offset + location.begin.line;
   333    if (*line_number <= location.begin.line) {
   334      Error(location, "This line offset points to a previous or equal line.");
   335      return false;
   336    }
   337    return true;
   338  }
   339  
   340  AstNode* AssertionParser::CreateAnchorSpec(const yy::location& location) {
   341    size_t line_number = -1;
   342    bool use_line_number = false;
   343    bool must_be_unambiguous = false;
   344    int match_number = -1;
   345    if (!ValidateTopLocationSpec(location, &line_number, &use_line_number,
   346                                 &must_be_unambiguous, &match_number)) {
   347      return verifier_.empty_string_id();
   348    }
   349    const auto& spec = location_spec_stack_.back();
   350    EVar* new_evar = new (verifier_.arena()) EVar(location);
   351    unresolved_locations_.push_back(UnresolvedLocation{
   352        new_evar, spec.spec, line_number, use_line_number, group_id(),
   353        UnresolvedLocation::Kind::kAnchor, must_be_unambiguous, match_number});
   354    location_spec_stack_.pop_back();
   355    return new_evar;
   356  }
   357  
   358  AstNode* AssertionParser::CreateOffsetSpec(const yy::location& location,
   359                                             bool at_end) {
   360    size_t line_number = -1;
   361    bool use_line_number = false;
   362    bool must_be_unambiguous = false;
   363    int match_number = -1;
   364    if (!ValidateTopLocationSpec(location, &line_number, &use_line_number,
   365                                 &must_be_unambiguous, &match_number)) {
   366      return verifier_.empty_string_id();
   367    }
   368    const auto& spec = location_spec_stack_.back();
   369    EVar* new_evar = new (verifier_.arena()) EVar(location);
   370    unresolved_locations_.push_back(UnresolvedLocation{
   371        new_evar, spec.spec, line_number, use_line_number, group_id(),
   372        at_end ? UnresolvedLocation::Kind::kOffsetEnd
   373               : UnresolvedLocation::Kind::kOffsetBegin,
   374        must_be_unambiguous, match_number});
   375    location_spec_stack_.pop_back();
   376    return new_evar;
   377  }
   378  
   379  bool AssertionParser::ResolveLocations(const yy::location& end_of_line,
   380                                         size_t offset_after_endline,
   381                                         bool end_of_file) {
   382    bool was_ok = true;
   383    std::vector<UnresolvedLocation> succ_lines;
   384    for (auto& record : unresolved_locations_) {
   385      EVar* evar = record.anchor_evar;
   386      std::string& token = record.anchor_text;
   387      yy::location location = evar->location();
   388      location.columns(token.size());
   389      if (record.use_line_number &&
   390          (record.line_number != end_of_line.begin.line)) {
   391        if (end_of_file) {
   392          Error(location, token + ":" + std::to_string(record.line_number) +
   393                              " not found before end of file.");
   394          was_ok = false;
   395        } else {
   396          succ_lines.push_back(record);
   397        }
   398        continue;
   399      }
   400      size_t group_id = record.group_id;
   401      auto col = line_.find(token);
   402      if (col == std::string::npos) {
   403        Error(location, token + " not found.");
   404        was_ok = false;
   405        continue;
   406      }
   407      if (record.must_be_unambiguous) {
   408        if (line_.find(token, col + 1) != std::string::npos) {
   409          Error(location, token + " is ambiguous.");
   410          was_ok = false;
   411          continue;
   412        }
   413      } else {
   414        int match_number = 0;
   415        while (match_number != record.match_number) {
   416          col = line_.find(token, col + 1);
   417          if (col == std::string::npos) {
   418            break;
   419          }
   420          ++match_number;
   421        }
   422        if (match_number != record.match_number) {
   423          Error(location, token + " has no match #" +
   424                              std::to_string(record.match_number) + ".");
   425          was_ok = false;
   426          continue;
   427        }
   428      }
   429      size_t line_start = offset_after_endline - line_.size() - 1;
   430      switch (record.kind) {
   431        case UnresolvedLocation::Kind::kOffsetBegin:
   432          if (evar->current()) {
   433            Error(location, token + " already resolved.");
   434            was_ok = false;
   435            continue;
   436          }
   437          evar->set_current(verifier_.IdentifierFor(
   438              location, std::to_string(line_start + col)));
   439          break;
   440        case UnresolvedLocation::Kind::kOffsetEnd:
   441          if (evar->current()) {
   442            Error(location, token + " already resolved.");
   443            was_ok = false;
   444            continue;
   445          }
   446          evar->set_current(verifier_.IdentifierFor(
   447              location, std::to_string(line_start + col + token.size())));
   448          break;
   449        case UnresolvedLocation::Kind::kAnchor:
   450          if (default_inspect_) {
   451            inspections_.emplace_back(
   452                absl::StrCat("@", token, ":", location.begin.line, ".", col),
   453                evar, Inspection::Kind::IMPLICIT);
   454          }
   455          AppendGoal(group_id, verifier_.MakePredicate(
   456                                   location, verifier_.eq_id(),
   457                                   {new (verifier_.arena())
   458                                        Range(location, line_start + col,
   459                                              line_start + col + token.size(),
   460                                              path_, root_, corpus_),
   461                                    evar}));
   462          break;
   463      }
   464    }
   465    unresolved_locations_.swap(succ_lines);
   466    ResetLine();
   467    return was_ok;
   468  }
   469  
   470  void AssertionParser::AppendToLine(const char* yytext) { line_.append(yytext); }
   471  
   472  void AssertionParser::PushNode(AstNode* node) { node_stack_.push_back(node); }
   473  
   474  AstNode** AssertionParser::PopNodes(size_t count) {
   475    AstNode** nodes = (AstNode**)verifier_.arena()->New(count * sizeof(AstNode*));
   476    size_t start = node_stack_.size() - count;
   477    for (size_t c = 0; c < count; ++c) {
   478      nodes[c] = node_stack_[start + c];
   479    }
   480    node_stack_.resize(start);
   481    return nodes;
   482  }
   483  
   484  void AssertionParser::AppendGoal(size_t group_id, AstNode* goal) {
   485    assert(group_id < groups_.size());
   486    groups_[group_id].goals.push_back(goal);
   487  }
   488  
   489  void AssertionParser::EnterGoalGroup(const yy::location& location,
   490                                       bool negated) {
   491    if (inside_goal_group_) {
   492      Error(location, "It is not valid to enter nested goal groups.");
   493      return;
   494    }
   495    inside_goal_group_ = true;
   496    groups_.push_back(
   497        GoalGroup{negated ? GoalGroup::kSomeMustFail : GoalGroup::kNoneMayFail});
   498  }
   499  
   500  void AssertionParser::ExitGoalGroup(const yy::location& location) {
   501    if (!inside_goal_group_) {
   502      Error(location, "You've left a goal group before you've entered it.");
   503      return;
   504    }
   505    inside_goal_group_ = false;
   506  }
   507  
   508  void AssertionParser::ScanBeginString(const RE2& goal_comment_regex,
   509                                        const std::string& data,
   510                                        bool trace_scanning) {
   511    // Preprocess the input by adding a - to the left of every goal line and a
   512    // . to the left of every non-goal line. From every goal line remove any
   513    // character that is not part of the goal regex's capture group. This means
   514    // that we don't have to push RE2 deeper into the lexer; it also preserves
   515    // file locations for diagnostics (after taking into account the constant
   516    // 1 offset).
   517    std::string yy_buf;
   518    size_t next_line_begin = 0;
   519    auto append_line = [&](size_t line_end) {
   520      absl::string_view match_region;
   521      size_t line_length = line_end - next_line_begin;
   522      auto is_goal = RE2::FullMatch(
   523          absl::string_view(data.data() + next_line_begin, line_length),
   524          goal_comment_regex, &match_region);
   525      if (is_goal == 1) {
   526        yy_buf.push_back('-');
   527        size_t pre_pad = match_region.data() - data.data() - next_line_begin;
   528        for (size_t s = 0; s < pre_pad; ++s) {
   529          yy_buf.push_back(' ');
   530        }
   531        yy_buf.append(match_region.data(), match_region.size());
   532        size_t post_pad = line_length - pre_pad - match_region.size();
   533        for (size_t s = 0; s < post_pad; ++s) {
   534          yy_buf.push_back(' ');
   535        }
   536      } else {
   537        yy_buf.push_back('.');
   538        yy_buf.append(data, next_line_begin, line_length);
   539      }
   540      if (line_end != data.size()) {
   541        yy_buf.push_back('\n');
   542      }
   543      next_line_begin = line_end + 1;
   544    };
   545    auto endline = data.find('\n');
   546    while (endline != std::string::npos) {
   547      append_line(endline);
   548      endline = data.find('\n', next_line_begin);
   549    }
   550    append_line(data.size());
   551    SetScanBuffer(yy_buf, trace_scanning);
   552  }
   553  
   554  void AssertionParser::ScanBeginFile(const RE2& goal_comment_regex,
   555                                      bool trace_scanning) {
   556    if (file().empty() || file() == "-") {
   557      Error("will not read goals from stdin");
   558      exit(EXIT_FAILURE);
   559    }
   560    std::string buffer = LoadFileOrDie(file());
   561    ScanBeginString(goal_comment_regex, buffer, trace_scanning);
   562  }
   563  
   564  }  // namespace verifier
   565  }  // namespace kythe