kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertions_to_souffle.cc (about)

     1  /*
     2   * Copyright 2021 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/verifier/assertions_to_souffle.h"
    18  
    19  #include <vector>
    20  
    21  #include "absl/container/flat_hash_set.h"
    22  #include "absl/strings/str_cat.h"
    23  #include "absl/strings/substitute.h"
    24  #include "kythe/cxx/verifier/assertion_ast.h"
    25  #include "kythe/cxx/verifier/pretty_printer.h"
    26  
    27  // Define DEBUG_LOWERING for debug output. This uses the pretty printer.
    28  
    29  namespace kythe::verifier {
namespace {
// Souffle prelude that Lower() places in front of the lowered goal groups
// when the prelude is enabled. The text is an absl::Substitute template;
// Lower() fills the placeholders as follows:
//   $0  interned symbol for the empty string ""
//   $1  interned symbol for "/kythe/node/kind"
//   $2  interned symbol for "anchor"
//   $3  interned symbol for "/kythe/loc/start"
//   $4  interned symbol for "/kythe/loc/end"
//   $5  definition of the result record type (".type result_ty = [...]"),
//       or empty when nothing is inspected
//   $6  parameter list of the `result` relation, or empty
//   $7  argument list of the `result` rule head, or empty
//   $8  clause binding the result record (", rrec=[...]"), or empty
// The template intentionally stops in the middle of the `result` rule body
// ("result(...) :- true..."): Lower() appends the lowered goal groups to it
// and then closes the rule with ".\n".
// NOTE(review): with only the fact sym(0), the recursive `sym` rule's guard
// (n >= 1) is never satisfied, so `sym` appears to contain just 0 -
// presumably on purpose; confirm before changing it.
constexpr absl::string_view kGlobalDecls = R"(
.type vname = [
  signature:number,
  corpus:number,
  root:number,
  path:number,
  language:number
]
.decl sym(id:number)
sym(0).
sym(n + 1) :- sym(n), n >= 1.
.decl entry(source:vname, kind:number, target:vname, name:number, value:number)
.input entry(IO=kythe)
.decl anchor(begin:number, end:number, vname:vname)
.input anchor(IO=kythe, anchors=1)
.decl at(startsym:number, endsym:number, vname:vname)
at(s, e, v) :- entry(v, $0, nil, $1, $2),
               entry(v, $0, nil, $3, s),
               entry(v, $0, nil, $4, e).
$5
.decl result($6)
result($7) :- true$8
)";
}  // namespace
    55  
    56  bool SouffleErrorState::NextStep() {
    57    if (goal_groups_->empty()) return false;
    58    if (target_group_ >= 0 && target_goal_ >= 0) {
    59      recovering_ = true;
    60    }
    61    if (target_group_ < 0) {
    62      target_group_ = static_cast<int>(goal_groups_->size()) - 1;
    63      target_goal_ = (*goal_groups_)[target_group_].goals.size();
    64    }
    65    --target_goal_;
    66    if (target_goal_ >= 0) {
    67      return true;
    68    }
    69    // We need to handle empty groups.
    70    while (target_goal_ < 0) {
    71      --target_group_;
    72      if (target_group_ < 0) {
    73        return false;
    74      }
    75      target_goal_ =
    76          static_cast<int>((*goal_groups_)[target_group_].goals.size()) - 1;
    77    }
    78    // target_group_ and target_goal_ >= 0.
    79    return true;
    80  }
    81  
    82  bool SouffleProgram::LowerSubexpression(AstNode* node, EVarType type,
    83                                          bool positive_cxt) {
    84    if (auto* app = node->AsApp()) {
    85      auto* tup = app->rhs()->AsTuple();
    86      absl::StrAppend(&code_, "[");
    87      for (size_t p = 0; p < 5; ++p) {
    88        if (p != 0) {
    89          absl::StrAppend(&code_, ", ");
    90        }
    91        if (!LowerSubexpression(tup->element(p), EVarType::kSymbol,
    92                                positive_cxt)) {
    93          return false;
    94        }
    95      }
    96      absl::StrAppend(&code_, "]");
    97      return true;
    98    } else if (auto* id = node->AsIdentifier()) {
    99      absl::StrAppend(&code_, id->symbol());
   100      return true;
   101    } else if (auto* evar = node->AsEVar()) {
   102      if (!AssignEVarType(evar, type)) return false;
   103      if (auto* evc = evar->current()) {
   104        return LowerSubexpression(evc, type, positive_cxt);
   105      }
   106      auto fresh = FindFreshEVar(evar);
   107      if (fresh.is_fresh && !positive_cxt) {
   108        negated_evars_.insert(evar);
   109      }
   110      absl::StrAppend(&code_, "v", fresh.id);
   111      return true;
   112    } else {
   113      LOG(ERROR) << "unknown subexpression kind";
   114      return false;
   115    }
   116  }
   117  
   118  bool SouffleProgram::AssignEVarType(EVar* evar, EVarType type) {
   119    auto t = evar_types_.insert({evar, type});
   120    if (t.second) return true;
   121    return t.first->second == type;
   122  }
   123  
   124  bool SouffleProgram::AssignEVarType(EVar* evar, EVar* oevar) {
   125    auto t = evar_types_.find(evar);
   126    if (t == evar_types_.end()) {
   127      auto s = evar_types_.find(oevar);
   128      if (s != evar_types_.end()) return AssignEVarType(evar, s->second);
   129      // A fancier implementation would keep track of these relations.
   130      return true;
   131    } else {
   132      return AssignEVarType(oevar, t->second);
   133    }
   134  }
   135  
   136  bool SouffleProgram::LowerGoalGroup(const SymbolTable& symbol_table,
   137                                      const GoalGroup& group, int target_goal) {
   138  #ifdef DEBUG_LOWERING
   139    FileHandlePrettyPrinter dprinter(stderr);
   140    for (const auto& goal : group.goals) {
   141      dprinter.Print("goal <");
   142      goal->Dump(symbol_table, &dprinter);
   143      dprinter.Print(">\n");
   144    }
   145    size_t ccode = code_.size();
   146  #endif
   147    if (group.goals.empty()) return true;
   148    bool pos = group.accept_if != GoalGroup::AcceptanceCriterion::kSomeMustFail;
   149    int cur_goal = 0;
   150    if (pos) {
   151      absl::StrAppend(&code_, ", (true");
   152      for (const auto& goal : group.goals) {
   153        if (target_goal >= 0 && cur_goal > target_goal) break;
   154        ++cur_goal;
   155        if (!LowerGoal(symbol_table, goal, true)) return false;
   156      }
   157      absl::StrAppend(&code_, ")\n");
   158    } else {
   159      absl::StrAppend(&code_, ", 0 = count:{true, sym(_)");
   160      for (const auto& goal : group.goals) {
   161        if (target_goal >= 0 && cur_goal > target_goal) break;
   162        ++cur_goal;
   163        if (!LowerGoal(symbol_table, goal, false)) return false;
   164      }
   165      absl::StrAppend(&code_, "}\n");
   166    }
   167  #ifdef DEBUG_LOWERING
   168    dprinter.Print(" => <");
   169    dprinter.Print(code_.substr(ccode, code_.size() - ccode));
   170    dprinter.Print("> \n");
   171  #endif
   172    return true;
   173  }
   174  
   175  bool SouffleProgram::LowerGoal(const SymbolTable& symbol_table, AstNode* goal,
   176                                 bool positive_cxt) {
   177    auto eq_sym = symbol_table.MustIntern("=");
   178    auto empty_sym = symbol_table.MustIntern("");
   179    auto* app = goal->AsApp();
   180    auto* tup = app->rhs()->AsTuple();
   181    if (app->lhs()->AsIdentifier()->symbol() == eq_sym) {
   182      auto EqEvarRange = [&](EVar* evar, Range* range) {
   183        auto beginsym = symbol_table.FindInterned(std::to_string(range->begin()));
   184        auto endsym = symbol_table.FindInterned(std::to_string(range->end()));
   185        if (!beginsym || !endsym) {
   186          // TODO(zarko): emit a warning here (if we're in a positive goal)?
   187          absl::StrAppend(&code_, ", false");
   188        } else {
   189          auto fresh = FindFreshEVar(evar);
   190          if (fresh.is_fresh && !positive_cxt) {
   191            negated_evars_.insert(evar);
   192          }
   193          // We need to name the elements of the range; otherwise the compiler
   194          // will complain that they are ungrounded in certain cases.
   195          absl::StrAppend(&code_, ", v", fresh.id, "=[v", fresh.id, "r1, ",
   196                          range->corpus(), ", ", range->root(), ", ",
   197                          range->path(), ", v", fresh.id, "r2]");
   198          // TODO(zarko): there might be a cleaner way to handle eq_sym; it would
   199          // need LowerSubexpression to be able to emit this as a side-clause.
   200          absl::StrAppend(&code_, ", at(", *beginsym, ", ", *endsym, ", v",
   201                          fresh.id, ")");
   202        }
   203        return AssignEVarType(evar, EVarType::kVName);
   204      };
   205      auto EqEvarSubexp = [&](EVar* evar, AstNode* subexp) {
   206        auto fresh = FindFreshEVar(evar);
   207        if (fresh.is_fresh && !positive_cxt) {
   208          negated_evars_.insert(evar);
   209        }
   210        absl::StrAppend(&code_, ", v", fresh.id, "=");
   211        if (auto* app = subexp->AsApp()) {
   212          AssignEVarType(evar, EVarType::kVName);
   213          return LowerSubexpression(app, EVarType::kVName, positive_cxt);
   214        } else if (auto* ident = subexp->AsIdentifier()) {
   215          AssignEVarType(evar, EVarType::kSymbol);
   216          return LowerSubexpression(ident, EVarType::kSymbol, positive_cxt);
   217        } else if (auto* oevar = subexp->AsEVar()) {
   218          auto ofresh = FindFreshEVar(oevar);
   219          if (ofresh.is_fresh && !positive_cxt) {
   220            negated_evars_.insert(oevar);
   221          }
   222          absl::StrAppend(&code_, "v", ofresh.id);
   223          return AssignEVarType(evar, oevar);
   224        } else {
   225          LOG(ERROR) << "expected equality on evar, app, ident, or range";
   226          return false;
   227        }
   228      };
   229      auto* revar = tup->element(1)->AsEVar();
   230      auto* rrange = tup->element(1)->AsRange();
   231      auto* levar = tup->element(0)->AsEVar();
   232      auto* lrange = tup->element(0)->AsRange();
   233      if (revar != nullptr && lrange != nullptr) {
   234        return EqEvarRange(revar, lrange);
   235      } else if (levar != nullptr && lrange != nullptr) {
   236        return EqEvarRange(levar, rrange);
   237      } else if (levar != nullptr) {
   238        return EqEvarSubexp(levar, tup->element(1));
   239      } else if (revar != nullptr) {
   240        return EqEvarSubexp(revar, tup->element(0));
   241      } else {
   242        LOG(ERROR) << "expected eqality with evar on some side";
   243        return false;
   244      }
   245    } else {
   246      // This is an edge or fact pattern.
   247      absl::StrAppend(&code_, ", entry(");
   248      for (size_t p = 0; p < 5; ++p) {
   249        if (p != 0) {
   250          absl::StrAppend(&code_, ", ");
   251        }
   252        if (p == 2 && tup->element(p)->AsIdentifier() &&
   253            tup->element(p)->AsIdentifier()->symbol() == empty_sym) {
   254          // Facts have nil vnames in the target position.
   255          absl::StrAppend(&code_, "nil");
   256          continue;
   257        }
   258        if (!LowerSubexpression(
   259                tup->element(p),
   260                p == 0 || p == 2 ? EVarType::kVName : EVarType::kSymbol,
   261                positive_cxt)) {
   262          return false;
   263        }
   264      }
   265      absl::StrAppend(&code_, ")");
   266    }
   267    return true;
   268  }
   269  
   270  bool SouffleProgram::Lower(const SymbolTable& symbol_table,
   271                             const std::vector<GoalGroup>& goal_groups,
   272                             const std::vector<Inspection>& inspections,
   273                             const SouffleErrorState& error_state) {
   274    code_.clear();
   275    int cur_group = 0;
   276    for (const auto& group : goal_groups) {
   277      if (error_state.IsFinished(cur_group)) break;
   278      if (!LowerGoalGroup(symbol_table, group,
   279                          error_state.GoalForGroup(cur_group)))
   280        return false;
   281      ++cur_group;
   282    }
   283    if (emit_prelude_) {
   284      std::string code;
   285      code_.swap(code);
   286      std::string result_tyspec;  // ".type result_ty = ..." or ""
   287      std::string result_spec;    // .decl result($result_spec)
   288      std::string
   289          result_argspec;  // result($result_argspec) :- true$result_clause
   290      std::string result_clause;
   291      absl::flat_hash_set<size_t> inspected_vars;
   292      for (const auto& i : inspections) {
   293        if (negated_evars_.contains(i.evar)) {
   294          // TODO(zarko): If we intend to preserve this restriction, it would be
   295          // better to catch it earlier (possibly during goal parsing). It's
   296          // possible to support it (e.g., an evar in a negative context with an
   297          // inspection will be inspected if the negative context fails, giving a
   298          // witness for *why* that negative goal group failed), but this will
   299          // complicate error recovery. This message is still better than getting
   300          // a diagnostic from Souffle about a leaky witness.
   301          if (i.kind == Inspection::Kind::IMPLICIT) {
   302            // Ignore implicit inspections inside negated contexts.
   303            continue;
   304          }
   305          LOG(ERROR) << i.evar->location() << ": " << i.label
   306                     << ": can't inspect a negated evar";
   307          return false;
   308        }
   309        auto type = evar_types_.find(i.evar);
   310        if (type == evar_types_.end()) {
   311          LOG(ERROR) << (error_state.IsDoingErrorRecovery()
   312                             ? "(during error recovery) "
   313                             : "")
   314                     << "evar typing missing for v" << FindEVar(i.evar);
   315          return false;
   316        }
   317        if (type->second == EVarType::kUnknown) {
   318          LOG(ERROR) << (error_state.IsDoingErrorRecovery()
   319                             ? "(during error recovery) "
   320                             : "")
   321                     << "evar typing unknown for v" << FindEVar(i.evar);
   322          return false;
   323        }
   324        auto id = FindEVar(i.evar);
   325        if (inspected_vars.insert(id).second) {
   326          if (result_spec.empty()) {
   327            result_spec = "rrec : result_ty";
   328            result_argspec = "rrec";
   329          }
   330          absl::StrAppend(&result_clause,
   331                          result_clause.empty() ? ", rrec=[" : ", ", "v", id);
   332          absl::StrAppend(&result_tyspec,
   333                          result_tyspec.empty() ? ".type result_ty = [" : ", ",
   334                          "rv", id, ":",
   335                          type->second == EVarType::kVName ? "vname" : "number");
   336  
   337          inspections_.push_back(i.evar);
   338        }
   339      }
   340      if (!result_spec.empty()) {
   341        // result_ty has been defined and will always be a record with at least
   342        // one element; we need to terminate both the type definition and the
   343        // record expression.
   344        absl::StrAppend(&result_tyspec, "]");
   345        absl::StrAppend(&result_clause, "]");
   346      }
   347      code_ = absl::Substitute(kGlobalDecls, symbol_table.MustIntern(""),
   348                               symbol_table.MustIntern("/kythe/node/kind"),
   349                               symbol_table.MustIntern("anchor"),
   350                               symbol_table.MustIntern("/kythe/loc/start"),
   351                               symbol_table.MustIntern("/kythe/loc/end"),
   352                               result_tyspec, result_spec, result_argspec,
   353                               result_clause);
   354      absl::StrAppend(&code_, code);
   355    }
   356    absl::StrAppend(&code_, ".\n");
   357  #ifdef DEBUG_LOWERING
   358    fprintf(stderr, "<%s>\n", code_.c_str());
   359  #endif
   360    return true;
   361  }
   362  }  // namespace kythe::verifier