kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/verifier.cc (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "verifier.h"
    18  
    19  #include <fcntl.h>
    20  #include <sys/stat.h>
    21  #include <sys/types.h>
    22  #include <unistd.h>
    23  
    24  #include <memory>
    25  #include <optional>
    26  #include <string_view>
    27  
    28  #include "absl/log/check.h"
    29  #include "absl/log/log.h"
    30  #include "absl/strings/strip.h"
    31  #include "assertions.h"
    32  #include "google/protobuf/text_format.h"
    33  #include "google/protobuf/util/json_util.h"
    34  #include "kythe/cxx/common/kythe_uri.h"
    35  #include "kythe/cxx/common/scope_guard.h"
    36  #include "kythe/cxx/verifier/souffle_interpreter.h"
    37  #include "kythe/proto/common.pb.h"
    38  #include "kythe/proto/storage.pb.h"
    39  
    40  namespace kythe {
    41  namespace verifier {
    42  namespace {
    43  
    44  /// \brief The return code from a verifier thunk.
    45  using ThunkRet = size_t;
    46  /// \brief The operation failed normally.
    47  static ThunkRet kNoException = {0};
    48  /// \brief There is no more work to do, so unwind.
    49  static ThunkRet kSolved = {1};
    50  /// \brief The program is invalid, so unwind.
    51  static ThunkRet kInvalidProgram = {2};
    52  /// \brief The goal group is known to be impossible to solve.
    53  static ThunkRet kImpossible = {3};
    54  /// \brief ThunkRets >= kFirstCut should unwind to the frame
    55  /// establishing that cut without changing assignments.
    56  static ThunkRet kFirstCut = {4};
    57  
    58  typedef const std::function<ThunkRet()>& Thunk;
    59  
    60  static std::string* kDefaultDatabase = new std::string("builtin");
    61  static std::string* kStandardIn = new std::string("-");
    62  
    63  static bool EncodedIdentEqualTo(AstNode* a, AstNode* b) {
    64    Identifier* ia = a->AsIdentifier();
    65    Identifier* ib = b->AsIdentifier();
    66    return ia->symbol() == ib->symbol();
    67  }
    68  
    69  static bool EncodedIdentLessThan(AstNode* a, AstNode* b) {
    70    Identifier* ia = a->AsIdentifier();
    71    Identifier* ib = b->AsIdentifier();
    72    return ia->symbol() < ib->symbol();
    73  }
    74  
    75  static bool EncodedVNameEqualTo(App* a, App* b) {
    76    Tuple* ta = a->rhs()->AsTuple();
    77    Tuple* tb = b->rhs()->AsTuple();
    78    for (int i = 0; i < 5; ++i) {
    79      if (!EncodedIdentEqualTo(ta->element(i), tb->element(i))) {
    80        return false;
    81      }
    82    }
    83    return true;
    84  }
    85  
    86  static bool EncodedVNameLessThan(App* a, App* b) {
    87    Tuple* ta = a->rhs()->AsTuple();
    88    Tuple* tb = b->rhs()->AsTuple();
    89    for (int i = 0; i < 4; ++i) {
    90      if (EncodedIdentLessThan(ta->element(i), tb->element(i))) {
    91        return true;
    92      }
    93      if (!EncodedIdentEqualTo(ta->element(i), tb->element(i))) {
    94        return false;
    95      }
    96    }
    97    return EncodedIdentLessThan(ta->element(4), tb->element(4));
    98  }
    99  
   100  static bool EncodedVNameOrIdentLessThan(AstNode* a, AstNode* b) {
   101    App* aa = a->AsApp();  // nullptr if a is not a vname
   102    App* ab = b->AsApp();  // nullptr if b is not a vname
   103    if (aa && ab) {
   104      return EncodedVNameLessThan(aa, ab);
   105    } else if (!aa && ab) {
   106      // Arbitrarily, vname < ident.
   107      return true;
   108    } else if (aa && !ab) {
   109      return false;
   110    } else {
   111      return EncodedIdentLessThan(a, b);
   112    }
   113  }
   114  
   115  static bool EncodedVNameOrIdentEqualTo(AstNode* a, AstNode* b) {
   116    App* aa = a->AsApp();  // nullptr if a is not a vname
   117    App* ab = b->AsApp();  // nullptr if b is not a vname
   118    if (aa && ab) {
   119      return EncodedVNameEqualTo(aa, ab);
   120    } else if (!aa && ab) {
   121      return false;
   122    } else if (aa && !ab) {
   123      return false;
   124    } else {
   125      return EncodedIdentEqualTo(a, b);
   126    }
   127  }
   128  
   129  /// \brief Sort entries such that those that set fact values are adjacent.
   130  static bool EncodedFactLessThan(AstNode* a, AstNode* b) {
   131    Tuple* ta = a->AsApp()->rhs()->AsTuple();
   132    Tuple* tb = b->AsApp()->rhs()->AsTuple();
   133    if (EncodedVNameOrIdentLessThan(ta->element(0), tb->element(0))) {
   134      return true;
   135    }
   136    if (!EncodedVNameOrIdentEqualTo(ta->element(0), tb->element(0))) {
   137      return false;
   138    }
   139    if (EncodedIdentLessThan(ta->element(1), tb->element(1))) {
   140      return true;
   141    }
   142    if (!EncodedIdentEqualTo(ta->element(1), tb->element(1))) {
   143      return false;
   144    }
   145    if (EncodedVNameOrIdentLessThan(ta->element(2), tb->element(2))) {
   146      return true;
   147    }
   148    if (!EncodedVNameOrIdentEqualTo(ta->element(2), tb->element(2))) {
   149      return false;
   150    }
   151    if (EncodedIdentLessThan(ta->element(3), tb->element(3))) {
   152      return true;
   153    }
   154    if (!EncodedIdentEqualTo(ta->element(3), tb->element(3))) {
   155      return false;
   156    }
   157    if (EncodedIdentLessThan(ta->element(4), tb->element(4))) {
   158      return true;
   159    }
   160    return false;
   161  }
   162  
   163  static AstNode* DerefEVar(AstNode* node) {
   164    while (node) {
   165      if (auto* evar = node->AsEVar()) {
   166        node = evar->current();
   167      } else {
   168        break;
   169      }
   170    }
   171    return node;
   172  }
   173  
   174  static Identifier* SafeAsIdentifier(AstNode* node) {
   175    return node == nullptr ? nullptr : node->AsIdentifier();
   176  }
   177  
   178  struct AtomFactKey {
   179    Identifier* edge_kind;
   180    Identifier* fact_name;
   181    Identifier* fact_value;
   182    Identifier* source_vname[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
   183    Identifier* target_vname[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
   184    // fact_tuple is expected to be a full tuple from a Fact head
   185    AtomFactKey(AstNode* vname_head, Tuple* fact_tuple)
   186        : edge_kind(SafeAsIdentifier(DerefEVar(fact_tuple->element(1)))),
   187          fact_name(SafeAsIdentifier(DerefEVar(fact_tuple->element(3)))),
   188          fact_value(SafeAsIdentifier(DerefEVar(fact_tuple->element(4)))) {
   189      InitVNameFields(vname_head, fact_tuple->element(0), &source_vname[0]);
   190      InitVNameFields(vname_head, fact_tuple->element(2), &target_vname[0]);
   191    }
   192    void InitVNameFields(AstNode* vname_head, AstNode* maybe_vname,
   193                         Identifier** out) {
   194      maybe_vname = DerefEVar(maybe_vname);
   195      if (maybe_vname == nullptr) {
   196        return;
   197      }
   198      if (auto* app = maybe_vname->AsApp()) {
   199        if (DerefEVar(app->lhs()) != vname_head) {
   200          return;
   201        }
   202        AstNode* maybe_tuple = DerefEVar(app->rhs());
   203        if (maybe_tuple == nullptr) {
   204          return;
   205        }
   206        if (auto* tuple = maybe_tuple->AsTuple()) {
   207          if (tuple->size() != 5) {
   208            return;
   209          }
   210          for (size_t i = 0; i < 5; ++i) {
   211            out[i] = SafeAsIdentifier(DerefEVar(tuple->element(i)));
   212          }
   213        }
   214      }
   215    }
   216  };
   217  
   218  enum class Order { LT, EQ, GT };
   219  
   220  // How we order incomplete keys depends on whether we're looking for
   221  // an upper or lower bound. See below for details. The node passed in
   222  // must be an application of Fact to a full fact tuple.
   223  static Order CompareFactWithKey(Order incomplete, AstNode* a, AtomFactKey* k) {
   224    Tuple* ta = a->AsApp()->rhs()->AsTuple();
   225    if (k->edge_kind == nullptr) {
   226      return incomplete;
   227    } else if (EncodedIdentLessThan(ta->element(1), k->edge_kind)) {
   228      return Order::LT;
   229    } else if (!EncodedIdentEqualTo(ta->element(1), k->edge_kind)) {
   230      return Order::GT;
   231    }
   232    if (k->fact_name == nullptr) {
   233      return incomplete;
   234    } else if (EncodedIdentLessThan(ta->element(3), k->fact_name)) {
   235      return Order::LT;
   236    } else if (!EncodedIdentEqualTo(ta->element(3), k->fact_name)) {
   237      return Order::GT;
   238    }
   239    if (k->fact_value == nullptr) {
   240      return incomplete;
   241    } else if (EncodedIdentLessThan(ta->element(4), k->fact_value)) {
   242      return Order::LT;
   243    } else if (!EncodedIdentEqualTo(ta->element(4), k->fact_value)) {
   244      return Order::GT;
   245    }
   246    auto vname_compare = [incomplete](Tuple* va, Identifier* tuple[5]) {
   247      for (size_t i = 0; i < 5; ++i) {
   248        if (tuple[i] == nullptr) {
   249          return incomplete;
   250        }
   251        if (EncodedIdentLessThan(va->element(i), tuple[i])) {
   252          return Order::LT;
   253        }
   254        if (!EncodedIdentEqualTo(va->element(i), tuple[i])) {
   255          return Order::GT;
   256        }
   257      }
   258      return Order::EQ;
   259    };
   260    if (Tuple* vs = ta->element(0)->AsApp()->rhs()->AsTuple()) {
   261      auto ord = vname_compare(vs, k->source_vname);
   262      if (ord != Order::EQ) {
   263        return ord;
   264      }
   265    }
   266    if (auto* app = ta->element(2)->AsApp()) {
   267      if (Tuple* vt = app->rhs()->AsTuple()) {
   268        auto ord = vname_compare(vt, k->target_vname);
   269        if (ord != Order::EQ) {
   270          return ord;
   271        }
   272      }
   273    }
   274    return Order::EQ;
   275  }
   276  
   277  // We want to be able to find the following bounds:
   278  // (0,0,2,3) (0,1,2,3) (0,1,2,4) (1,1,2,4)
   279  //          ^---  (0,1,_,_)  ---^
   280  
   281  static bool FastLookupKeyLessThanFact(AtomFactKey* k, AstNode* a) {
   282    // This is used to find upper bounds, so keys with incomplete suffixes should
   283    // be ordered after all facts that share their complete prefixes.
   284    return CompareFactWithKey(Order::LT, a, k) == Order::GT;
   285  }
   286  
   287  static bool FastLookupFactLessThanKey(AstNode* a, AtomFactKey* k) {
   288    // This is used to find lower bounds, so keys with incomplete suffixes should
   289    // be ordered after facts with lower prefixes but before facts with complete
   290    // suffixes.
   291    return CompareFactWithKey(Order::GT, a, k) == Order::LT;
   292  }
   293  
   294  //  Sort entries in lexicographic order, collating as:
   295  // `(edge_kind, fact_name, fact_value, source_node, target_node)`.
   296  // In practice most unification was happening between tuples
   297  // with the first three fields present; then source_node
   298  // missing some of the time; then target_node missing most of
   299  // the time.
   300  static bool FastLookupFactLessThan(AstNode* a, AstNode* b) {
   301    Tuple* ta = a->AsApp()->rhs()->AsTuple();
   302    Tuple* tb = b->AsApp()->rhs()->AsTuple();
   303    if (EncodedIdentLessThan(ta->element(1), tb->element(1))) {
   304      return true;
   305    }
   306    if (!EncodedIdentEqualTo(ta->element(1), tb->element(1))) {
   307      return false;
   308    }
   309    if (EncodedIdentLessThan(ta->element(3), tb->element(3))) {
   310      return true;
   311    }
   312    if (!EncodedIdentEqualTo(ta->element(3), tb->element(3))) {
   313      return false;
   314    }
   315    if (EncodedIdentLessThan(ta->element(4), tb->element(4))) {
   316      return true;
   317    }
   318    if (!EncodedIdentEqualTo(ta->element(4), tb->element(4))) {
   319      return false;
   320    }
   321    if (EncodedVNameOrIdentLessThan(ta->element(0), tb->element(0))) {
   322      return true;
   323    }
   324    if (!EncodedVNameOrIdentEqualTo(ta->element(0), tb->element(0))) {
   325      return false;
   326    }
   327    if (EncodedVNameOrIdentLessThan(ta->element(2), tb->element(2))) {
   328      return true;
   329    }
   330    return false;
   331  }
   332  
   333  // The Solver acts in a closed world: any universal quantification can be
   334  // exhaustively tested against database facts.
   335  // Based on _A Semi-Functional Implementation of a Higher-Order Logic
   336  // Programming Language_ by Conal Elliott and Frank Pfenning (draft of
   337  // February 1990).
   338  // It is not our intention to build a particularly performant or complete
   339  // inference engine. If the solver starts to get too hairy we might want to
   340  // look at deferring to a pre-existing system.
   341  class Solver {
   342   public:
   343    Solver(Verifier* context, Database& database, AnchorMap& anchors,
   344           std::function<bool(Verifier*, const Inspection&)>& inspect)
   345        : context_(*context),
   346          database_(database),
   347          anchors_(anchors),
   348          inspect_(inspect) {}
   349  
   350    ThunkRet UnifyTuple(Tuple* st, Tuple* tt, size_t ofs, size_t max,
   351                        ThunkRet cut, Thunk f) {
   352      if (ofs == max) return f();
   353      return Unify(st->element(ofs), tt->element(ofs), cut,
   354                   [this, st, tt, ofs, max, cut, &f]() {
   355                     return UnifyTuple(st, tt, ofs + 1, max, cut, f);
   356                   });
   357    }
   358  
   359    ThunkRet Unify(AstNode* s, AstNode* t, ThunkRet cut, Thunk f) {
   360      if (EVar* e = s->AsEVar()) {
   361        return UnifyEVar(e, t, cut, f);
   362      } else if (EVar* e = t->AsEVar()) {
   363        return UnifyEVar(e, s, cut, f);
   364      } else if (Identifier* si = s->AsIdentifier()) {
   365        if (Identifier* ti = t->AsIdentifier()) {
   366          if (si->symbol() == ti->symbol()) {
   367            return f();
   368          }
   369        }
   370      } else if (App* sa = s->AsApp()) {
   371        if (App* ta = t->AsApp()) {
   372          return Unify(sa->lhs(), ta->lhs(), cut, [this, sa, ta, cut, &f]() {
   373            return Unify(sa->rhs(), ta->rhs(), cut, f);
   374          });
   375        }
   376      } else if (Tuple* st = s->AsTuple()) {
   377        if (Tuple* tt = t->AsTuple()) {
   378          if (st->size() != tt->size()) {
   379            return kNoException;
   380          }
   381          return UnifyTuple(st, tt, 0, st->size(), cut, f);
   382        }
   383      } else if (Range* sr = s->AsRange()) {
   384        if (Range* tr = t->AsRange()) {
   385          if (*sr == *tr) {
   386            return f();
   387          }
   388        }
   389      }
   390      return kNoException;
   391    }
   392  
   393    bool Occurs(EVar* e, AstNode* t) {
   394      if (App* a = t->AsApp()) {
   395        return Occurs(e, a->lhs()) || Occurs(e, a->rhs());
   396      } else if (EVar* ev = t->AsEVar()) {
   397        return ev->current() ? Occurs(e, ev->current()) : e == ev;
   398      } else if (Tuple* tu = t->AsTuple()) {
   399        for (size_t i = 0, c = tu->size(); i != c; ++i) {
   400          if (Occurs(e, tu->element(i))) {
   401            return true;
   402          }
   403        }
   404        return false;
   405      } else if (Range* r = t->AsRange()) {
   406        return false;
   407      } else {
   408        CHECK(t->AsIdentifier() && "Inexhaustive match.");
   409        return false;
   410      }
   411      return true;
   412    }
   413  
   414    ThunkRet UnifyEVar(EVar* e, AstNode* t, ThunkRet cut, Thunk f) {
   415      if (AstNode* ec = e->current()) {
   416        return Unify(ec, t, cut, f);
   417      }
   418      if (t->AsEVar() == e) {
   419        return f();
   420      }
   421      if (Occurs(e, t)) {
   422        FileHandlePrettyPrinter printer(stderr);
   423        printer.Print("Detected a cycle involving ");
   424        e->Dump(*context_.symbol_table(), &printer);
   425        printer.Print(" while unifying it with ");
   426        t->Dump(*context_.symbol_table(), &printer);
   427        printer.Print(".\n");
   428        return kInvalidProgram;
   429      }
   430      e->set_current(t);
   431      ThunkRet f_ret = f();
   432      if (f_ret != cut) {
   433        e->set_current(nullptr);
   434      }
   435      return f_ret;
   436    }
   437  
   438    ThunkRet MatchAtomVersusDatabase(AstNode* atom, ThunkRet cut, Thunk f) {
   439      if (auto* app = atom->AsApp()) {
   440        if (app->lhs() == context_.fact_id()) {
   441          if (auto* tuple = app->rhs()->AsTuple()) {
   442            if (tuple->size() == 5) {
   443              AtomFactKey key(context_.vname_id(), tuple);
   444              // Make use of the fast lookup sort order.
   445              auto begin = std::lower_bound(database_.begin(), database_.end(),
   446                                            &key, FastLookupFactLessThanKey);
   447              auto end = std::upper_bound(database_.begin(), database_.end(),
   448                                          &key, FastLookupKeyLessThanFact);
   449              for (auto i = begin; i != end; ++i) {
   450                ThunkRet exc = Unify(atom, *i, cut, f);
   451                if (exc != kNoException) {
   452                  return exc;
   453                }
   454              }
   455              return kNoException;
   456            }
   457          }
   458        }
   459      }
   460      // Not enough information to filter by.
   461      for (size_t fact = 0; fact < database_.size(); ++fact) {
   462        ThunkRet exc = Unify(atom, database_[fact], cut, f);
   463        if (exc != kNoException) {
   464          return exc;
   465        }
   466      }
   467      return kNoException;
   468    }
   469  
   470    /// \brief If `atom` has the syntactic form =(a, b), returns the tuple (a, b).
   471    /// Otherwise returns `null`.
   472    Tuple* MatchEqualsArgs(AstNode* atom) {
   473      if (App* a = atom->AsApp()) {
   474        if (Identifier* id = a->lhs()->AsIdentifier()) {
   475          if (id->symbol() == context_.eq_id()->symbol()) {
   476            if (Tuple* tu = a->rhs()->AsTuple()) {
   477              if (tu->size() == 2) {
   478                return tu;
   479              }
   480            }
   481          }
   482        }
   483      }
   484      return nullptr;
   485    }
   486  
   487    ThunkRet MatchAtom(AstNode* atom, AstNode* program, ThunkRet cut, Thunk f) {
   488      // We only have the database and eq-constraints right now.
   489      assert(program == nullptr);
   490      if (auto* tu = MatchEqualsArgs(atom)) {
   491        if (Range* r = tu->element(0)->AsRange()) {
   492          auto anchors =
   493              anchors_.equal_range(std::make_pair(r->begin(), r->end()));
   494          if (anchors.first == anchors.second) {
   495            // There's no anchor with this range in the database.
   496            // This goal can therefore never succeed.
   497            return kImpossible;
   498          }
   499          for (auto anchor = anchors.first; anchor != anchors.second; ++anchor) {
   500            ThunkRet unify_ret = Unify(anchor->second, tu->element(1), cut, f);
   501            if (unify_ret != kNoException) {
   502              return unify_ret;
   503            }
   504          }
   505          return kNoException;
   506        }
   507        // =(a, b) succeeds if unify(a, b) succeeds.
   508        return Unify(tu->element(0), tu->element(1), cut, f);
   509      }
   510      return MatchAtomVersusDatabase(atom, cut, f);
   511    }
   512  
   513    ThunkRet SolveGoal(AstNode* goal, ThunkRet cut, Thunk f) {
   514      // We only have atomic goals right now.
   515      if (App* a = goal->AsApp()) {
   516        return MatchAtom(goal, nullptr, cut, f);
   517      } else {
   518        // TODO(zarko): Replace with a configurable PrettyPrinter.
   519        LOG(ERROR) << "Invalid AstNode in goal-expression.";
   520        return kInvalidProgram;
   521      }
   522    }
   523  
   524    ThunkRet SolveGoalArray(GoalGroup* group, size_t cur, ThunkRet cut, Thunk f) {
   525      if (cur > highest_goal_reached_) {
   526        highest_goal_reached_ = cur;
   527      }
   528      if (cur == group->goals.size()) {
   529        return f();
   530      }
   531      return SolveGoal(group->goals[cur], cut, [this, group, cur, cut, &f]() {
   532        return SolveGoalArray(group, cur + 1, cut, f);
   533      });
   534    }
   535  
   536    bool PerformInspection() {
   537      for (const auto& inspection : context_.parser()->inspections()) {
   538        if (!inspect_(&context_, inspection)) {
   539          return false;
   540        }
   541      }
   542      return true;
   543    }
   544  
   545    ThunkRet SolveGoalGroups(AssertionParser* context, Thunk f) {
   546      for (size_t cur = 0, cut = kFirstCut; cur < context->groups().size();
   547           ++cur, ++cut) {
   548        auto* group = &context->groups()[cur];
   549        if (cur > highest_group_reached_) {
   550          highest_goal_reached_ = 0;
   551          highest_group_reached_ = cur;
   552        }
   553        ThunkRet result = SolveGoalArray(group, 0, cut, [cut]() { return cut; });
   554        // Lots of unwinding later...
   555        if (result == cut) {
   556          // That last goal group succeeded.
   557          if (group->accept_if != GoalGroup::kNoneMayFail) {
   558            return PerformInspection() ? kNoException : kInvalidProgram;
   559          }
   560        } else if (result == kNoException || result == kImpossible) {
   561          // That last goal group failed.
   562          if (group->accept_if != GoalGroup::kSomeMustFail) {
   563            return PerformInspection() ? kNoException : kInvalidProgram;
   564          }
   565        } else {
   566          return result;
   567        }
   568      }
   569      return PerformInspection() ? f() : kInvalidProgram;
   570    }
   571  
   572    bool Solve() {
   573      ThunkRet exn = SolveGoalGroups(context_.parser(), []() { return kSolved; });
   574      return exn == kSolved;
   575    }
   576  
   577    size_t highest_group_reached() const { return highest_group_reached_; }
   578  
   579    size_t highest_goal_reached() const { return highest_goal_reached_; }
   580  
   581   private:
   582    Verifier& context_;
   583    Database& database_;
   584    AnchorMap& anchors_;
   585    std::function<bool(Verifier*, const Inspection&)>& inspect_;
   586    size_t highest_group_reached_ = 0;
   587    size_t highest_goal_reached_ = 0;
   588  };
   589  
   590  enum class NodeKind { kFile, kAnchor, kOther };
   591  
   592  struct NodeFacts {
   593    NodeKind kind = NodeKind::kOther;
   594    absl::Span<AstNode* const> facts;
   595  };
   596  
   597  NodeFacts ReadNodeFacts(absl::Span<AstNode* const> entries, Verifier& ctx) {
   598    NodeFacts result = {
   599        .kind = NodeKind::kOther,
   600        .facts = entries,
   601    };
   602  
   603    if (entries.empty()) {
   604      return result;
   605    }
   606  
   607    Tuple* head = entries.front()->AsApp()->rhs()->AsTuple();
   608    for (size_t i = 0; i < entries.size(); ++i) {
   609      Tuple* current = entries[i]->AsApp()->rhs()->AsTuple();
   610      if (!EncodedVNameOrIdentEqualTo(current->element(0), head->element(0)) ||
   611          current->element(1) != ctx.empty_string_id()) {
   612        // Moved past the fact block or moved to a different source node;
   613        // we're done.
   614        result.facts = entries.subspan(0, i);
   615        break;
   616      }
   617      if (EncodedIdentEqualTo(current->element(3), ctx.kind_id())) {
   618        if (EncodedIdentEqualTo(current->element(4), ctx.anchor_id())) {
   619          result.kind = NodeKind::kAnchor;
   620        } else if (EncodedIdentEqualTo(current->element(4), ctx.file_id())) {
   621          result.kind = NodeKind::kFile;
   622        }
   623      }
   624    }
   625    return result;
   626  }
   627  }  // namespace
   628  
   629  Verifier::Verifier(bool trace_lex, bool trace_parse)
   630      : parser_(this, trace_lex, trace_parse),
   631        builtin_location_name_("builtins") {
   632    builtin_location_.initialize(&builtin_location_name_);
   633    builtin_location_.begin.column = 1;
   634    builtin_location_.end.column = 1;
   635    auto* empty_string = IdentifierFor(builtin_location_, "");
   636    empty_string_id_ = empty_string;
   637    empty_string_sym_ = empty_string->symbol();
   638    default_file_corpus_ = empty_string;
   639    fact_id_ = IdentifierFor(builtin_location_, "fact");
   640    vname_id_ = IdentifierFor(builtin_location_, "vname");
   641    kind_id_ = IdentifierFor(builtin_location_, "/kythe/node/kind");
   642    anchor_id_ = IdentifierFor(builtin_location_, "anchor");
   643    start_id_ = IdentifierFor(builtin_location_, "/kythe/loc/start");
   644    end_id_ = IdentifierFor(builtin_location_, "/kythe/loc/end");
   645    root_id_ = IdentifierFor(builtin_location_, "/");
   646    eq_id_ = IdentifierFor(builtin_location_, "=");
   647    ordinal_id_ = IdentifierFor(builtin_location_, "/kythe/ordinal");
   648    file_id_ = IdentifierFor(builtin_location_, "file");
   649    text_id_ = IdentifierFor(builtin_location_, "/kythe/text");
   650    code_id_ = IdentifierFor(builtin_location_, "/kythe/code");
   651    code_json_id_ = IdentifierFor(builtin_location_, "/kythe/code/json");
   652    marked_source_child_id_ =
   653        IdentifierFor(builtin_location_, "/kythe/edge/child");
   654    marked_source_box_id_ = IdentifierFor(builtin_location_, "BOX");
   655    marked_source_type_id_ = IdentifierFor(builtin_location_, "TYPE");
   656    marked_source_parameter_id_ = IdentifierFor(builtin_location_, "PARAMETER");
   657    marked_source_identifier_id_ = IdentifierFor(builtin_location_, "IDENTIFIER");
   658    marked_source_context_id_ = IdentifierFor(builtin_location_, "CONTEXT");
   659    marked_source_initializer_id_ =
   660        IdentifierFor(builtin_location_, "INITIALIZER");
   661    marked_source_modifier_id_ = IdentifierFor(builtin_location_, "MODIFIER");
   662    marked_source_parameter_lookup_by_param_id_ =
   663        IdentifierFor(builtin_location_, "PARAMETER_LOOKUP_BY_PARAM");
   664    marked_source_lookup_by_param_id_ =
   665        IdentifierFor(builtin_location_, "LOOKUP_BY_PARAM");
   666    marked_source_parameter_lookup_by_tparam_id_ =
   667        IdentifierFor(builtin_location_, "PARAMETER_LOOKUP_BY_TPARAM");
   668    marked_source_lookup_by_tparam_id_ =
   669        IdentifierFor(builtin_location_, "LOOKUP_BY_TPARAM");
   670    marked_source_parameter_lookup_by_param_with_defaults_id_ = IdentifierFor(
   671        builtin_location_, "PARAMETER_LOOKUP_BY_PARAM_WITH_DEFAULTS");
   672    marked_source_lookup_by_typed_id_ =
   673        IdentifierFor(builtin_location_, "LOOKUP_BY_TYPED");
   674    marked_source_kind_id_ = IdentifierFor(builtin_location_, "/kythe/kind");
   675    marked_source_pre_text_id_ =
   676        IdentifierFor(builtin_location_, "/kythe/pre_text");
   677    marked_source_post_child_text_id_ =
   678        IdentifierFor(builtin_location_, "/kythe/post_child_text");
   679    marked_source_post_text_id_ =
   680        IdentifierFor(builtin_location_, "/kythe/post_text");
   681    marked_source_lookup_index_id_ =
   682        IdentifierFor(builtin_location_, "/kythe/lookup_index");
   683    marked_source_default_children_count_id_ =
   684        IdentifierFor(builtin_location_, "/kythe/default_children_count");
   685    marked_source_add_final_list_token_id_ =
   686        IdentifierFor(builtin_location_, "/kythe/add_final_list_token");
   687    marked_source_link_id_ = IdentifierFor(builtin_location_, "/kythe/edge/link");
   688    marked_source_true_id_ = IdentifierFor(builtin_location_, "true");
   689    marked_source_code_edge_id_ =
   690        IdentifierFor(builtin_location_, "/kythe/edge/code");
   691    marked_source_false_id_ = IdentifierFor(builtin_location_, "false");
   692    known_file_sym_ = symbol_table_.unique();
   693    known_not_file_sym_ = symbol_table_.unique();
   694    SetGoalCommentPrefix("//-");
   695  }
   696  
   697  void Verifier::SetGoalCommentPrefix(const std::string& it) {
   698    std::string error;
   699    auto escaped = RE2::QuoteMeta(it);
   700    CHECK(SetGoalCommentRegex("\\s*" + escaped + "(.*)", &error)) << error;
   701  }
   702  
   703  bool Verifier::SetGoalCommentRegex(const std::string& regex,
   704                                     std::string* error) {
   705    auto re2 = std::make_unique<RE2>(regex);
   706    if (re2->error_code() != RE2::NoError) {
   707      if (error) {
   708        *error = re2->error();
   709        return false;
   710      }
   711    }
   712    if (re2->NumberOfCapturingGroups() != 1) {
   713      if (error) {
   714        *error = "Wrong number of capture groups in goal comment regex ";
   715        // This is useful to show, since the shell might unexpectedly shred
   716        // regexes.
   717        error->append(regex);
   718        error->append("(want 1).");
   719        return false;
   720      }
   721    }
   722    goal_comment_regex_ = std::move(re2);
   723    return true;
   724  }
   725  
   726  bool Verifier::LoadInlineProtoFile(const std::string& file_data,
   727                                     absl::string_view path,
   728                                     absl::string_view root,
   729                                     absl::string_view corpus) {
   730    kythe::proto::Entries entries;
   731    bool ok = google::protobuf::TextFormat::ParseFromString(file_data, &entries);
   732    if (!ok) {
   733      // TODO(zarko): Replace with a configurable PrettyPrinter.
   734      LOG(ERROR) << "Unable to parse text protobuf.";
   735      return false;
   736    }
   737    for (int i = 0; i < entries.entries_size(); ++i) {
   738      if (!AssertSingleFact(kDefaultDatabase, i, entries.entries(i))) {
   739        return false;
   740      }
   741    }
   742    Symbol empty = symbol_table_.intern("");
   743    return parser_.ParseInlineRuleString(
   744        file_data, *kStandardIn, symbol_table_.intern(std::string(path)),
   745        symbol_table_.intern(std::string(root)),
   746        symbol_table_.intern(std::string(corpus)), "\\s*\\#\\-(.*)");
   747  }
   748  
   749  bool Verifier::LoadInlineRuleFile(const std::string& filename) {
   750    int fd = ::open(filename.c_str(), 0);
   751    if (fd < 0) {
   752      LOG(ERROR) << "Can't open " << filename;
   753      return false;
   754    }
   755    auto guard = MakeScopeGuard([&] { ::close(fd); });
   756    struct stat fd_stat;
   757    if (::fstat(fd, &fd_stat) < 0) {
   758      LOG(ERROR) << "Can't stat " << filename;
   759      return false;
   760    }
   761    std::string content;
   762    content.resize(fd_stat.st_size);
   763    if (::read(fd, const_cast<char*>(content.data()), fd_stat.st_size) !=
   764        fd_stat.st_size) {
   765      LOG(ERROR) << "Can't read " << filename;
   766      return false;
   767    }
   768    Symbol content_sym = symbol_table_.intern(content);
   769    if (file_vnames_) {
   770      auto vname = content_to_vname_.find(content_sym);
   771      if (vname != content_to_vname_.end()) {
   772        return LoadInMemoryRuleFile(filename, vname->second, content_sym);
   773      }
   774      if (allow_missing_file_vnames_) {
   775        LOG(WARNING) << "Could not find a file node for " << filename
   776                     << "; using default.";
   777      } else {
   778        LOG(ERROR) << "Could not find a file node for " << filename;
   779        return false;
   780      }
   781    }
   782    AstNode** values = (AstNode**)arena_.New(sizeof(AstNode*) * 5);
   783    values[0] = empty_string_id_;
   784    values[1] = default_file_corpus_;
   785    values[2] = empty_string_id_;
   786    values[3] = IdentifierFor(yy::location{}, filename);
   787    values[4] = empty_string_id_;
   788    AstNode* default_vname_tuple = new (&arena_) Tuple(yy::location{}, 5, values);
   789    AstNode* default_vname = new (&arena_) App(vname_id_, default_vname_tuple);
   790    return LoadInMemoryRuleFile(filename, default_vname, content_sym);
   791  }
   792  
   793  bool Verifier::LoadInMemoryRuleFile(const std::string& filename, AstNode* vname,
   794                                      Symbol text) {
   795    Tuple* checked_tuple = nullptr;
   796    if (auto* app = vname->AsApp()) {
   797      if (auto* tuple = app->rhs()->AsTuple()) {
   798        if (tuple->size() == 5 && tuple->element(1)->AsIdentifier() &&
   799            tuple->element(2)->AsIdentifier() &&
   800            tuple->element(3)->AsIdentifier()) {
   801          checked_tuple = tuple;
   802        }
   803      }
   804    }
   805    if (checked_tuple == nullptr) {
   806      return false;
   807    }
   808    StringPrettyPrinter printer;
   809    vname->Dump(symbol_table_, &printer);
   810    fake_files_[printer.str()] = text;
   811    return parser_.ParseInlineRuleString(
   812        symbol_table_.text(text), filename.empty() ? printer.str() : filename,
   813        checked_tuple->element(3)->AsIdentifier()->symbol(),
   814        checked_tuple->element(2)->AsIdentifier()->symbol(),
   815        checked_tuple->element(1)->AsIdentifier()->symbol(),
   816        *goal_comment_regex_);
   817  }
   818  
   819  void Verifier::IgnoreDuplicateFacts() { ignore_dups_ = true; }
   820  
   821  void Verifier::IgnoreCodeConflicts() { ignore_code_conflicts_ = true; }
   822  
   823  void Verifier::SaveEVarAssignments() {
   824    saving_assignments_ = true;
   825    parser_.InspectAllEVars();
   826  }
   827  
   828  void Verifier::Verbose() { verbose_ = true; }
   829  
   830  void Verifier::ShowGoals() {
   831    FileHandlePrettyPrinter printer(stdout);
   832    for (auto& group : parser_.groups()) {
   833      if (group.accept_if == GoalGroup::kNoneMayFail) {
   834        printer.Print("group:\n");
   835      } else {
   836        printer.Print("negated group:\n");
   837      }
   838      for (auto* goal : group.goals) {
   839        printer.Print("  goal: ");
   840        goal->Dump(symbol_table_, &printer);
   841        printer.Print("\n");
   842      }
   843    }
   844  }
   845  
   846  static bool PrintInMemoryFileSection(const std::string& file_text,
   847                                       size_t start_line, size_t start_ix,
   848                                       size_t end_line, size_t end_ix,
   849                                       PrettyPrinter* printer) {
   850    size_t current_line = 0;
   851    size_t pos = 0;
   852    auto walk_lines = [&](size_t until_line) {
   853      if (until_line == current_line) {
   854        return pos;
   855      }
   856      do {
   857        auto endline = file_text.find('\n', pos);
   858        if (endline == std::string::npos) {
   859          return std::string::npos;
   860        }
   861        pos = endline + 1;
   862      } while (++current_line < start_line);
   863      return pos;
   864    };
   865    auto begin = walk_lines(start_line);
   866    auto end = begin == std::string::npos ? begin : walk_lines(end_line);
   867    auto begin_ofs = begin + start_ix;
   868    auto end_ofs = end + end_ix;
   869    if (begin == std::string::npos || end == std::string::npos ||
   870        begin_ofs > file_text.size() || end_ofs > file_text.size()) {
   871      printer->Print("(error line out of bounds)");
   872      return false;
   873    }
   874    printer->Print(file_text.substr(begin_ofs, end_ofs - begin_ofs));
   875    return true;
   876  }
   877  
   878  static bool PrintFileSection(FILE* file, size_t start_line, size_t start_ix,
   879                               size_t end_line, size_t end_ix,
   880                               PrettyPrinter* printer) {
   881    if (!file) {
   882      printer->Print("(null file)\n");
   883      return false;
   884    }
   885    char* lineptr = nullptr;
   886    size_t buf_length = 0;
   887    ssize_t line_length = 0;
   888    size_t line_number = 0;
   889    while ((line_length = getline(&lineptr, &buf_length, file)) != -1) {
   890      if (line_number >= start_line && line_number <= end_line) {
   891        std::string text(lineptr);
   892        size_t line_begin = 0, line_end = text.size();
   893        if (line_number == start_line) {
   894          line_begin = start_ix;
   895        }
   896        if (line_number == end_line) {
   897          line_end = end_ix;
   898        }
   899        if (line_end - line_begin > text.size()) {
   900          printer->Print("(error line too big for actual line)\n");
   901        } else {
   902          text = text.substr(line_begin, line_end - line_begin);
   903          printer->Print(text);
   904        }
   905      }
   906      if (line_number == end_line) {
   907        free(lineptr);
   908        return true;
   909      }
   910      ++line_number;
   911    }
   912    printer->Print("(error line out of bounds)\n");
   913    free(lineptr);
   914    return false;
   915  }
   916  
   917  void Verifier::DumpErrorGoal(size_t group, size_t index) {
   918    FileHandlePrettyPrinter printer(stderr);
   919    if (group >= parser_.groups().size()) {
   920      printer.Print("(invalid group index ");
   921      printer.Print(std::to_string(group));
   922      printer.Print(")\n");
   923    }
   924    if (index >= parser_.groups()[group].goals.size()) {
   925      if (index > parser_.groups()[group].goals.size() ||
   926          parser_.groups()[group].goals.empty()) {
   927        printer.Print("(invalid index ");
   928        printer.Print(std::to_string(group));
   929        printer.Print(":");
   930        printer.Print(std::to_string(index));
   931        printer.Print(")\n");
   932        return;
   933      }
   934      printer.Print("(past the end of a ");
   935      if (parser_.groups()[group].accept_if == GoalGroup::kSomeMustFail) {
   936        printer.Print("negated ");
   937      }
   938      printer.Print("group, whose last goal was)\n  ");
   939      --index;
   940    }
   941    auto* goal = parser_.groups()[group].goals[index];
   942    yy::location goal_location = goal->location();
   943    yy::position goal_begin = goal_location.begin;
   944    yy::position goal_end = goal_location.end;
   945    if (goal_end.filename) {
   946      printer.Print(*goal_end.filename);
   947    } else {
   948      printer.Print("-");
   949    }
   950    printer.Print(":");
   951    if (goal_begin.filename) {
   952      printer.Print(std::to_string(goal_begin.line) + ":" +
   953                    std::to_string(goal_begin.column));
   954    }
   955    printer.Print("-");
   956    if (goal_end.filename) {
   957      printer.Print(std::to_string(goal_end.line) + ":" +
   958                    std::to_string(goal_end.column));
   959    }
   960    printer.Print(" ");
   961    if (goal_end.filename) {
   962      auto has_symbol = fake_files_.find(*goal_end.filename);
   963      if (has_symbol != fake_files_.end()) {
   964        PrintInMemoryFileSection(symbol_table_.text(has_symbol->second),
   965                                 goal_begin.line - 1, goal_begin.column - 1,
   966                                 goal_end.line - 1, goal_end.column - 1,
   967                                 &printer);
   968      } else if (*goal_end.filename != *kStandardIn &&
   969                 *goal_begin.filename == *goal_end.filename) {
   970        FILE* f = fopen(goal_end.filename->c_str(), "r");
   971        if (f != nullptr) {
   972          PrintFileSection(f, goal_begin.line - 1, goal_begin.column - 1,
   973                           goal_end.line - 1, goal_end.column - 1, &printer);
   974          fclose(f);
   975        }
   976      }
   977    }
   978    printer.Print("\n  Goal: ");
   979    goal->Dump(symbol_table_, &printer);
   980    printer.Print("\n");
   981  }
   982  
   983  bool Verifier::VerifyAllGoals(
   984      std::function<bool(Verifier*, const Inspection&, std::string_view)>
   985          inspect) {
   986    if (use_fast_solver_) {
   987      auto result = RunSouffle(
   988          symbol_table_, parser_.groups(), facts_, anchors_,
   989          parser_.inspections(),
   990          [&](const Inspection& i, std::string_view o) {
   991            return inspect(this, i, o);
   992          },
   993          [&](Symbol s) { return symbol_table_.PrettyText(s); });
   994      highest_goal_reached_ = result.highest_goal_reached;
   995      highest_group_reached_ = result.highest_group_reached;
   996      return result.success;
   997    } else {
   998      if (!PrepareDatabase()) {
   999        return false;
  1000      }
  1001      std::function<bool(Verifier*, const Inspection&)> wi =
  1002          [&](Verifier* v, const Inspection& i) {
  1003            return inspect(v, i, v->InspectionString(i));
  1004          };
  1005      Solver solver(this, facts_, anchors_, wi);
  1006      bool result = solver.Solve();
  1007      highest_goal_reached_ = solver.highest_goal_reached();
  1008      highest_group_reached_ = solver.highest_group_reached();
  1009      return result;
  1010    }
  1011  }
  1012  
  1013  bool Verifier::VerifyAllGoals() {
  1014    return VerifyAllGoals([this](Verifier* context, const Inspection& inspection,
  1015                                 std::string_view str) {
  1016      if (inspection.kind == Inspection::Kind::EXPLICIT) {
  1017        absl::FPrintF(saving_assignments_ ? stderr : stdout, "%s: %s\n",
  1018                      inspection.label, str);
  1019      }
  1020      if (!str.empty()) {
  1021        saved_assignments_[inspection.label] = str;
  1022      } else if (inspection.evar->current() != nullptr) {
  1023        StringPrettyPrinter printer;
  1024        inspection.evar->current()->Dump(symbol_table_, &printer);
  1025        saved_assignments_[inspection.label] = printer.str();
  1026      }
  1027      return true;
  1028    });
  1029  }
  1030  
  1031  Identifier* Verifier::IdentifierFor(const yy::location& location,
  1032                                      const std::string& token) {
  1033    Symbol symbol = symbol_table_.intern(token);
  1034    return new (&arena_) Identifier(location, symbol);
  1035  }
  1036  
  1037  Identifier* Verifier::IdentifierFor(const yy::location& location, int integer) {
  1038    Symbol symbol = symbol_table_.intern(std::to_string(integer));
  1039    return new (&arena_) Identifier(location, symbol);
  1040  }
  1041  
  1042  AstNode* Verifier::MakePredicate(const yy::location& location, AstNode* head,
  1043                                   absl::Span<AstNode* const> values) {
  1044    size_t values_count = values.size();
  1045    AstNode** body = (AstNode**)arena_.New(values_count * sizeof(AstNode*));
  1046    size_t vn = 0;
  1047    for (AstNode* v : values) {
  1048      body[vn] = v;
  1049      ++vn;
  1050    }
  1051    AstNode* tuple = new (&arena_) Tuple(location, values_count, body);
  1052    return new (&arena_) App(location, head, tuple);
  1053  }
  1054  
  1055  /// \brief Sort nodes such that nodes and facts are grouped.
  1056  static bool GraphvizSortOrder(AstNode* a, AstNode* b) {
  1057    Tuple* ta = a->AsApp()->rhs()->AsTuple();
  1058    Tuple* tb = b->AsApp()->rhs()->AsTuple();
  1059    if (EncodedVNameOrIdentLessThan(ta->element(0), tb->element(0))) {
  1060      return true;
  1061    }
  1062    if (!EncodedVNameOrIdentEqualTo(ta->element(0), tb->element(0))) {
  1063      return false;
  1064    }
  1065    if (EncodedIdentLessThan(ta->element(1), tb->element(1))) {
  1066      return true;
  1067    }
  1068    if (!EncodedIdentEqualTo(ta->element(1), tb->element(1))) {
  1069      return false;
  1070    }
  1071    if (EncodedVNameOrIdentLessThan(ta->element(2), tb->element(2))) {
  1072      return true;
  1073    }
  1074    if (!EncodedVNameOrIdentEqualTo(ta->element(2), tb->element(2))) {
  1075      return false;
  1076    }
  1077    if (EncodedIdentLessThan(ta->element(3), tb->element(3))) {
  1078      return true;
  1079    }
  1080    if (!EncodedIdentEqualTo(ta->element(3), tb->element(3))) {
  1081      return false;
  1082    }
  1083    if (EncodedIdentLessThan(ta->element(4), tb->element(4))) {
  1084      return true;
  1085    }
  1086    return false;
  1087  }
  1088  
  1089  static bool EncodedFactEqualTo(AstNode* a, AstNode* b) {
  1090    Tuple* ta = a->AsApp()->rhs()->AsTuple();
  1091    Tuple* tb = b->AsApp()->rhs()->AsTuple();
  1092    return EncodedVNameOrIdentEqualTo(ta->element(0), tb->element(0)) &&
  1093           EncodedIdentEqualTo(ta->element(1), tb->element(1)) &&
  1094           EncodedVNameOrIdentEqualTo(ta->element(2), tb->element(2)) &&
  1095           EncodedIdentEqualTo(ta->element(3), tb->element(3)) &&
  1096           EncodedIdentEqualTo(ta->element(4), tb->element(4));
  1097  }
  1098  
  1099  static bool EncodedVNameHasValidForm(Verifier* cxt, AstNode* a) {
  1100    Tuple* ta = a->AsApp()->rhs()->AsTuple();
  1101    return ta->element(0) != cxt->empty_string_id() ||
  1102           ta->element(1) != cxt->empty_string_id() ||
  1103           ta->element(2) != cxt->empty_string_id() ||
  1104           ta->element(3) != cxt->empty_string_id() ||
  1105           ta->element(4) != cxt->empty_string_id();
  1106  }
  1107  
  1108  static bool EncodedFactHasValidForm(Verifier* cxt, AstNode* a) {
  1109    Tuple* ta = a->AsApp()->rhs()->AsTuple();
  1110    if (ta->element(0) == cxt->empty_string_id() ||
  1111        !EncodedVNameHasValidForm(cxt, ta->element(0))) {
  1112      // Always need a source.
  1113      return false;
  1114    }
  1115    if (ta->element(1) == cxt->empty_string_id()) {
  1116      // (source, "", "", string, _)
  1117      return ta->element(2) == cxt->empty_string_id() &&
  1118             ta->element(3) != cxt->empty_string_id();
  1119    } else {
  1120      // (source, edge, target, ...
  1121      if (ta->element(2) == cxt->empty_string_id() ||
  1122          !EncodedVNameHasValidForm(cxt, ta->element(2))) {
  1123        return false;
  1124      }
  1125      if (EncodedIdentEqualTo(ta->element(3), cxt->root_id())) {
  1126        // ... /, )
  1127        return EncodedIdentEqualTo(ta->element(4), cxt->empty_string_id());
  1128      } else {
  1129        // ... /kythe/ordinal, base10string )
  1130        if (!EncodedIdentEqualTo(ta->element(3), cxt->ordinal_id())) {
  1131          return false;
  1132        }
  1133        const std::string& ordinal_val =
  1134            cxt->symbol_table()->text(ta->element(4)->AsIdentifier()->symbol());
  1135        // TODO: check if valid int
  1136        return true;
  1137      }
  1138    }
  1139  }
  1140  
  1141  Verifier::InternedVName Verifier::InternVName(AstNode* node) {
  1142    auto* tuple = node->AsApp()->rhs()->AsTuple();
  1143    return {tuple->element(0)->AsIdentifier()->symbol(),
  1144            tuple->element(1)->AsIdentifier()->symbol(),
  1145            tuple->element(2)->AsIdentifier()->symbol(),
  1146            tuple->element(3)->AsIdentifier()->symbol(),
  1147            tuple->element(4)->AsIdentifier()->symbol()};
  1148  }
  1149  
  1150  AstNode* Verifier::FixFileVName(AstNode* node) {
  1151    if (auto* app = node->AsApp()) {
  1152      if (auto* tuple = app->rhs()->AsTuple(); tuple->size() == 5) {
  1153        if (auto* corpus_id = tuple->element(1)->AsIdentifier()) {
  1154          if (corpus_id->symbol() == empty_string_sym_) {
  1155            if (verbose_) {
  1156              if (auto* path_id = tuple->element(3)->AsIdentifier()) {
  1157                fprintf(stderr,
  1158                        "Warning: file '%s' is missing a corpus in its VName.\n",
  1159                        symbol_table_.text(path_id->symbol()).c_str());
  1160              }
  1161            }
  1162            AstNode** values = (AstNode**)arena_.New(sizeof(AstNode*) * 5);
  1163            values[0] = tuple->element(0);
  1164            values[1] = default_file_corpus_;
  1165            values[2] = tuple->element(2);
  1166            values[3] = tuple->element(3);
  1167            values[4] = tuple->element(4);
  1168            AstNode* new_tuple =
  1169                new (&arena_) Tuple(builtin_location_, 5, values);
  1170            return new (&arena_) App(vname_id_, new_tuple);
  1171          }
  1172        }
  1173      }
  1174    }
  1175    return node;
  1176  }
  1177  
  1178  bool Verifier::ProcessFactTupleForFastSolver(Tuple* tuple) {
  1179    // TODO(zarko): None of the text processing supports non-UTF8 encoded files.
  1180    if (tuple->element(1) == empty_string_id_ &&
  1181        tuple->element(2) == empty_string_id_) {
  1182      if (EncodedIdentEqualTo(tuple->element(3), kind_id_)) {
  1183        auto vname = InternVName(tuple->element(0));
  1184        if (EncodedIdentEqualTo(tuple->element(4), file_id_)) {
  1185          auto sym = fast_solver_files_.insert({vname, known_file_sym_});
  1186          if (!sym.second && sym.first->second != known_not_file_sym_) {
  1187            if (assertions_from_file_nodes_) {
  1188              return LoadInMemoryRuleFile("", FixFileVName(tuple->element(0)),
  1189                                          sym.first->second);
  1190            } else {
  1191              content_to_vname_[sym.first->second] =
  1192                  FixFileVName(tuple->element(0));
  1193            }
  1194          }
  1195        } else {
  1196          fast_solver_files_[vname] = known_not_file_sym_;
  1197        }
  1198      } else if (EncodedIdentEqualTo(tuple->element(3), text_id_)) {
  1199        auto vname = InternVName(tuple->element(0));
  1200        auto content = tuple->element(4)->AsIdentifier()->symbol();
  1201        auto file = fast_solver_files_.insert({vname, content});
  1202        if (!file.second && file.first->second == known_file_sym_) {
  1203          if (assertions_from_file_nodes_) {
  1204            return LoadInMemoryRuleFile("", FixFileVName(tuple->element(0)),
  1205                                        content);
  1206          } else {
  1207            content_to_vname_[content] = FixFileVName(tuple->element(0));
  1208          }
  1209        }
  1210      }
  1211    }
  1212    return true;
  1213  }
  1214  
/// \brief Sorts and validates the fact database for the built-in solver.
///
/// The facts are sorted with `EncodedFactLessThan` and then scanned once.
/// During the scan this function:
///   - rejects facts that fail `EncodedFactHasValidForm`;
///   - pairs each file node's kind fact with its text fact and either parses
///     the text for goals or records it in `content_to_vname_`;
///   - pairs each anchor's kind, start and end facts and registers the anchor
///     with `AddAnchor`;
///   - reports adjacent equal facts (unless `ignore_dups_` is set) and facts
///     about one node that share a name but differ in value (differing code
///     facts are tolerated when `ignore_code_conflicts_` is set).
/// Problems are printed to stderr and the scan continues so that all of them
/// are reported. On success the facts are re-sorted with
/// `FastLookupFactLessThan`.
///
/// \return true if the database was already prepared, if the fast solver is
/// enabled (nothing is prepared in that case), or if every check passed;
/// false otherwise.
bool Verifier::PrepareDatabase() {
  // Nothing to do if an earlier call already succeeded.
  if (database_prepared_) {
    return true;
  }
  if (use_fast_solver_) {
    LOG(WARNING) << "PrepareDatabase() called when fast solver was enabled";
    return true;
  }
  // TODO(zarko): Make this configurable.
  FileHandlePrettyPrinter printer(stderr);
  // First, sort the tuples. As an invariant, we know they will be of the form
  // fact (vname | ident, ident, vname | ident, ident, ident)
  // vname (ident, ident, ident, ident, ident)
  // and all idents will have been uniqued (so we can compare them purely
  // by symbol ID).
  std::sort(facts_.begin(), facts_.end(), EncodedFactLessThan);
  // Now we can do a simple pairwise check on each of the facts to see
  // whether the invariants hold.
  bool is_ok = true;
  // State carried between consecutive facts: the anchor and file node whose
  // remaining facts we are still waiting for. ~0 (all bits set) marks "no
  // start offset seen yet".
  AstNode* last_anchor_vname = nullptr;
  AstNode* last_file_vname = nullptr;
  size_t last_anchor_start = ~0;
  for (size_t f = 0; f < facts_.size(); ++f) {
    AstNode* fb = facts_[f];

    if (!EncodedFactHasValidForm(this, fb)) {
      printer.Print("Fact has invalid form:\n  ");
      fb->Dump(symbol_table_, &printer);
      printer.Print("\n");
      is_ok = false;
      continue;
    }
    Tuple* tb = fb->AsApp()->rhs()->AsTuple();
    // Facts with an empty edge kind and empty target describe a node.
    if (tb->element(1) == empty_string_id_ &&
        tb->element(2) == empty_string_id_) {
      bool is_kind_fact = EncodedIdentEqualTo(tb->element(3), kind_id_);
      // Check to see if this fact entry describes part of a file.
      // NB: kind_id_ is ordered before text_id_.
      if (is_kind_fact) {
        if (EncodedIdentEqualTo(tb->element(4), file_id_)) {
          last_file_vname = tb->element(0);
        } else {
          last_file_vname = nullptr;
        }
      } else if (last_file_vname != nullptr &&
                 EncodedIdentEqualTo(tb->element(3), text_id_)) {
        if (EncodedVNameOrIdentEqualTo(last_file_vname, tb->element(0))) {
          // This is the text of the file node we were tracking.
          if (assertions_from_file_nodes_) {
            if (!LoadInMemoryRuleFile(
                    "", FixFileVName(tb->element(0)),
                    tb->element(4)->AsIdentifier()->symbol())) {
              is_ok = false;
            }
          } else {
            content_to_vname_[tb->element(4)->AsIdentifier()->symbol()] =
                FixFileVName(tb->element(0));
          }
        }
        last_file_vname = nullptr;
      }
      // Check to see if this fact entry describes part of an anchor.
      // We've arranged via EncodedFactLessThan to sort kind_id_ before
      // start_id_ and start_id_ before end_id_ and to group all node facts
      // together in uninterrupted runs.
      if (is_kind_fact && EncodedIdentEqualTo(tb->element(4), anchor_id_)) {
        // Start tracking a new anchor.
        last_anchor_vname = tb->element(0);
        last_anchor_start = ~0;
      } else if (last_anchor_vname != nullptr &&
                 EncodedIdentEqualTo(tb->element(3), start_id_) &&
                 tb->element(4)->AsIdentifier()) {
        if (EncodedVNameOrIdentEqualTo(last_anchor_vname, tb->element(0))) {
          // This is a fact about the anchor we're tracking.
          std::stringstream(
              symbol_table_.text(tb->element(4)->AsIdentifier()->symbol())) >>
              last_anchor_start;
        } else {
          // This is a fact about node we're not tracking; given our sort order,
          // we'll never get enough information for the node we are tracking,
          // so stop tracking it.
          last_anchor_vname = nullptr;
          last_anchor_start = ~0;
        }
      } else if (last_anchor_start != ~0 &&
                 EncodedIdentEqualTo(tb->element(3), end_id_) &&
                 tb->element(4)->AsIdentifier()) {
        if (EncodedVNameOrIdentEqualTo(last_anchor_vname, tb->element(0))) {
          // We have enough information about the anchor we're tracking.
          size_t last_anchor_end = ~0;
          std::stringstream(
              symbol_table_.text(tb->element(4)->AsIdentifier()->symbol())) >>
              last_anchor_end;
          AddAnchor(last_anchor_vname, last_anchor_start, last_anchor_end);
        }
        last_anchor_vname = nullptr;
        last_anchor_start = ~0;
      }
    }
    // The remaining checks compare this fact with its predecessor, so the
    // first fact has nothing to be compared against.
    if (f == 0) {
      continue;
    }

    AstNode* fa = facts_[f - 1];
    if (!ignore_dups_ && EncodedFactEqualTo(fa, fb)) {
      printer.Print("Two facts were equal:\n  ");
      fa->Dump(symbol_table_, &printer);
      printer.Print("\n  ");
      fb->Dump(symbol_table_, &printer);
      printer.Print("\n");
      is_ok = false;
      continue;
    }
    Tuple* ta = fa->AsApp()->rhs()->AsTuple();
    // Two node facts with the same source and the same fact name but
    // different values conflict with each other.
    if (EncodedVNameEqualTo(ta->element(0)->AsApp(), tb->element(0)->AsApp()) &&
        ta->element(1) == empty_string_id_ &&
        tb->element(1) == empty_string_id_ &&
        ta->element(2) == empty_string_id_ &&
        tb->element(2) == empty_string_id_ &&
        EncodedIdentEqualTo(ta->element(3), tb->element(3)) &&
        !EncodedIdentEqualTo(ta->element(4), tb->element(4))) {
      if (EncodedIdentEqualTo(ta->element(3), code_id_) ||
          EncodedIdentEqualTo(ta->element(3), code_json_id_)) {
        if (!ignore_code_conflicts_) {
          // TODO(#1553): (closed?) Add documentation for these new edges.
          printer.Print(
              "Two /kythe/code facts about a node differed in value:\n  ");
          ta->element(0)->Dump(symbol_table_, &printer);
          printer.Print("\n  ");
          printer.Print("\nThe decoded values were:\n");
          // Prints a fact value decoded as a MarkedSource message, or a
          // placeholder if it is not an identifier or does not decode.
          auto print_decoded = [&](AstNode* value) {
            if (auto* ident = value->AsIdentifier()) {
              proto::common::MarkedSource marked_source;
              if (!marked_source.ParseFromString(
                      symbol_table_.text(ident->symbol()))) {
                printer.Print("(failed to decode)\n");
              } else {
                printer.Print(absl::StrCat(marked_source));
                printer.Print("\n");
              }
            } else {
              printer.Print("(not an identifier)\n");
            }
          };
          print_decoded(ta->element(4));
          printer.Print("\n -----------------  versus  ----------------- \n\n");
          print_decoded(tb->element(4));
          is_ok = false;
        }
      } else {
        printer.Print("Two facts about a node differed in value:\n  ");
        fa->Dump(symbol_table_, &printer);
        printer.Print("\n  ");
        fb->Dump(symbol_table_, &printer);
        printer.Print("\n");
        is_ok = false;
      }
    }
  }
  // Only a database that passed every check is re-sorted for lookup.
  if (is_ok) {
    std::sort(facts_.begin(), facts_.end(), FastLookupFactLessThan);
  }
  database_prepared_ = is_ok;
  return is_ok;
}
  1379  
  1380  std::string Verifier::InspectionString(const Inspection& i) {
  1381    StringPrettyPrinter printer;
  1382    if (i.evar == nullptr) {
  1383      printer.Print("nil");
  1384    } else {
  1385      i.evar->Dump(symbol_table_, &printer);
  1386    }
  1387    return printer.str();
  1388  }
  1389  
  1390  AstNode* Verifier::ConvertVName(const yy::location& loc,
  1391                                  const kythe::proto::VName& vname) {
  1392    AstNode** values = (AstNode**)arena_.New(sizeof(AstNode*) * 5);
  1393    values[0] = vname.signature().empty() ? empty_string_id_
  1394                                          : IdentifierFor(loc, vname.signature());
  1395    values[1] = vname.corpus().empty() ? empty_string_id_
  1396                                       : IdentifierFor(loc, vname.corpus());
  1397    values[2] = vname.root().empty() ? empty_string_id_
  1398                                     : IdentifierFor(loc, vname.root());
  1399    values[3] = vname.path().empty() ? empty_string_id_
  1400                                     : IdentifierFor(loc, vname.path());
  1401    values[4] = vname.language().empty() ? empty_string_id_
  1402                                         : IdentifierFor(loc, vname.language());
  1403    AstNode* tuple = new (&arena_) Tuple(loc, 5, values);
  1404    return new (&arena_) App(vname_id_, tuple);
  1405  }
  1406  
  1407  AstNode* Verifier::NewUniqueVName(const yy::location& loc) {
  1408    return MakePredicate(
  1409        loc, vname_id_,
  1410        {new (&arena_) Identifier(loc, symbol_table_.unique()), empty_string_id_,
  1411         empty_string_id_, empty_string_id_, empty_string_id_});
  1412  }
  1413  
  1414  AstNode* Verifier::ConvertCodeFact(const yy::location& loc,
  1415                                     const std::string& code_data) {
  1416    proto::common::MarkedSource marked_source;
  1417    if (!marked_source.ParseFromString(code_data)) {
  1418      LOG(ERROR) << loc << ": can't parse code protobuf" << std::endl;
  1419      return nullptr;
  1420    }
  1421    return ConvertMarkedSource(loc, marked_source);
  1422  }
  1423  
  1424  AstNode* Verifier::ConvertCodeJsonFact(const yy::location& loc,
  1425                                         const std::string& code_data) {
  1426    proto::common::MarkedSource marked_source;
  1427    if (!google::protobuf::util::JsonStringToMessage(code_data, &marked_source)
  1428             .ok()) {
  1429      LOG(ERROR) << loc << ": can't parse code/json protobuf" << std::endl;
  1430      return nullptr;
  1431    }
  1432    return ConvertMarkedSource(loc, marked_source);
  1433  }
  1434  
  1435  AstNode* Verifier::ConvertMarkedSource(
  1436      const yy::location& loc, const proto::common::MarkedSource& source) {
  1437    // Explode each MarkedSource message into a node with an unutterable vname.
  1438    auto* vname = NewUniqueVName(loc);
  1439    for (int child = 0; child < source.child_size(); ++child) {
  1440      auto* child_vname = ConvertMarkedSource(loc, source.child(child));
  1441      if (child_vname == nullptr) {
  1442        return nullptr;
  1443      }
  1444      facts_.push_back(MakePredicate(
  1445          loc, fact_id_,
  1446          {vname, marked_source_child_id_, child_vname, ordinal_id_,
  1447           IdentifierFor(builtin_location_, std::to_string(child))}));
  1448    }
  1449    for (const auto& link : source.link()) {
  1450      if (link.definition_size() != 1) {
  1451        std::cerr << loc << ": bad link: want one definition" << std::endl;
  1452        return nullptr;
  1453      }
  1454      auto from_uri = URI::FromString(link.definition(0));
  1455      if (!from_uri.first) {
  1456        std::cerr << loc << ": bad URI in link" << std::endl;
  1457        return nullptr;
  1458      }
  1459      facts_.push_back(MakePredicate(loc, fact_id_,
  1460                                     {vname, marked_source_link_id_,
  1461                                      ConvertVName(loc, from_uri.second.v_name()),
  1462                                      root_id_, empty_string_id_}));
  1463    }
  1464    auto emit_fact = [&](AstNode* fact_id, AstNode* fact_value) {
  1465      facts_.push_back(MakePredicate(
  1466          loc, fact_id_,
  1467          {vname, empty_string_id_, empty_string_id_, fact_id, fact_value}));
  1468    };
  1469    switch (source.kind()) {
  1470      case proto::common::MarkedSource::BOX:
  1471        emit_fact(marked_source_kind_id_, marked_source_box_id_);
  1472        break;
  1473      case proto::common::MarkedSource::TYPE:
  1474        emit_fact(marked_source_kind_id_, marked_source_type_id_);
  1475        break;
  1476      case proto::common::MarkedSource::PARAMETER:
  1477        emit_fact(marked_source_kind_id_, marked_source_parameter_id_);
  1478        break;
  1479      case proto::common::MarkedSource::IDENTIFIER:
  1480        emit_fact(marked_source_kind_id_, marked_source_identifier_id_);
  1481        break;
  1482      case proto::common::MarkedSource::CONTEXT:
  1483        emit_fact(marked_source_kind_id_, marked_source_context_id_);
  1484        break;
  1485      case proto::common::MarkedSource::INITIALIZER:
  1486        emit_fact(marked_source_kind_id_, marked_source_initializer_id_);
  1487        break;
  1488      case proto::common::MarkedSource::MODIFIER:
  1489        emit_fact(marked_source_kind_id_, marked_source_modifier_id_);
  1490        break;
  1491      case proto::common::MarkedSource::PARAMETER_LOOKUP_BY_PARAM:
  1492        emit_fact(marked_source_kind_id_,
  1493                  marked_source_parameter_lookup_by_param_id_);
  1494        break;
  1495      case proto::common::MarkedSource::LOOKUP_BY_PARAM:
  1496        emit_fact(marked_source_kind_id_, marked_source_lookup_by_param_id_);
  1497        break;
  1498      case proto::common::MarkedSource::PARAMETER_LOOKUP_BY_TPARAM:
  1499        emit_fact(marked_source_kind_id_,
  1500                  marked_source_parameter_lookup_by_tparam_id_);
  1501        break;
  1502      case proto::common::MarkedSource::LOOKUP_BY_TPARAM:
  1503        emit_fact(marked_source_kind_id_, marked_source_lookup_by_tparam_id_);
  1504        break;
  1505      case proto::common::MarkedSource::PARAMETER_LOOKUP_BY_PARAM_WITH_DEFAULTS:
  1506        emit_fact(marked_source_kind_id_,
  1507                  marked_source_parameter_lookup_by_param_with_defaults_id_);
  1508        break;
  1509      case proto::common::MarkedSource::LOOKUP_BY_TYPED:
  1510        emit_fact(marked_source_kind_id_, marked_source_lookup_by_typed_id_);
  1511        break;
  1512      // The proto enum is polluted with enumerators like
  1513      // MarkedSource_Kind_MarkedSource_Kind_INT_MIN_SENTINEL_DO_NOT_USE_.
  1514      default:
  1515        std::cerr << loc << ": unknown source kind for MarkedSource" << std::endl;
  1516    }
  1517    emit_fact(marked_source_pre_text_id_, IdentifierFor(loc, source.pre_text()));
  1518    emit_fact(marked_source_post_child_text_id_,
  1519              IdentifierFor(loc, source.post_child_text()));
  1520    emit_fact(marked_source_post_text_id_,
  1521              IdentifierFor(loc, source.post_text()));
  1522    emit_fact(marked_source_lookup_index_id_,
  1523              IdentifierFor(loc, std::to_string(source.lookup_index())));
  1524    emit_fact(
  1525        marked_source_default_children_count_id_,
  1526        IdentifierFor(loc, std::to_string(source.default_children_count())));
  1527    emit_fact(marked_source_add_final_list_token_id_,
  1528              source.add_final_list_token() ? marked_source_true_id_
  1529                                            : marked_source_false_id_);
  1530    return vname;
  1531  }
  1532  
  1533  bool Verifier::AssertSingleFact(std::string* database, unsigned int fact_id,
  1534                                  const kythe::proto::Entry& entry) {
  1535    yy::location loc;
  1536    loc.initialize(database);
  1537    loc.begin.column = 1;
  1538    loc.begin.line = fact_id;
  1539    loc.end = loc.begin;
  1540    Symbol code_symbol = code_id_->AsIdentifier()->symbol();
  1541    Symbol code_json_symbol = code_json_id_->AsIdentifier()->symbol();
  1542    AstNode** values = (AstNode**)arena_.New(sizeof(AstNode*) * 5);
  1543    values[0] =
  1544        entry.has_source() ? ConvertVName(loc, entry.source()) : empty_string_id_;
  1545    // We're removing support for ordinal facts. Support them during the
  1546    // transition, but also support the new dot-separated edge kinds that serve
  1547    // the same purpose.
  1548    auto dot_pos = entry.edge_kind().rfind('.');
  1549    bool is_code = false;
  1550    if (dot_pos != std::string::npos && dot_pos > 0 &&
  1551        dot_pos < entry.edge_kind().size() - 1) {
  1552      values[1] = IdentifierFor(loc, entry.edge_kind().substr(0, dot_pos));
  1553      values[3] = ordinal_id_;
  1554      values[4] = IdentifierFor(loc, entry.edge_kind().substr(dot_pos + 1));
  1555    } else {
  1556      values[1] = entry.edge_kind().empty()
  1557                      ? empty_string_id_
  1558                      : IdentifierFor(loc, entry.edge_kind());
  1559      values[3] = entry.fact_name().empty()
  1560                      ? empty_string_id_
  1561                      : IdentifierFor(loc, entry.fact_name());
  1562      if (values[3]->AsIdentifier()->symbol() == code_symbol &&
  1563          convert_marked_source_) {
  1564        // Code facts are turned into subgraphs, so this fact entry will turn
  1565        // into an edge entry.
  1566        if ((values[2] = ConvertCodeFact(loc, entry.fact_value())) == nullptr) {
  1567          return false;
  1568        }
  1569        values[1] = marked_source_code_edge_id_;
  1570        values[3] = root_id_;
  1571        values[4] = empty_string_id_;
  1572        is_code = true;
  1573      } else if (values[3]->AsIdentifier()->symbol() == code_json_symbol &&
  1574                 convert_marked_source_) {
  1575        // Code facts are turned into subgraphs, so this fact entry will turn
  1576        // into an edge entry.
  1577        if ((values[2] = ConvertCodeJsonFact(loc, entry.fact_value())) ==
  1578            nullptr) {
  1579          return false;
  1580        }
  1581        values[1] = marked_source_code_edge_id_;
  1582        values[3] = root_id_;
  1583        values[4] = empty_string_id_;
  1584        is_code = true;
  1585      } else {
  1586        values[4] = entry.fact_value().empty()
  1587                        ? empty_string_id_
  1588                        : IdentifierFor(loc, entry.fact_value());
  1589      }
  1590    }
  1591    if (!is_code) {
  1592      values[2] = entry.has_target() ? ConvertVName(loc, entry.target())
  1593                                     : empty_string_id_;
  1594    }
  1595  
  1596    Tuple* tuple = new (&arena_) Tuple(loc, 5, values);
  1597    AstNode* fact = new (&arena_) App(fact_id_, tuple);
  1598  
  1599    database_prepared_ = false;
  1600    facts_.push_back(fact);
  1601    if (use_fast_solver_) {
  1602      return ProcessFactTupleForFastSolver(tuple);
  1603    }
  1604    return true;
  1605  }
  1606  
  1607  void Verifier::DumpAsJson() {
  1608    if (!PrepareDatabase()) {
  1609      return;
  1610    }
  1611    // Use the same sort order as we do with Graphviz.
  1612    std::sort(facts_.begin(), facts_.end(), GraphvizSortOrder);
  1613    FileHandlePrettyPrinter printer(stdout);
  1614    QuoteEscapingPrettyPrinter escaping_printer(printer);
  1615    FileHandlePrettyPrinter dprinter(stderr);
  1616    auto DumpAsJson = [this, &printer, &escaping_printer](const char* label,
  1617                                                          AstNode* node) {
  1618      printer.Print(label);
  1619      if (node == empty_string_id()) {
  1620        // Canonicalize "" as null in the JSON output.
  1621        printer.Print("null");
  1622      } else {
  1623        printer.Print("\"");
  1624        node->Dump(symbol_table_, &escaping_printer);
  1625        printer.Print("\"");
  1626      }
  1627    };
  1628    auto DumpVName = [this, &printer, &DumpAsJson](const char* label,
  1629                                                   AstNode* node) {
  1630      printer.Print(label);
  1631      if (node == empty_string_id()) {
  1632        printer.Print("null");
  1633      } else {
  1634        Tuple* vname = node->AsApp()->rhs()->AsTuple();
  1635        printer.Print("{");
  1636        DumpAsJson("\"signature\":", vname->element(0));
  1637        DumpAsJson(",\"corpus\":", vname->element(1));
  1638        DumpAsJson(",\"root\":", vname->element(2));
  1639        DumpAsJson(",\"path\":", vname->element(3));
  1640        DumpAsJson(",\"language\":", vname->element(4));
  1641        printer.Print("}");
  1642      }
  1643    };
  1644    printer.Print("[");
  1645    for (size_t i = 0; i < facts_.size(); ++i) {
  1646      AstNode* fact = facts_[i];
  1647      Tuple* t = fact->AsApp()->rhs()->AsTuple();
  1648      printer.Print("{");
  1649      DumpVName("\"source\":", t->element(0));
  1650      DumpAsJson(",\"edge_kind\":", t->element(1));
  1651      DumpVName(",\"target\":", t->element(2));
  1652      DumpAsJson(",\"fact_name\":", t->element(3));
  1653      DumpAsJson(",\"fact_value\":", t->element(4));
  1654      printer.Print(i + 1 == facts_.size() ? "}" : "},");
  1655    }
  1656    printer.Print("]\n");
  1657  }
  1658  
  1659  void Verifier::DumpAsDot() {
  1660    if (!PrepareDatabase()) {
  1661      return;
  1662    }
  1663    std::map<std::string, std::string> vname_labels;
  1664    for (const auto& [label, str] : saved_assignments_) {
  1665      if (str.empty()) {
  1666        continue;
  1667      }
  1668      auto old_label = vname_labels.find(str);
  1669      if (old_label == vname_labels.end()) {
  1670        vname_labels[str] = label;
  1671      } else {
  1672        old_label->second += ", " + label;
  1673      }
  1674    }
  1675    auto GetLabel = [&](AstNode* node) {
  1676      if (!node) {
  1677        return std::string();
  1678      }
  1679      StringPrettyPrinter id_string;
  1680      node->Dump(symbol_table_, &id_string);
  1681      const auto& label = vname_labels.find(id_string.str());
  1682      if (label != vname_labels.end()) {
  1683        return label->second;
  1684      } else {
  1685        return std::string();
  1686      }
  1687    };
  1688    auto ElideNode = [&](AstNode* node) {
  1689      if (show_unlabeled_) {
  1690        return false;
  1691      }
  1692      return GetLabel(node).empty();
  1693    };
  1694  
  1695    std::sort(facts_.begin(), facts_.end(), GraphvizSortOrder);
  1696    FileHandlePrettyPrinter printer(stdout);
  1697    QuoteEscapingPrettyPrinter quote_printer(printer);
  1698    HtmlEscapingPrettyPrinter html_printer(printer);
  1699    FileHandlePrettyPrinter dprinter(stderr);
  1700  
  1701    auto PrintQuotedNodeId = [&](AstNode* node) {
  1702      printer.Print("\"");
  1703      if (std::string label = GetLabel(node);
  1704          show_labeled_vnames_ || label.empty()) {
  1705        node->Dump(symbol_table_, &quote_printer);
  1706      } else {
  1707        quote_printer.Print(label);
  1708      }
  1709      printer.Print("\"");
  1710    };
  1711  
  1712    auto FactName = [this](AstNode* node) {
  1713      StringPrettyPrinter printer;
  1714      node->Dump(symbol_table_, &printer);
  1715      if (show_fact_prefix_) {
  1716        return printer.str();
  1717      }
  1718      return std::string(absl::StripPrefix(printer.str(), "/kythe/"));
  1719    };
  1720  
  1721    auto EdgeName = [this](AstNode* node) {
  1722      StringPrettyPrinter printer;
  1723      node->Dump(symbol_table_, &printer);
  1724      if (show_fact_prefix_) {
  1725        return printer.str();
  1726      }
  1727      return std::string(absl::StripPrefix(printer.str(), "/kythe/edge/"));
  1728    };
  1729  
  1730    printer.Print("digraph G {\n");
  1731    for (size_t i = 0; i < facts_.size(); ++i) {
  1732      AstNode* fact = facts_[i];
  1733      Tuple* t = fact->AsApp()->rhs()->AsTuple();
  1734      if (t->element(1) == empty_string_id()) {
  1735        // Node. We sorted these above st all the facts should come subsequent.
  1736        // Figure out if the node is an anchor.
  1737        NodeFacts info =
  1738            ReadNodeFacts(absl::MakeConstSpan(facts_).subspan(i), *this);
  1739        if (!info.facts.empty()) {
  1740          // Skip over facts which correspond to this node.
  1741          i += info.facts.size() - 1;
  1742        }
  1743        if (ElideNode(t->element(0))) {
  1744          continue;
  1745        }
  1746        PrintQuotedNodeId(t->element(0));
  1747        std::string label = GetLabel(t->element(0));
  1748        if (info.kind == NodeKind::kAnchor && !show_anchors_) {
  1749          printer.Print(" [ shape=circle, label=\"");
  1750          if (label.empty()) {
  1751            printer.Print("@");
  1752          } else {
  1753            printer.Print(label);
  1754            printer.Print("\", color=\"blue");
  1755          }
  1756          printer.Print("\" ];\n");
  1757        } else {
  1758          printer.Print(" [ label=<<TABLE>");
  1759          printer.Print("<TR><TD COLSPAN=\"2\">");
  1760          Tuple* nt = info.facts.front()->AsApp()->rhs()->AsTuple();
  1761          if (label.empty() || show_labeled_vnames_) {
  1762            // Since all of our facts are well-formed, we know this is a vname.
  1763            nt->element(0)->AsApp()->rhs()->Dump(symbol_table_, &html_printer);
  1764          }
  1765          if (!label.empty()) {
  1766            if (show_labeled_vnames_) {
  1767              html_printer.Print(" = ");
  1768            }
  1769            html_printer.Print(label);
  1770          }
  1771          printer.Print("</TD></TR>");
  1772          for (AstNode* fact : info.facts) {
  1773            Tuple* nt = fact->AsApp()->rhs()->AsTuple();
  1774            printer.Print("<TR><TD>");
  1775            html_printer.Print(FactName(nt->element(3)));
  1776            printer.Print("</TD><TD>");
  1777            if (info.kind == NodeKind::kFile &&
  1778                EncodedIdentEqualTo(nt->element(3), text_id_)) {
  1779              // Don't clutter the graph with file content.
  1780              printer.Print("...");
  1781            } else if (EncodedIdentEqualTo(nt->element(3), code_id_)) {
  1782              // Don't print encoded proto data.
  1783              printer.Print("...");
  1784            } else {
  1785              nt->element(4)->Dump(symbol_table_, &html_printer);
  1786            }
  1787            printer.Print("</TD></TR>");
  1788          }
  1789          printer.Print("</TABLE>> shape=plaintext ");
  1790          if (!label.empty()) {
  1791            printer.Print(" color=blue ");
  1792          }
  1793          printer.Print("];\n");
  1794        }
  1795      } else {
  1796        // Edge.
  1797        if (ElideNode(t->element(0)) || ElideNode(t->element(2))) {
  1798          continue;
  1799        }
  1800        PrintQuotedNodeId(t->element(0));
  1801        printer.Print(" -> ");
  1802        PrintQuotedNodeId(t->element(2));
  1803        printer.Print(" [ label=\"");
  1804        quote_printer.Print(EdgeName(t->element(1)));
  1805        if (t->element(4) != empty_string_id()) {
  1806          printer.Print(".");
  1807          t->element(4)->Dump(symbol_table_, &quote_printer);
  1808        }
  1809        printer.Print("\" ];\n");
  1810      }
  1811    }
  1812    printer.Print("}\n");
  1813  }
  1814  
  1815  }  // namespace verifier
  1816  }  // namespace kythe