kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertion_ast.h (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #ifndef KYTHE_CXX_VERIFIER_ASSERTION_AST_H_
    18  #define KYTHE_CXX_VERIFIER_ASSERTION_AST_H_
    19  
    20  #include <ctype.h>
    21  
    22  #include <algorithm>
    23  #include <optional>
    24  #include <unordered_map>
    25  #include <vector>
    26  
    27  #include "absl/log/check.h"
    28  #include "absl/log/log.h"
    29  #include "absl/strings/escaping.h"
    30  #include "absl/strings/str_cat.h"
    31  #include "kythe/cxx/verifier/location.hh"
    32  #include "pretty_printer.h"
    33  #include "re2/re2.h"
    34  
    35  namespace kythe {
    36  namespace verifier {
    37  
    38  /// \brief Given a `SymbolTable`, uniquely identifies some string of text.
    39  /// If two `Symbol`s are equal, their original text is equal.
    40  typedef size_t Symbol;
    41  
    42  /// \brief Maps strings to `Symbol`s.
    43  class SymbolTable {
    44   public:
    45    explicit SymbolTable() : id_regex_("[%#]?[_a-zA-Z/][a-zA-Z_0-9/]*") {}
    46  
    47    /// \brief Returns the `Symbol` associated with `string` or `nullopt`.
    48    std::optional<Symbol> FindInterned(absl::string_view string) const {
    49      const auto old = symbols_.find(std::string(string));
    50      if (old == symbols_.end()) return std::nullopt;
    51      return old->second;
    52    }
    53  
    54    /// \brief Returns the `Symbol` associated with `string`, or aborts.
    55    Symbol MustIntern(absl::string_view string) const {
    56      auto sym = FindInterned(string);
    57      CHECK(sym) << "no symbol for " << string;
    58      return *sym;
    59    }
    60  
    61    /// \brief Returns the `Symbol` associated with `string`, or makes a new one.
    62    Symbol intern(const std::string& string) {
    63      const auto old = symbols_.find(string);
    64      if (old != symbols_.end()) {
    65        return old->second;
    66      }
    67      Symbol next_symbol = reverse_map_.size();
    68      symbols_[string] = next_symbol;
    69      // Note that references to elements of `unordered_map` are not invalidated
    70      // upon insert (really, upon rehash), so keeping around pointers in
    71      // `reverse_map_` is safe.
    72      reverse_map_.push_back(&symbols_.find(string)->first);
    73      return next_symbol;
    74    }
    75    /// \brief Returns the text associated with `symbol`.
    76    const std::string& text(Symbol symbol) const { return *reverse_map_[symbol]; }
    77  
    78    /// \brief Returns a string associated with `symbol` that disambiguates
    79    /// nonces.
    80    std::string PrettyText(Symbol symbol) const {
    81      auto* text = reverse_map_[symbol];
    82      if (text == &unique_symbol_) {
    83        return absl::StrCat("(unique#", std::to_string(symbol), ")");
    84      } else if (!text->empty() && RE2::FullMatch(*text, id_regex_)) {
    85        return *text;
    86      } else {
    87        return absl::StrCat("\"", absl::CHexEscape(*text), "\"");
    88      }
    89    }
    90  
    91    /// \brief Returns a `Symbol` that can never be spelled (but which still has
    92    /// a printable name).
    93    Symbol unique() {
    94      reverse_map_.push_back(&unique_symbol_);
    95      return reverse_map_.size() - 1;
    96    }
    97  
    98   private:
    99    /// Maps text to unique `Symbol`s.
   100    std::unordered_map<std::string, Symbol> symbols_;
   101    /// Maps `Symbol`s back to their original text.
   102    std::vector<const std::string*> reverse_map_;
   103    /// The text to use for unique() symbols.
   104    std::string unique_symbol_ = "(unique)";
   105    /// Used for quoting strings - see assertions.lex:
   106    RE2 id_regex_;
   107  };
   108  
   109  /// \brief Performs bump-pointer allocation of pointer-aligned memory.
   110  ///
   111  /// AST nodes do not need to be deallocated piecemeal. The interpreter
   112  /// does not permit uncontrolled mutable state: `EVars` are always unset
   113  /// when history is rewound, so links to younger memory that have leaked out
   114  /// into older memory at a choice point are severed when that choice point is
   115  /// reconsidered. This means that entire swaths of memory can safely be
   116  /// deallocated at once without calling individual destructors.
   117  ///
   118  /// \warning Since `Arena`-allocated objects never have their destructors
   119  /// called, any non-POD members they have will in turn never be destroyed.
   120  class Arena {
   121   public:
   122    Arena() : next_block_index_(0) {}
   123  
   124    ~Arena() {
   125      for (auto& b : blocks_) {
   126        delete[] b;
   127      }
   128    }
   129  
   130    /// \brief Allocate `bytes` bytes, aligned to `kPointerSize`, allocating
   131    /// new blocks from the system if necessary.
   132    void* New(size_t bytes) {
   133      // Align to kPointerSize bytes.
   134      bytes = (bytes + kPointerSize - 1) & kPointerSizeMask;
   135      CHECK(bytes < kBlockSize);
   136      offset_ += bytes;
   137      if (offset_ > kBlockSize) {
   138        if (next_block_index_ == blocks_.size()) {
   139          char* next_block = new char[kBlockSize];
   140          blocks_.push_back(next_block);
   141          current_block_ = next_block;
   142        } else {
   143          current_block_ = blocks_[next_block_index_];
   144        }
   145        ++next_block_index_;
   146        offset_ = bytes;
   147      }
   148      return current_block_ + offset_ - bytes;
   149    }
   150  
   151   private:
   152    /// The size of a pointer on this machine. We support only machines with
   153    /// power-of-two address size and alignment requirements.
   154    const size_t kPointerSize = sizeof(void*);
   155    /// `kPointerSize` (a power of two) sign-extended from its first set bit.
   156    const size_t kPointerSizeMask = ((~kPointerSize) + 1);
   157    /// The size of allocation requests to make from the normal heap.
   158    const size_t kBlockSize = 1024 * 64;
   159  
   160    /// The next offset in the current block to allocate. Should always be
   161    /// `<= kBlockSize`. If it is `== kBlockSize`, the current block is
   162    /// exhausted and the `Arena` moves on to the next block, allocating one
   163    /// if necessary.
   164    size_t offset_ = kBlockSize;
   165    /// The index of the next block to allocate from. Should always be
   166    /// `<= blocks_.size()`. If it is `== blocks_.size()`, a new block is
   167    /// allocated before the next `New` request completes.
   168    size_t next_block_index_;
   169    /// The block from which the `Arena` is currently making allocations. May
   170    /// be `nullptr` if no allocations have yet been made.
   171    char* current_block_;
   172    /// All blocks that the `Arena` has allocated so far.
   173    std::vector<char*> blocks_;
   174  };
   175  
   176  /// \brief An object that can be allocated inside an `Arena`.
   177  class ArenaObject {
   178   public:
   179    void* operator new(size_t size, Arena* arena) { return arena->New(size); }
   180    void operator delete(void*, size_t) {
   181      LOG(FATAL) << "Don't delete ArenaObjects.";
   182    }
   183    void operator delete(void* ptr, Arena* arena) {
   184      LOG(FATAL) << "Don't delete ArenaObjects.";
   185    }
   186  };
   187  
// Forward declarations of the concrete node kinds. `AstNode` declares one
// `As...()` accessor per kind, each returning a pointer to that kind, before
// the kinds themselves are defined.
class App;
class EVar;
class Identifier;
class Range;
class Tuple;
   193  
   194  /// \brief An object that is manipulated by the verifier during the course of
   195  /// interpretation.
   196  ///
   197  /// Some `AstNode`s are dynamically created as part of the execution process.
   198  /// Others are created during parsing. `AstNode`s are generally treated as
   199  /// immutable after their initial construction phase except for certain notable
   200  /// exceptions like `EVar`.
   201  class AstNode : public ArenaObject {
   202   public:
   203    explicit AstNode(const yy::location& location) : location_(location) {}
   204  
   205    /// \brief Returns the location where the `AstNode` was found if it came
   206    /// from source text.
   207    const yy::location& location() const { return location_; }
   208  
   209    /// \brief Dumps the `AstNode` to `printer`.
   210    virtual void Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {}
   211  
   212    virtual App* AsApp() { return nullptr; }
   213    virtual EVar* AsEVar() { return nullptr; }
   214    virtual Identifier* AsIdentifier() { return nullptr; }
   215    virtual Range* AsRange() { return nullptr; }
   216    virtual Tuple* AsTuple() { return nullptr; }
   217  
   218   private:
   219    /// \brief The location where the `AstNode` can be found in source text, if
   220    /// any.
   221    yy::location location_;
   222  };
   223  
   224  /// \brief A range specification that can unify with one or more ranges.
   225  class Range : public AstNode {
   226   public:
   227    Range(const yy::location& location, size_t begin, size_t end, Symbol path,
   228          Symbol root, Symbol corpus)
   229        : AstNode(location),
   230          begin_(begin),
   231          end_(end),
   232          path_(path),
   233          root_(root),
   234          corpus_(corpus) {}
   235    Range* AsRange() override { return this; }
   236    void Dump(const SymbolTable&, PrettyPrinter*) override;
   237    size_t begin() const { return begin_; }
   238    size_t end() const { return end_; }
   239    size_t path() const { return path_; }
   240    size_t corpus() const { return corpus_; }
   241    size_t root() const { return root_; }
   242  
   243   private:
   244    /// The start of the range in bytes.
   245    size_t begin_;
   246    /// The end of the range in bytes.
   247    size_t end_;
   248    /// The source file path.
   249    Symbol path_;
   250    /// The source file root.
   251    Symbol root_;
   252    /// The source file corpus.
   253    Symbol corpus_;
   254  };
   255  
   256  inline bool operator==(const Range& l, const Range& r) {
   257    return l.begin() == r.begin() && l.end() == r.end() && l.path() == r.path() &&
   258           l.root() == r.root() && l.corpus() == r.corpus();
   259  }
   260  
   261  inline bool operator!=(const Range& l, const Range& r) { return !(l == r); }
   262  
   263  /// \brief A tuple of zero or more elements.
   264  class Tuple : public AstNode {
   265   public:
   266    /// \brief Constructs a new `Tuple`
   267    /// \param location Mark with this location
   268    /// \param element_count The number of elements in the tuple
   269    /// \param elements A preallocated buffer of `AstNode*` such that
   270    /// the total size of the buffer is equal to
   271    /// `element_count * sizeof(AstNode *)`
   272    Tuple(const yy::location& location, size_t element_count, AstNode** elements)
   273        : AstNode(location), element_count_(element_count), elements_(elements) {}
   274    Tuple* AsTuple() override { return this; }
   275    void Dump(const SymbolTable&, PrettyPrinter*) override;
   276    /// \brief Returns the number of elements in the `Tuple`.
   277    size_t size() const { return element_count_; }
   278    /// \brief Returns the `index`th element of the `Tuple`, counting from zero.
   279    AstNode* element(size_t index) const {
   280      CHECK(index < element_count_);
   281      return elements_[index];
   282    }
   283  
   284   private:
   285    /// The number of `AstNode *`s in `elements_`
   286    size_t element_count_;
   287    /// Storage for the `Tuple`'s elements.
   288    AstNode** elements_;
   289  };
   290  
   291  /// \brief An application (eg, `f(g)`).
   292  ///
   293  /// Generally an `App` will combine some `Identifier` head with a `Tuple` body,
   294  /// but this is not necessarily the case.
   295  class App : public AstNode {
   296   public:
   297    /// \brief Constructs a new `App` node, taking its location from the
   298    /// location of the left-hand side.
   299    /// \param lhs The left-hand side of the application (eg, an `Identifier`).
   300    /// \param rhs The right-hand side of the application (eg, a `Tuple`).
   301    App(AstNode* lhs, AstNode* rhs)
   302        : AstNode(lhs->location()), lhs_(lhs), rhs_(rhs) {}
   303    /// \brief Constructs a new `App` node with an explicit `location`.
   304    /// \param `location` The location to use for this `App`.
   305    /// \param lhs The left-hand side of the application (eg, an `Identifier`).
   306    /// \param rhs The right-hand side of the application (eg, a `Tuple`).
   307    App(const yy::location& location, AstNode* lhs, AstNode* rhs)
   308        : AstNode(location), lhs_(lhs), rhs_(rhs) {}
   309  
   310    App* AsApp() override { return this; }
   311    void Dump(const SymbolTable&, PrettyPrinter*) override;
   312  
   313    /// \brief The left-hand side (`f` in `f(g)`) of this `App`
   314    AstNode* lhs() const { return lhs_; }
   315    /// \brief The right-hand side (`(g)` in `f(g)`) of this `App
   316    AstNode* rhs() const { return rhs_; }
   317  
   318   private:
   319    AstNode* lhs_;
   320    AstNode* rhs_;
   321  };
   322  
   323  /// \brief An identifier (corresponding to some `Symbol`).
   324  class Identifier : public AstNode {
   325   public:
   326    Identifier(const yy::location& location, Symbol symbol)
   327        : AstNode(location), symbol_(symbol) {}
   328    /// \brief The `Symbol` this `Identifier` represents.
   329    Symbol symbol() const { return symbol_; }
   330    Identifier* AsIdentifier() override { return this; }
   331    void Dump(const SymbolTable&, PrettyPrinter*) override;
   332  
   333   private:
   334    Symbol symbol_;
   335  };
   336  
   337  /// \brief An existential variable.
   338  ///
   339  /// `EVars` are given assignments while the verifier solves for its goals.
   340  /// Once an `EVar` is given an assignment, that assignment will not change,
   341  /// unless it is undone by backtracking.
   342  class EVar : public AstNode {
   343   public:
   344    /// Constructs a new `EVar` with no assignment.
   345    explicit EVar(const yy::location& location)
   346        : AstNode(location), current_(nullptr) {}
   347    EVar* AsEVar() override { return this; }
   348    void Dump(const SymbolTable&, PrettyPrinter*) override;
   349  
   350    /// \brief Returns current assignment, or `nullptr` if one has not been made.
   351    AstNode* current() { return current_; }
   352  
   353    /// \brief Assigns this `EVar`.
   354    void set_current(AstNode* node) { current_ = node; }
   355  
   356   private:
   357    /// The `EVar`'s current assignment or `nullptr`.
   358    AstNode* current_;
   359  };
   360  
   361  /// \brief A set of goals to be handled atomically.
   362  struct GoalGroup {
   363    enum AcceptanceCriterion {
   364      kNoneMayFail,  ///< For this group to pass, no goals may fail.
   365      kSomeMustFail  ///< For this group to pass, some goals must fail.
   366    };
   367    AcceptanceCriterion accept_if;  ///< How this group is handled.
   368    std::vector<AstNode*> goals;    ///< Grouped goals, implicitly conjoined.
   369  };
   370  
/// \brief A database of fact-shaped AstNodes.
///
/// Stored as a plain `std::vector` of `AstNode` pointers.
using Database = std::vector<AstNode*>;

/// \brief Multimap from anchor offsets to anchor VName tuples.
///
/// The key is a pair of `size_t` offsets; presumably (begin, end) in bytes,
/// as in `Range` -- TODO confirm against the code that fills the map.
/// NOTE(review): `std::multimap` is declared in `<map>`, which this header
/// does not include directly.
using AnchorMap = std::multimap<std::pair<size_t, size_t>, AstNode*>;
   376  
   377  /// An EVar whose assignment is interesting to display.
   378  struct Inspection {
   379   public:
   380    enum class Kind {
   381      EXPLICIT,  ///< The user requested this inspection (with "?").
   382      IMPLICIT   ///< This inspection was added by default.
   383    };
   384    std::string label;  ///< A label for user reference.
   385    EVar* evar;         ///< The EVar to inspect.
   386    Kind kind;          ///< Whether this inspection was added by default.
   387    Inspection(const std::string& label, EVar* evar, Kind kind)
   388        : label(label), evar(evar), kind(kind) {}
   389  };
   390  
   391  }  // namespace verifier
   392  }  // namespace kythe
   393  
   394  // Required by generated code.
   395  #define YY_DECL                                            \
   396    int kythe::verifier::AssertionParser::lex(               \
   397        YySemanticValue* yylval_param, yy::location* yylloc, \
   398        ::kythe::verifier::AssertionParser& context)
   399  namespace kythe {
   400  namespace verifier {
   401  class AssertionParser;
   402  }
   403  }  // namespace kythe
   404  struct YySemanticValue {
   405    std::string string;
   406    kythe::verifier::AstNode* node;
   407    int int_;
   408    size_t size_t_;
   409  };
   410  #define YYSTYPE YySemanticValue
   411  
   412  #endif  // KYTHE_CXX_VERIFIER_ASSERTION_AST_H_