kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertion_ast.h (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef KYTHE_CXX_VERIFIER_ASSERTION_AST_H_ 18 #define KYTHE_CXX_VERIFIER_ASSERTION_AST_H_ 19 20 #include <ctype.h> 21 22 #include <algorithm> 23 #include <optional> 24 #include <unordered_map> 25 #include <vector> 26 27 #include "absl/log/check.h" 28 #include "absl/log/log.h" 29 #include "absl/strings/escaping.h" 30 #include "absl/strings/str_cat.h" 31 #include "kythe/cxx/verifier/location.hh" 32 #include "pretty_printer.h" 33 #include "re2/re2.h" 34 35 namespace kythe { 36 namespace verifier { 37 38 /// \brief Given a `SymbolTable`, uniquely identifies some string of text. 39 /// If two `Symbol`s are equal, their original text is equal. 40 typedef size_t Symbol; 41 42 /// \brief Maps strings to `Symbol`s. 43 class SymbolTable { 44 public: 45 explicit SymbolTable() : id_regex_("[%#]?[_a-zA-Z/][a-zA-Z_0-9/]*") {} 46 47 /// \brief Returns the `Symbol` associated with `string` or `nullopt`. 48 std::optional<Symbol> FindInterned(absl::string_view string) const { 49 const auto old = symbols_.find(std::string(string)); 50 if (old == symbols_.end()) return std::nullopt; 51 return old->second; 52 } 53 54 /// \brief Returns the `Symbol` associated with `string`, or aborts. 55 Symbol MustIntern(absl::string_view string) const { 56 auto sym = FindInterned(string); 57 CHECK(sym) << "no symbol for " << string; 58 return *sym; 59 } 60 61 /// \brief Returns the `Symbol` associated with `string`, or makes a new one. 62 Symbol intern(const std::string& string) { 63 const auto old = symbols_.find(string); 64 if (old != symbols_.end()) { 65 return old->second; 66 } 67 Symbol next_symbol = reverse_map_.size(); 68 symbols_[string] = next_symbol; 69 // Note that references to elements of `unordered_map` are not invalidated 70 // upon insert (really, upon rehash), so keeping around pointers in 71 // `reverse_map_` is safe. 72 reverse_map_.push_back(&symbols_.find(string)->first); 73 return next_symbol; 74 } 75 /// \brief Returns the text associated with `symbol`. 76 const std::string& text(Symbol symbol) const { return *reverse_map_[symbol]; } 77 78 /// \brief Returns a string associated with `symbol` that disambiguates 79 /// nonces. 80 std::string PrettyText(Symbol symbol) const { 81 auto* text = reverse_map_[symbol]; 82 if (text == &unique_symbol_) { 83 return absl::StrCat("(unique#", std::to_string(symbol), ")"); 84 } else if (!text->empty() && RE2::FullMatch(*text, id_regex_)) { 85 return *text; 86 } else { 87 return absl::StrCat("\"", absl::CHexEscape(*text), "\""); 88 } 89 } 90 91 /// \brief Returns a `Symbol` that can never be spelled (but which still has 92 /// a printable name). 93 Symbol unique() { 94 reverse_map_.push_back(&unique_symbol_); 95 return reverse_map_.size() - 1; 96 } 97 98 private: 99 /// Maps text to unique `Symbol`s. 100 std::unordered_map<std::string, Symbol> symbols_; 101 /// Maps `Symbol`s back to their original text. 102 std::vector<const std::string*> reverse_map_; 103 /// The text to use for unique() symbols. 104 std::string unique_symbol_ = "(unique)"; 105 /// Used for quoting strings - see assertions.lex: 106 RE2 id_regex_; 107 }; 108 109 /// \brief Performs bump-pointer allocation of pointer-aligned memory. 110 /// 111 /// AST nodes do not need to be deallocated piecemeal. The interpreter 112 /// does not permit uncontrolled mutable state: `EVars` are always unset 113 /// when history is rewound, so links to younger memory that have leaked out 114 /// into older memory at a choice point are severed when that choice point is 115 /// reconsidered. This means that entire swaths of memory can safely be 116 /// deallocated at once without calling individual destructors. 117 /// 118 /// \warning Since `Arena`-allocated objects never have their destructors 119 /// called, any non-POD members they have will in turn never be destroyed. 120 class Arena { 121 public: 122 Arena() : next_block_index_(0) {} 123 124 ~Arena() { 125 for (auto& b : blocks_) { 126 delete[] b; 127 } 128 } 129 130 /// \brief Allocate `bytes` bytes, aligned to `kPointerSize`, allocating 131 /// new blocks from the system if necessary. 132 void* New(size_t bytes) { 133 // Align to kPointerSize bytes. 134 bytes = (bytes + kPointerSize - 1) & kPointerSizeMask; 135 CHECK(bytes < kBlockSize); 136 offset_ += bytes; 137 if (offset_ > kBlockSize) { 138 if (next_block_index_ == blocks_.size()) { 139 char* next_block = new char[kBlockSize]; 140 blocks_.push_back(next_block); 141 current_block_ = next_block; 142 } else { 143 current_block_ = blocks_[next_block_index_]; 144 } 145 ++next_block_index_; 146 offset_ = bytes; 147 } 148 return current_block_ + offset_ - bytes; 149 } 150 151 private: 152 /// The size of a pointer on this machine. We support only machines with 153 /// power-of-two address size and alignment requirements. 154 const size_t kPointerSize = sizeof(void*); 155 /// `kPointerSize` (a power of two) sign-extended from its first set bit. 156 const size_t kPointerSizeMask = ((~kPointerSize) + 1); 157 /// The size of allocation requests to make from the normal heap. 158 const size_t kBlockSize = 1024 * 64; 159 160 /// The next offset in the current block to allocate. Should always be 161 /// `<= kBlockSize`. If it is `== kBlockSize`, the current block is 162 /// exhausted and the `Arena` moves on to the next block, allocating one 163 /// if necessary. 164 size_t offset_ = kBlockSize; 165 /// The index of the next block to allocate from. Should always be 166 /// `<= blocks_.size()`. If it is `== blocks_.size()`, a new block is 167 /// allocated before the next `New` request completes. 168 size_t next_block_index_; 169 /// The block from which the `Arena` is currently making allocations. May 170 /// be `nullptr` if no allocations have yet been made. 171 char* current_block_; 172 /// All blocks that the `Arena` has allocated so far. 173 std::vector<char*> blocks_; 174 }; 175 176 /// \brief An object that can be allocated inside an `Arena`. 177 class ArenaObject { 178 public: 179 void* operator new(size_t size, Arena* arena) { return arena->New(size); } 180 void operator delete(void*, size_t) { 181 LOG(FATAL) << "Don't delete ArenaObjects."; 182 } 183 void operator delete(void* ptr, Arena* arena) { 184 LOG(FATAL) << "Don't delete ArenaObjects."; 185 } 186 }; 187 188 class App; 189 class EVar; 190 class Identifier; 191 class Range; 192 class Tuple; 193 194 /// \brief An object that is manipulated by the verifier during the course of 195 /// interpretation. 196 /// 197 /// Some `AstNode`s are dynamically created as part of the execution process. 198 /// Others are created during parsing. `AstNode`s are generally treated as 199 /// immutable after their initial construction phase except for certain notable 200 /// exceptions like `EVar`. 201 class AstNode : public ArenaObject { 202 public: 203 explicit AstNode(const yy::location& location) : location_(location) {} 204 205 /// \brief Returns the location where the `AstNode` was found if it came 206 /// from source text. 207 const yy::location& location() const { return location_; } 208 209 /// \brief Dumps the `AstNode` to `printer`. 210 virtual void Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) {} 211 212 virtual App* AsApp() { return nullptr; } 213 virtual EVar* AsEVar() { return nullptr; } 214 virtual Identifier* AsIdentifier() { return nullptr; } 215 virtual Range* AsRange() { return nullptr; } 216 virtual Tuple* AsTuple() { return nullptr; } 217 218 private: 219 /// \brief The location where the `AstNode` can be found in source text, if 220 /// any. 221 yy::location location_; 222 }; 223 224 /// \brief A range specification that can unify with one or more ranges. 225 class Range : public AstNode { 226 public: 227 Range(const yy::location& location, size_t begin, size_t end, Symbol path, 228 Symbol root, Symbol corpus) 229 : AstNode(location), 230 begin_(begin), 231 end_(end), 232 path_(path), 233 root_(root), 234 corpus_(corpus) {} 235 Range* AsRange() override { return this; } 236 void Dump(const SymbolTable&, PrettyPrinter*) override; 237 size_t begin() const { return begin_; } 238 size_t end() const { return end_; } 239 size_t path() const { return path_; } 240 size_t corpus() const { return corpus_; } 241 size_t root() const { return root_; } 242 243 private: 244 /// The start of the range in bytes. 245 size_t begin_; 246 /// The end of the range in bytes. 247 size_t end_; 248 /// The source file path. 249 Symbol path_; 250 /// The source file root. 251 Symbol root_; 252 /// The source file corpus. 253 Symbol corpus_; 254 }; 255 256 inline bool operator==(const Range& l, const Range& r) { 257 return l.begin() == r.begin() && l.end() == r.end() && l.path() == r.path() && 258 l.root() == r.root() && l.corpus() == r.corpus(); 259 } 260 261 inline bool operator!=(const Range& l, const Range& r) { return !(l == r); } 262 263 /// \brief A tuple of zero or more elements. 264 class Tuple : public AstNode { 265 public: 266 /// \brief Constructs a new `Tuple` 267 /// \param location Mark with this location 268 /// \param element_count The number of elements in the tuple 269 /// \param elements A preallocated buffer of `AstNode*` such that 270 /// the total size of the buffer is equal to 271 /// `element_count * sizeof(AstNode *)` 272 Tuple(const yy::location& location, size_t element_count, AstNode** elements) 273 : AstNode(location), element_count_(element_count), elements_(elements) {} 274 Tuple* AsTuple() override { return this; } 275 void Dump(const SymbolTable&, PrettyPrinter*) override; 276 /// \brief Returns the number of elements in the `Tuple`. 277 size_t size() const { return element_count_; } 278 /// \brief Returns the `index`th element of the `Tuple`, counting from zero. 279 AstNode* element(size_t index) const { 280 CHECK(index < element_count_); 281 return elements_[index]; 282 } 283 284 private: 285 /// The number of `AstNode *`s in `elements_` 286 size_t element_count_; 287 /// Storage for the `Tuple`'s elements. 288 AstNode** elements_; 289 }; 290 291 /// \brief An application (eg, `f(g)`). 292 /// 293 /// Generally an `App` will combine some `Identifier` head with a `Tuple` body, 294 /// but this is not necessarily the case. 295 class App : public AstNode { 296 public: 297 /// \brief Constructs a new `App` node, taking its location from the 298 /// location of the left-hand side. 299 /// \param lhs The left-hand side of the application (eg, an `Identifier`). 300 /// \param rhs The right-hand side of the application (eg, a `Tuple`). 301 App(AstNode* lhs, AstNode* rhs) 302 : AstNode(lhs->location()), lhs_(lhs), rhs_(rhs) {} 303 /// \brief Constructs a new `App` node with an explicit `location`. 304 /// \param `location` The location to use for this `App`. 305 /// \param lhs The left-hand side of the application (eg, an `Identifier`). 306 /// \param rhs The right-hand side of the application (eg, a `Tuple`). 307 App(const yy::location& location, AstNode* lhs, AstNode* rhs) 308 : AstNode(location), lhs_(lhs), rhs_(rhs) {} 309 310 App* AsApp() override { return this; } 311 void Dump(const SymbolTable&, PrettyPrinter*) override; 312 313 /// \brief The left-hand side (`f` in `f(g)`) of this `App` 314 AstNode* lhs() const { return lhs_; } 315 /// \brief The right-hand side (`(g)` in `f(g)`) of this `App 316 AstNode* rhs() const { return rhs_; } 317 318 private: 319 AstNode* lhs_; 320 AstNode* rhs_; 321 }; 322 323 /// \brief An identifier (corresponding to some `Symbol`). 324 class Identifier : public AstNode { 325 public: 326 Identifier(const yy::location& location, Symbol symbol) 327 : AstNode(location), symbol_(symbol) {} 328 /// \brief The `Symbol` this `Identifier` represents. 329 Symbol symbol() const { return symbol_; } 330 Identifier* AsIdentifier() override { return this; } 331 void Dump(const SymbolTable&, PrettyPrinter*) override; 332 333 private: 334 Symbol symbol_; 335 }; 336 337 /// \brief An existential variable. 338 /// 339 /// `EVars` are given assignments while the verifier solves for its goals. 340 /// Once an `EVar` is given an assignment, that assignment will not change, 341 /// unless it is undone by backtracking. 342 class EVar : public AstNode { 343 public: 344 /// Constructs a new `EVar` with no assignment. 345 explicit EVar(const yy::location& location) 346 : AstNode(location), current_(nullptr) {} 347 EVar* AsEVar() override { return this; } 348 void Dump(const SymbolTable&, PrettyPrinter*) override; 349 350 /// \brief Returns current assignment, or `nullptr` if one has not been made. 351 AstNode* current() { return current_; } 352 353 /// \brief Assigns this `EVar`. 354 void set_current(AstNode* node) { current_ = node; } 355 356 private: 357 /// The `EVar`'s current assignment or `nullptr`. 358 AstNode* current_; 359 }; 360 361 /// \brief A set of goals to be handled atomically. 362 struct GoalGroup { 363 enum AcceptanceCriterion { 364 kNoneMayFail, ///< For this group to pass, no goals may fail. 365 kSomeMustFail ///< For this group to pass, some goals must fail. 366 }; 367 AcceptanceCriterion accept_if; ///< How this group is handled. 368 std::vector<AstNode*> goals; ///< Grouped goals, implicitly conjoined. 369 }; 370 371 /// \brief A database of fact-shaped AstNodes. 372 using Database = std::vector<AstNode*>; 373 374 /// \brief Multimap from anchor offsets to anchor VName tuples. 375 using AnchorMap = std::multimap<std::pair<size_t, size_t>, AstNode*>; 376 377 /// An EVar whose assignment is interesting to display. 378 struct Inspection { 379 public: 380 enum class Kind { 381 EXPLICIT, ///< The user requested this inspection (with "?"). 382 IMPLICIT ///< This inspection was added by default. 383 }; 384 std::string label; ///< A label for user reference. 385 EVar* evar; ///< The EVar to inspect. 386 Kind kind; ///< Whether this inspection was added by default. 387 Inspection(const std::string& label, EVar* evar, Kind kind) 388 : label(label), evar(evar), kind(kind) {} 389 }; 390 391 } // namespace verifier 392 } // namespace kythe 393 394 // Required by generated code. 395 #define YY_DECL \ 396 int kythe::verifier::AssertionParser::lex( \ 397 YySemanticValue* yylval_param, yy::location* yylloc, \ 398 ::kythe::verifier::AssertionParser& context) 399 namespace kythe { 400 namespace verifier { 401 class AssertionParser; 402 } 403 } // namespace kythe 404 struct YySemanticValue { 405 std::string string; 406 kythe::verifier::AstNode* node; 407 int int_; 408 size_t size_t_; 409 }; 410 #define YYSTYPE YySemanticValue 411 412 #endif // KYTHE_CXX_VERIFIER_ASSERTION_AST_H_