kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/verifier.h (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef KYTHE_CXX_VERIFIER_H_ 18 #define KYTHE_CXX_VERIFIER_H_ 19 20 #include <functional> 21 #include <optional> 22 #include <string> 23 24 #include "absl/container/flat_hash_map.h" 25 #include "absl/types/span.h" 26 #include "assertions.h" 27 #include "kythe/proto/common.pb.h" 28 #include "kythe/proto/storage.pb.h" 29 30 namespace kythe { 31 namespace verifier { 32 /// \brief Runs logic programs. 33 /// 34 /// The `Verifier` combines an `AssertionContext` with a database of Kythe 35 /// facts. It can be used to determine whether the goals specified in the 36 /// `AssertionContext` are satisfiable. 37 class Verifier { 38 public: 39 /// \param trace_lex Dump lexing debug information 40 /// \param trace_parse Dump parsing debug information 41 explicit Verifier(bool trace_lex = false, bool trace_parse = false); 42 43 /// \brief Loads an in-memory source file. 44 /// \param filename The name to use for the file; may be blank. 45 /// \param vname The AstNode of the vname for the file. 46 /// \param text The symbol for the text to load 47 /// \return false if we failed. 48 bool LoadInMemoryRuleFile(const std::string& filename, AstNode* vname, 49 Symbol text); 50 51 /// \brief Loads a source file with goal comments indicating rules and data. 52 /// The VName for the source file will be determined by matching its content 53 /// against file nodes. 54 /// \param filename The filename to load 55 /// \return false if we failed 56 bool LoadInlineRuleFile(const std::string& filename); 57 58 /// \brief Loads a text proto with goal comments indicating rules and data. 59 /// The VName for the source file will be blank. 60 /// \param file_data The data to load 61 /// \param path the path to use for anchors 62 /// \param root the root to use for anchors 63 /// \param corpus the corpus to use for anchors 64 /// \return false if we failed 65 bool LoadInlineProtoFile(const std::string& file_data, 66 absl::string_view path = "", 67 absl::string_view root = "", 68 absl::string_view corpus = ""); 69 70 /// \brief During verification, ignore duplicate facts. 71 void IgnoreDuplicateFacts(); 72 73 /// \brief During verification, ignore conflicting /kythe/code facts. 74 void IgnoreCodeConflicts(); 75 76 /// \brief Save results of verification keyed by inspection label. 77 void SaveEVarAssignments(); 78 79 /// \brief Emit verbose logging. 80 void Verbose(); 81 82 /// \brief Dump all goals to standard out. 83 void ShowGoals(); 84 85 /// \brief Prints out a particular goal with its original source location 86 /// to standard error. 87 /// \param group_index The index of the goal's group. 88 /// \param goal_index The index of the goal to print 89 /// \sa highest_goal_reached, highest_group_reached 90 void DumpErrorGoal(size_t group_index, size_t goal_index); 91 92 /// \brief Dump known facts to standard out as a GraphViz graph. 93 void DumpAsDot(); 94 95 /// \brief Dump known facts to standard out as JSON. 96 void DumpAsJson(); 97 98 /// \brief Attempts to satisfy all goals from all loaded rule files and facts. 99 /// \param inspect function to call on any inspection request 100 /// \return true if all goals could be satisfied. 101 bool VerifyAllGoals(std::function<bool(Verifier* context, const Inspection&, 102 std::string_view)> 103 inspect); 104 105 /// \brief Attempts to satisfy all goals from all loaded rule files and facts. 106 /// \return true if all goals could be satisfied. 107 bool VerifyAllGoals(); 108 109 /// \brief Adds a single Kythe fact to the database. 110 /// \param database_name some name used to define the database; should live 111 /// as long as the `Verifier`. Used only for diagnostics. 112 /// \param fact_id some identifier for the fact. Used only for diagnostics. 113 /// \return false if something went wrong. 114 bool AssertSingleFact(std::string* database_name, unsigned int fact_id, 115 const kythe::proto::Entry& entry); 116 117 /// \brief Perform basic well-formedness checks on the input database. 118 /// \pre The database contains only fact-shaped terms, as generated by 119 /// `AssertSingleFact`. 120 /// \return false if the database was not well-formed. 121 bool PrepareDatabase(); 122 123 /// Arena for allocating memory for both static data loaded from the database 124 /// and dynamic data allocated during the course of evaluation. 125 Arena* arena() { return &arena_; } 126 127 /// Symbol table for uniquing strings. 128 SymbolTable* symbol_table() { return &symbol_table_; } 129 130 /// \brief Allocates an identifier for some token. 131 /// \param location The source location for the identifier. 132 /// \param token The text of the identifier. 133 /// \return An `Identifier`. This may not be unique. 134 Identifier* IdentifierFor(const yy::location& location, 135 const std::string& token); 136 137 /// \brief Stringifies an integer, then makes an identifier out of it. 138 /// \param location The source location for the identifier. 139 /// \param integer The integer to stringify. 140 /// \return An `Identifier`. This may not be unique. 141 Identifier* IdentifierFor(const yy::location& location, int integer); 142 143 /// \brief Convenience function to make `(App head (Tuple values))`. 144 /// \param location The source location for the predicate. 145 /// \param head The lhs of the `App` to allocate. 146 /// \param values The body of the `Tuple` to allocate. 147 AstNode* MakePredicate(const yy::location& location, AstNode* head, 148 absl::Span<AstNode* const> values); 149 150 /// \brief The head used for equality predicates. 151 Identifier* eq_id() { return eq_id_; } 152 153 /// \brief The head used for any VName predicate. 154 AstNode* vname_id() { return vname_id_; } 155 156 /// \brief The head used for any Fact predicate. 157 AstNode* fact_id() { return fact_id_; } 158 159 /// \brief The fact kind for an root/empty fact label. 160 AstNode* root_id() { return root_id_; } 161 162 /// \brief The empty string as an identifier. 163 AstNode* empty_string_id() { return empty_string_id_; } 164 165 /// \brief The fact kind for an edge ordinal. 166 AstNode* ordinal_id() { return ordinal_id_; } 167 168 /// \brief The fact kind used to assign a node its kind (eg /kythe/node/kind). 169 AstNode* kind_id() { return kind_id_; } 170 171 /// \brief The fact kind used for an anchor. 172 AstNode* anchor_id() { return anchor_id_; } 173 174 /// \brief The fact kind used for a file. 175 AstNode* file_id() { return file_id_; } 176 177 /// \brief Object for parsing and storing assertions. 178 AssertionParser* parser() { return &parser_; } 179 180 /// \brief Returns the highest group index the verifier reached during 181 /// solving. 182 size_t highest_group_reached() const { return highest_group_reached_; } 183 184 /// \brief Returns the highest goal index the verifier reached during 185 /// solving. 186 size_t highest_goal_reached() const { return highest_goal_reached_; } 187 188 /// \brief Change the regex used to identify goals in source text. 189 /// \return false on failure. 190 bool SetGoalCommentRegex(const std::string& regex, std::string* error); 191 192 /// \brief Use a prefix to match goals in source text. 193 void SetGoalCommentPrefix(const std::string& it); 194 195 /// \brief Look for assertions in file node text. 196 void UseFileNodes() { assertions_from_file_nodes_ = true; } 197 198 /// \brief Only raise a warning if a file VName is missing. 199 void AllowMissingFileVNames() { allow_missing_file_vnames_ = true; } 200 201 /// \brief Convert MarkedSource-valued facts to graphs. 202 void ConvertMarkedSource() { convert_marked_source_ = true; } 203 204 /// \brief Show anchor locations in graph dumps (instead of @). 205 void ShowAnchors() { show_anchors_ = true; } 206 207 /// \brief Show VNames for nodes which also have labels in graph dumps. 208 void ShowLabeledVnames() { show_labeled_vnames_ = true; } 209 210 /// \brief Show the /kythe and /kythe/edge prefixes in graph dumps. 211 void ShowFactPrefix() { show_fact_prefix_ = true; } 212 213 /// \brief Elide unlabeled nodes from graph dumps. 214 void ElideUnlabeled() { show_unlabeled_ = false; } 215 216 /// \brief Check for singleton EVars. 217 /// \return true if there were singletons. 218 bool CheckForSingletonEVars() { return parser_.CheckForSingletonEVars(); } 219 220 /// \brief Don't search for file vnames. 221 void IgnoreFileVnames() { file_vnames_ = false; } 222 223 /// \brief Use the fast solver. 224 void UseFastSolver(bool value) { use_fast_solver_ = value; } 225 226 /// \brief Gets a string representation of `i`. 227 /// \deprecated Inspection callbacks will be provided with strings and 228 /// will no longer have access to the internal AST. 229 std::string InspectionString(const Inspection& i); 230 231 /// \brief Use `corpus` for file nodes without a corpus set. 232 void UseDefaultFileCorpus(const std::string& corpus) { 233 default_file_corpus_ = IdentifierFor(builtin_location_, corpus); 234 } 235 236 private: 237 using InternedVName = std::tuple<Symbol, Symbol, Symbol, Symbol, Symbol>; 238 239 /// \brief Interns an AST node known to be a VName. 240 /// \param node the node to intern. 241 InternedVName InternVName(AstNode* node); 242 243 /// \return a new vname with its corpus filled with the default file corpus 244 /// if `node` is a vname without a corpus set; otherwise `node`. 245 AstNode* FixFileVName(AstNode* node); 246 247 /// \brief Generate a VName that will not conflict with any other VName. 248 AstNode* NewUniqueVName(const yy::location& loc); 249 250 /// \brief Converts an encoded /kythe/code fact to a form that's useful 251 /// to the verifier. 252 /// \param loc The location to use in diagnostics. 253 /// \return null if something went wrong; otherwise, an AstNode corresponding 254 /// to a VName of a synthetic node for `code_data`. 255 AstNode* ConvertCodeFact(const yy::location& loc, 256 const std::string& code_data); 257 258 /// \brief Converts an encoded /kythe/code/json fact to a form that's useful 259 /// to the verifier. 260 /// \param loc The location to use in diagnostics. 261 /// \return null if something went wrong; otherwise, an AstNode corresponding 262 /// to a VName of a synthetic node for `code_data`. 263 AstNode* ConvertCodeJsonFact(const yy::location& loc, 264 const std::string& code_data); 265 266 /// \brief Converts a MarkedSource message to a form that's useful 267 /// to the verifier. 268 /// \param loc The location to use in diagnostics. 269 /// \return null if something went wrong; otherwise, an AstNode corresponding 270 /// to a VName of a synthetic node for `marked_source`. 271 AstNode* ConvertMarkedSource( 272 const yy::location& loc, 273 const kythe::proto::common::MarkedSource& marked_source); 274 275 /// \brief Converts a VName proto to its AST representation. 276 AstNode* ConvertVName(const yy::location& location, 277 const kythe::proto::VName& vname); 278 279 /// \brief Adds an anchor VName. 280 void AddAnchor(AstNode* vname, size_t begin, size_t end) { 281 anchors_.emplace(std::make_pair(begin, end), vname); 282 } 283 284 /// \brief Processes a fact tuple for the fast solver. 285 /// \param tuple the five-tuple representation of a fact 286 /// \return true if successful. 287 bool ProcessFactTupleForFastSolver(Tuple* tuple); 288 289 /// \sa parser() 290 AssertionParser parser_; 291 292 /// \sa arena() 293 Arena arena_; 294 295 /// \sa symbol_table() 296 SymbolTable symbol_table_; 297 298 /// All known facts. 299 Database facts_; 300 301 /// Maps anchor offsets to anchor VName tuples. 302 AnchorMap anchors_; 303 304 /// Has the database been prepared? 305 bool database_prepared_ = false; 306 307 /// Emit verbose logging? 308 bool verbose_ = false; 309 310 /// Ignore duplicate facts during verification? 311 bool ignore_dups_ = false; 312 313 /// Ignore conflicting /kythe/code facts during verification? 314 bool ignore_code_conflicts_ = false; 315 316 /// Filename to use for builtin constants. 317 std::string builtin_location_name_; 318 319 /// Location to use for builtin constants. 320 yy::location builtin_location_; 321 322 /// Node to use for the `=` identifier. 323 Identifier* eq_id_; 324 325 /// Node to use for the `vname` constant. 326 AstNode* vname_id_; 327 328 /// Node to use for the `fact` constant. 329 AstNode* fact_id_; 330 331 /// Node to use for the `/` constant. 332 AstNode* root_id_; 333 334 /// Node to use for the empty string constant. 335 AstNode* empty_string_id_; 336 337 /// Node to use for the `/kythe/ordinal` constant. 338 AstNode* ordinal_id_; 339 340 /// Node to use for the `/kythe/node/kind` constant. 341 AstNode* kind_id_; 342 343 /// Node to use for the `anchor` constant. 344 AstNode* anchor_id_; 345 346 /// Node to use for the `/kythe/loc/start` constant. 347 AstNode* start_id_; 348 349 /// Node to use for the `/kythe/loc/end` constant. 350 AstNode* end_id_; 351 352 /// Node to use for the `file` node kind. 353 AstNode* file_id_; 354 355 /// Node to use for the `text` fact kind. 356 AstNode* text_id_; 357 358 /// Node to use for the `code` fact kind. The fact value should be a 359 /// serialized kythe.proto.MarkedSource message. 360 AstNode* code_id_; 361 362 /// Node to use for the `code/json` fact kind. The fact value should be a 363 /// JSON-serialized kythe.proto.MarkedSource message. 364 AstNode* code_json_id_; 365 366 /// The highest goal group reached during solving (often the culprit for why 367 /// the solution failed). 368 size_t highest_group_reached_ = 0; 369 370 /// The highest goal reached during solving (often the culprit for why 371 /// the solution failed). 372 size_t highest_goal_reached_ = 0; 373 374 /// Whether we save assignments to EVars (by inspection label). 375 bool saving_assignments_ = false; 376 377 /// A map from inspection label to saved assignment. Note that 378 /// duplicate labels will overwrite one another. This means that 379 /// it's important to disambiguate cases where this is likely 380 /// (e.g., we add line and column information to labels we generate 381 /// for anchors). 382 absl::flat_hash_map<std::string, std::string> saved_assignments_; 383 384 /// Maps from pretty-printed vnames to (parsed) file node text. 385 std::map<std::string, Symbol> fake_files_; 386 387 /// Read assertions from file nodes. 388 bool assertions_from_file_nodes_ = false; 389 390 /// The regex to look for to identify goal comments. Should have one match 391 /// group. 392 std::unique_ptr<RE2> goal_comment_regex_; 393 394 /// If true, convert MarkedSource-valued facts to subgraphs. If false, 395 /// MarkedSource-valued facts will be replaced with opaque but unique 396 /// identifiers. 397 bool convert_marked_source_ = false; 398 399 /// If true, show anchor locations in graph dumps (instead of @). 400 bool show_anchors_ = false; 401 402 /// If true, show unlabeled nodes in graph dumps. 403 bool show_unlabeled_ = true; 404 405 /// If true, show VNames for labeled nodes in graph dumps. 406 bool show_labeled_vnames_ = false; 407 408 /// If true, include the /kythe and /kythe/edge prefix on facts and edges. 409 bool show_fact_prefix_ = false; 410 411 /// Identifier for MarkedSource child edges. 412 AstNode* marked_source_child_id_; 413 414 /// Identifier for MarkedSource code edges. 415 AstNode* marked_source_code_edge_id_; 416 417 /// Identifier for MarkedSource BOX kinds. 418 AstNode* marked_source_box_id_; 419 420 /// Identifier for MarkedSource TYPE kinds. 421 AstNode* marked_source_type_id_; 422 423 /// Identifier for MarkedSource PARAMETER kinds. 424 AstNode* marked_source_parameter_id_; 425 426 /// Identifier for MarkedSource IDENTIFIER kinds. 427 AstNode* marked_source_identifier_id_; 428 429 /// Identifier for MarkedSource CONTEXT kinds. 430 AstNode* marked_source_context_id_; 431 432 /// Identifier for MarkedSource INITIALIZER kinds. 433 AstNode* marked_source_initializer_id_; 434 435 /// Identifier for MarkedSource MODIFIER kinds. 436 AstNode* marked_source_modifier_id_; 437 438 /// Identifier for MarkedSource PARAMETER_LOOKUP_BY_PARAM kinds. 439 AstNode* marked_source_parameter_lookup_by_param_id_; 440 441 /// Identifier for MarkedSource LOOKUP_BY_PARAM kinds. 442 AstNode* marked_source_lookup_by_param_id_; 443 444 /// Identifier for MarkedSource PARAMETER_LOOKUP_BY_TPARAM kinds. 445 AstNode* marked_source_parameter_lookup_by_tparam_id_; 446 447 /// Identifier for MarkedSource LOOKUP_BY_TPARAM kinds. 448 AstNode* marked_source_lookup_by_tparam_id_; 449 450 /// Identifier for MarkedSource LOOKUP_BY_PARAM_WITH_DEFAULTS kinds. 451 AstNode* marked_source_parameter_lookup_by_param_with_defaults_id_; 452 453 /// Identifier for MarkedSource LOOKUP_BY_TYPED kinds. 454 AstNode* marked_source_lookup_by_typed_id_; 455 456 /// Identifier for MarkedSource kind facts. 457 AstNode* marked_source_kind_id_; 458 459 /// Identifier for MarkedSource pre_text facts. 460 AstNode* marked_source_pre_text_id_; 461 462 /// Identifier for MarkedSource post_child_text facts. 463 AstNode* marked_source_post_child_text_id_; 464 465 /// Identifier for MarkedSource post_text facts. 466 AstNode* marked_source_post_text_id_; 467 468 /// Identifier for MarkedSource lookup_index facts. 469 AstNode* marked_source_lookup_index_id_; 470 471 /// Identifier for MarkedSource default_children_count facts. 472 AstNode* marked_source_default_children_count_id_; 473 474 /// Identifier for MarkedSource add_final_list_token facts. 475 AstNode* marked_source_add_final_list_token_id_; 476 477 /// Identifier for MarkedSource link edges. 478 AstNode* marked_source_link_id_; 479 480 /// Identifier for MarkedSource true values. 481 AstNode* marked_source_true_id_; 482 483 /// Identifier for MarkedSource false values. 484 AstNode* marked_source_false_id_; 485 486 /// Maps from file content to (verified) VName. 487 absl::flat_hash_map<Symbol, AstNode*> content_to_vname_; 488 489 /// Find file vnames by examining file content. 490 bool file_vnames_ = true; 491 492 /// If file_vnames_ is true, only warn if we can't find a file's VName. 493 bool allow_missing_file_vnames_ = false; 494 495 /// Use the fast solver. 496 bool use_fast_solver_ = false; 497 498 /// Sentinel value for a known file. 499 Symbol known_file_sym_; 500 501 /// Sentinel value for a known nonfile. 502 Symbol known_not_file_sym_; 503 504 /// Maps VNames to known_file_sym_, known_not_file_sym_, or file text. 505 absl::flat_hash_map<InternedVName, Symbol> fast_solver_files_; 506 507 /// File corpus to use if none is set on a file node. 508 AstNode* default_file_corpus_; 509 510 /// The symbol for the empty string. 511 Symbol empty_string_sym_; 512 }; 513 514 } // namespace verifier 515 } // namespace kythe 516 517 #endif // KYTHE_CXX_VERIFIER_H_