kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/verifier.h (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #ifndef KYTHE_CXX_VERIFIER_H_
    18  #define KYTHE_CXX_VERIFIER_H_
    19  
    20  #include <functional>
    21  #include <optional>
    22  #include <string>
    23  
    24  #include "absl/container/flat_hash_map.h"
    25  #include "absl/types/span.h"
    26  #include "assertions.h"
    27  #include "kythe/proto/common.pb.h"
    28  #include "kythe/proto/storage.pb.h"
    29  
    30  namespace kythe {
    31  namespace verifier {
    32  /// \brief Runs logic programs.
    33  ///
    34  /// The `Verifier` combines an `AssertionContext` with a database of Kythe
    35  /// facts. It can be used to determine whether the goals specified in the
    36  /// `AssertionContext` are satisfiable.
    37  class Verifier {
    38   public:
    39    /// \param trace_lex Dump lexing debug information
    40    /// \param trace_parse Dump parsing debug information
    41    explicit Verifier(bool trace_lex = false, bool trace_parse = false);
    42  
    43    /// \brief Loads an in-memory source file.
    44    /// \param filename The name to use for the file; may be blank.
    45    /// \param vname The AstNode of the vname for the file.
    46    /// \param text The symbol for the text to load
    47    /// \return false if we failed.
    48    bool LoadInMemoryRuleFile(const std::string& filename, AstNode* vname,
    49                              Symbol text);
    50  
    51    /// \brief Loads a source file with goal comments indicating rules and data.
    52    /// The VName for the source file will be determined by matching its content
    53    /// against file nodes.
    54    /// \param filename The filename to load
    55    /// \return false if we failed
    56    bool LoadInlineRuleFile(const std::string& filename);
    57  
    58    /// \brief Loads a text proto with goal comments indicating rules and data.
    59    /// The VName for the source file will be blank.
    60    /// \param file_data The data to load
    61    /// \param path the path to use for anchors
    62    /// \param root the root to use for anchors
    63    /// \param corpus the corpus to use for anchors
    64    /// \return false if we failed
    65    bool LoadInlineProtoFile(const std::string& file_data,
    66                             absl::string_view path = "",
    67                             absl::string_view root = "",
    68                             absl::string_view corpus = "");
    69  
    70    /// \brief During verification, ignore duplicate facts.
    71    void IgnoreDuplicateFacts();
    72  
    73    /// \brief During verification, ignore conflicting /kythe/code facts.
    74    void IgnoreCodeConflicts();
    75  
    76    /// \brief Save results of verification keyed by inspection label.
    77    void SaveEVarAssignments();
    78  
    79    /// \brief Emit verbose logging.
    80    void Verbose();
    81  
    82    /// \brief Dump all goals to standard out.
    83    void ShowGoals();
    84  
    85    /// \brief Prints out a particular goal with its original source location
    86    /// to standard error.
    87    /// \param group_index The index of the goal's group.
    88    /// \param goal_index The index of the goal to print
    89    /// \sa highest_goal_reached, highest_group_reached
    90    void DumpErrorGoal(size_t group_index, size_t goal_index);
    91  
    92    /// \brief Dump known facts to standard out as a GraphViz graph.
    93    void DumpAsDot();
    94  
    95    /// \brief Dump known facts to standard out as JSON.
    96    void DumpAsJson();
    97  
    98    /// \brief Attempts to satisfy all goals from all loaded rule files and facts.
    99    /// \param inspect function to call on any inspection request
   100    /// \return true if all goals could be satisfied.
   101    bool VerifyAllGoals(std::function<bool(Verifier* context, const Inspection&,
   102                                           std::string_view)>
   103                            inspect);
   104  
   105    /// \brief Attempts to satisfy all goals from all loaded rule files and facts.
   106    /// \return true if all goals could be satisfied.
   107    bool VerifyAllGoals();
   108  
   109    /// \brief Adds a single Kythe fact to the database.
   110    /// \param database_name some name used to define the database; should live
   111    /// as long as the `Verifier`. Used only for diagnostics.
   112    /// \param fact_id some identifier for the fact. Used only for diagnostics.
          /// \param entry The Kythe entry holding the fact to add.
   113    /// \return false if something went wrong.
   114    bool AssertSingleFact(std::string* database_name, unsigned int fact_id,
   115                          const kythe::proto::Entry& entry);
   116  
   117    /// \brief Perform basic well-formedness checks on the input database.
   118    /// \pre The database contains only fact-shaped terms, as generated by
   119    /// `AssertSingleFact`.
   120    /// \return false if the database was not well-formed.
   121    bool PrepareDatabase();
   122  
   123    /// Arena for allocating memory for both static data loaded from the database
   124    /// and dynamic data allocated during the course of evaluation.
   125    Arena* arena() { return &arena_; }
   126  
   127    /// Symbol table for uniquing strings.
   128    SymbolTable* symbol_table() { return &symbol_table_; }
   129  
   130    /// \brief Allocates an identifier for some token.
   131    /// \param location The source location for the identifier.
   132    /// \param token The text of the identifier.
   133    /// \return An `Identifier`. This may not be unique.
   134    Identifier* IdentifierFor(const yy::location& location,
   135                              const std::string& token);
   136  
   137    /// \brief Stringifies an integer, then makes an identifier out of it.
   138    /// \param location The source location for the identifier.
   139    /// \param integer The integer to stringify.
   140    /// \return An `Identifier`. This may not be unique.
   141    Identifier* IdentifierFor(const yy::location& location, int integer);
   142  
   143    /// \brief Convenience function to make `(App head (Tuple values))`.
   144    /// \param location The source location for the predicate.
   145    /// \param head The lhs of the `App` to allocate.
   146    /// \param values The body of the `Tuple` to allocate.
          /// \return The node for the resulting `App`.
   147    AstNode* MakePredicate(const yy::location& location, AstNode* head,
   148                           absl::Span<AstNode* const> values);
   149  
   150    /// \brief The head used for equality predicates.
   151    Identifier* eq_id() { return eq_id_; }
   152  
   153    /// \brief The head used for any VName predicate.
   154    AstNode* vname_id() { return vname_id_; }
   155  
   156    /// \brief The head used for any Fact predicate.
   157    AstNode* fact_id() { return fact_id_; }
   158  
   159    /// \brief The fact kind for a root/empty fact label.
   160    AstNode* root_id() { return root_id_; }
   161  
   162    /// \brief The empty string as an identifier.
   163    AstNode* empty_string_id() { return empty_string_id_; }
   164  
   165    /// \brief The fact kind for an edge ordinal.
   166    AstNode* ordinal_id() { return ordinal_id_; }
   167  
   168    /// \brief The fact kind used to assign a node its kind (eg /kythe/node/kind).
   169    AstNode* kind_id() { return kind_id_; }
   170  
   171    /// \brief The fact kind used for an anchor.
   172    AstNode* anchor_id() { return anchor_id_; }
   173  
   174    /// \brief The fact kind used for a file.
   175    AstNode* file_id() { return file_id_; }
   176  
   177    /// \brief Object for parsing and storing assertions.
   178    AssertionParser* parser() { return &parser_; }
   179  
   180    /// \brief Returns the highest group index the verifier reached during
   181    /// solving.
   182    size_t highest_group_reached() const { return highest_group_reached_; }
   183  
   184    /// \brief Returns the highest goal index the verifier reached during
   185    /// solving.
   186    size_t highest_goal_reached() const { return highest_goal_reached_; }
   187  
   188    /// \brief Change the regex used to identify goals in source text.
          /// \param regex The regex to use. NOTE(review): presumably it should have
          /// one match group, as documented on `goal_comment_regex_` -- confirm.
          /// \param error NOTE(review): presumably receives a description of the
          /// failure when this returns false -- confirm against the implementation.
   189    /// \return false on failure.
   190    bool SetGoalCommentRegex(const std::string& regex, std::string* error);
   191  
   192    /// \brief Use a prefix to match goals in source text.
   193    void SetGoalCommentPrefix(const std::string& it);
   194  
   195    /// \brief Look for assertions in file node text.
   196    void UseFileNodes() { assertions_from_file_nodes_ = true; }
   197  
   198    /// \brief Only raise a warning if a file VName is missing.
   199    void AllowMissingFileVNames() { allow_missing_file_vnames_ = true; }
   200  
   201    /// \brief Convert MarkedSource-valued facts to graphs.
   202    void ConvertMarkedSource() { convert_marked_source_ = true; }
   203  
   204    /// \brief Show anchor locations in graph dumps (instead of @).
   205    void ShowAnchors() { show_anchors_ = true; }
   206  
   207    /// \brief Show VNames for nodes which also have labels in graph dumps.
   208    void ShowLabeledVnames() { show_labeled_vnames_ = true; }
   209  
   210    /// \brief Show the /kythe and /kythe/edge prefixes in graph dumps.
   211    void ShowFactPrefix() { show_fact_prefix_ = true; }
   212  
   213    /// \brief Elide unlabeled nodes from graph dumps.
   214    void ElideUnlabeled() { show_unlabeled_ = false; }
   215  
   216    /// \brief Check for singleton EVars.
   217    /// \return true if there were singletons.
   218    bool CheckForSingletonEVars() { return parser_.CheckForSingletonEVars(); }
   219  
   220    /// \brief Don't search for file vnames.
   221    void IgnoreFileVnames() { file_vnames_ = false; }
   222  
   223    /// \brief Use the fast solver.
          /// \param value true to use the fast solver; false to not use it.
   224    void UseFastSolver(bool value) { use_fast_solver_ = value; }
   225  
   226    /// \brief Gets a string representation of `i`.
   227    /// \deprecated Inspection callbacks will be provided with strings and
   228    /// will no longer have access to the internal AST.
   229    std::string InspectionString(const Inspection& i);
   230  
   231    /// \brief Use `corpus` for file nodes without a corpus set.
   232    void UseDefaultFileCorpus(const std::string& corpus) {
   233      default_file_corpus_ = IdentifierFor(builtin_location_, corpus);
   234    }
   235  
   236   private:
          /// A VName represented as a tuple of five interned `Symbol`s.
          /// \sa InternVName
   237    using InternedVName = std::tuple<Symbol, Symbol, Symbol, Symbol, Symbol>;
   238  
   239    /// \brief Interns an AST node known to be a VName.
   240    /// \param node the node to intern.
   241    InternedVName InternVName(AstNode* node);
   242  
   243    /// \return a new vname with its corpus filled with the default file corpus
   244    /// if `node` is a vname without a corpus set; otherwise `node`.
   245    AstNode* FixFileVName(AstNode* node);
   246  
   247    /// \brief Generate a VName that will not conflict with any other VName.
   248    AstNode* NewUniqueVName(const yy::location& loc);
   249  
   250    /// \brief Converts an encoded /kythe/code fact to a form that's useful
   251    /// to the verifier.
   252    /// \param loc The location to use in diagnostics.
          /// \param code_data The encoded /kythe/code fact value.
   253    /// \return null if something went wrong; otherwise, an AstNode corresponding
   254    /// to a VName of a synthetic node for `code_data`.
   255    AstNode* ConvertCodeFact(const yy::location& loc,
   256                             const std::string& code_data);
   257  
   258    /// \brief Converts an encoded /kythe/code/json fact to a form that's useful
   259    /// to the verifier.
   260    /// \param loc The location to use in diagnostics.
          /// \param code_data The encoded /kythe/code/json fact value.
   261    /// \return null if something went wrong; otherwise, an AstNode corresponding
   262    /// to a VName of a synthetic node for `code_data`.
   263    AstNode* ConvertCodeJsonFact(const yy::location& loc,
   264                                 const std::string& code_data);
   265  
   266    /// \brief Converts a MarkedSource message to a form that's useful
   267    /// to the verifier.
   268    /// \param loc The location to use in diagnostics.
          /// \param marked_source The message to convert.
   269    /// \return null if something went wrong; otherwise, an AstNode corresponding
   270    /// to a VName of a synthetic node for `marked_source`.
   271    AstNode* ConvertMarkedSource(
   272        const yy::location& loc,
   273        const kythe::proto::common::MarkedSource& marked_source);
   274  
   275    /// \brief Converts a VName proto to its AST representation.
   276    AstNode* ConvertVName(const yy::location& location,
   277                          const kythe::proto::VName& vname);
   278  
   279    /// \brief Adds an anchor VName.
          /// \param vname The VName to record for the anchor.
          /// \param begin The anchor's begin offset (first half of the map key).
          /// \param end The anchor's end offset (second half of the map key).
   280    void AddAnchor(AstNode* vname, size_t begin, size_t end) {
   281      anchors_.emplace(std::make_pair(begin, end), vname);
   282    }
   283  
   284    /// \brief Processes a fact tuple for the fast solver.
   285    /// \param tuple the five-tuple representation of a fact
   286    /// \return true if successful.
   287    bool ProcessFactTupleForFastSolver(Tuple* tuple);
   288  
   289    /// \sa parser()
   290    AssertionParser parser_;
   291  
   292    /// \sa arena()
   293    Arena arena_;
   294  
   295    /// \sa symbol_table()
   296    SymbolTable symbol_table_;
   297  
   298    /// All known facts.
   299    Database facts_;
   300  
   301    /// Maps anchor offsets to anchor VName tuples.
   302    AnchorMap anchors_;
   303  
   304    /// Has the database been prepared?
   305    bool database_prepared_ = false;
   306  
   307    /// Emit verbose logging?
   308    bool verbose_ = false;
   309  
   310    /// Ignore duplicate facts during verification?
   311    bool ignore_dups_ = false;
   312  
   313    /// Ignore conflicting /kythe/code facts during verification?
   314    bool ignore_code_conflicts_ = false;
   315  
   316    /// Filename to use for builtin constants.
   317    std::string builtin_location_name_;
   318  
   319    /// Location to use for builtin constants.
   320    yy::location builtin_location_;
   321  
          // NOTE(review): the `Identifier*` / `AstNode*` members declared from here
          // on have no in-class initializers; presumably the constructor assigns
          // all of them -- confirm against the implementation file.
   322    /// Node to use for the `=` identifier.
   323    Identifier* eq_id_;
   324  
   325    /// Node to use for the `vname` constant.
   326    AstNode* vname_id_;
   327  
   328    /// Node to use for the `fact` constant.
   329    AstNode* fact_id_;
   330  
   331    /// Node to use for the `/` constant.
   332    AstNode* root_id_;
   333  
   334    /// Node to use for the empty string constant.
   335    AstNode* empty_string_id_;
   336  
   337    /// Node to use for the `/kythe/ordinal` constant.
   338    AstNode* ordinal_id_;
   339  
   340    /// Node to use for the `/kythe/node/kind` constant.
   341    AstNode* kind_id_;
   342  
   343    /// Node to use for the `anchor` constant.
   344    AstNode* anchor_id_;
   345  
   346    /// Node to use for the `/kythe/loc/start` constant.
   347    AstNode* start_id_;
   348  
   349    /// Node to use for the `/kythe/loc/end` constant.
   350    AstNode* end_id_;
   351  
   352    /// Node to use for the `file` node kind.
   353    AstNode* file_id_;
   354  
   355    /// Node to use for the `text` fact kind.
   356    AstNode* text_id_;
   357  
   358    /// Node to use for the `code` fact kind. The fact value should be a
   359    /// serialized kythe.proto.MarkedSource message.
   360    AstNode* code_id_;
   361  
   362    /// Node to use for the `code/json` fact kind. The fact value should be a
   363    /// JSON-serialized kythe.proto.MarkedSource message.
   364    AstNode* code_json_id_;
   365  
   366    /// The highest goal group reached during solving (often the culprit for why
   367    /// the solution failed).
   368    size_t highest_group_reached_ = 0;
   369  
   370    /// The highest goal reached during solving (often the culprit for why
   371    /// the solution failed).
   372    size_t highest_goal_reached_ = 0;
   373  
   374    /// Whether we save assignments to EVars (by inspection label).
   375    bool saving_assignments_ = false;
   376  
   377    /// A map from inspection label to saved assignment. Note that
   378    /// duplicate labels will overwrite one another. This means that
   379    /// it's important to disambiguate cases where this is likely
   380    /// (e.g., we add line and column information to labels we generate
   381    /// for anchors).
   382    absl::flat_hash_map<std::string, std::string> saved_assignments_;
   383  
   384    /// Maps from pretty-printed vnames to (parsed) file node text.
   385    std::map<std::string, Symbol> fake_files_;
   386  
   387    /// Read assertions from file nodes.
   388    bool assertions_from_file_nodes_ = false;
   389  
   390    /// The regex to look for to identify goal comments. Should have one match
   391    /// group.
   392    std::unique_ptr<RE2> goal_comment_regex_;
   393  
   394    /// If true, convert MarkedSource-valued facts to subgraphs. If false,
   395    /// MarkedSource-valued facts will be replaced with opaque but unique
   396    /// identifiers.
   397    bool convert_marked_source_ = false;
   398  
   399    /// If true, show anchor locations in graph dumps (instead of @).
   400    bool show_anchors_ = false;
   401  
   402    /// If true, show unlabeled nodes in graph dumps.
   403    bool show_unlabeled_ = true;
   404  
   405    /// If true, show VNames for labeled nodes in graph dumps.
   406    bool show_labeled_vnames_ = false;
   407  
   408    /// If true, include the /kythe and /kythe/edge prefix on facts and edges.
   409    bool show_fact_prefix_ = false;
   410  
   411    /// Identifier for MarkedSource child edges.
   412    AstNode* marked_source_child_id_;
   413  
   414    /// Identifier for MarkedSource code edges.
   415    AstNode* marked_source_code_edge_id_;
   416  
   417    /// Identifier for MarkedSource BOX kinds.
   418    AstNode* marked_source_box_id_;
   419  
   420    /// Identifier for MarkedSource TYPE kinds.
   421    AstNode* marked_source_type_id_;
   422  
   423    /// Identifier for MarkedSource PARAMETER kinds.
   424    AstNode* marked_source_parameter_id_;
   425  
   426    /// Identifier for MarkedSource IDENTIFIER kinds.
   427    AstNode* marked_source_identifier_id_;
   428  
   429    /// Identifier for MarkedSource CONTEXT kinds.
   430    AstNode* marked_source_context_id_;
   431  
   432    /// Identifier for MarkedSource INITIALIZER kinds.
   433    AstNode* marked_source_initializer_id_;
   434  
   435    /// Identifier for MarkedSource MODIFIER kinds.
   436    AstNode* marked_source_modifier_id_;
   437  
   438    /// Identifier for MarkedSource PARAMETER_LOOKUP_BY_PARAM kinds.
   439    AstNode* marked_source_parameter_lookup_by_param_id_;
   440  
   441    /// Identifier for MarkedSource LOOKUP_BY_PARAM kinds.
   442    AstNode* marked_source_lookup_by_param_id_;
   443  
   444    /// Identifier for MarkedSource PARAMETER_LOOKUP_BY_TPARAM kinds.
   445    AstNode* marked_source_parameter_lookup_by_tparam_id_;
   446  
   447    /// Identifier for MarkedSource LOOKUP_BY_TPARAM kinds.
   448    AstNode* marked_source_lookup_by_tparam_id_;
   449  
   450    /// Identifier for MarkedSource PARAMETER_LOOKUP_BY_PARAM_WITH_DEFAULTS kinds.
   451    AstNode* marked_source_parameter_lookup_by_param_with_defaults_id_;
   452  
   453    /// Identifier for MarkedSource LOOKUP_BY_TYPED kinds.
   454    AstNode* marked_source_lookup_by_typed_id_;
   455  
   456    /// Identifier for MarkedSource kind facts.
   457    AstNode* marked_source_kind_id_;
   458  
   459    /// Identifier for MarkedSource pre_text facts.
   460    AstNode* marked_source_pre_text_id_;
   461  
   462    /// Identifier for MarkedSource post_child_text facts.
   463    AstNode* marked_source_post_child_text_id_;
   464  
   465    /// Identifier for MarkedSource post_text facts.
   466    AstNode* marked_source_post_text_id_;
   467  
   468    /// Identifier for MarkedSource lookup_index facts.
   469    AstNode* marked_source_lookup_index_id_;
   470  
   471    /// Identifier for MarkedSource default_children_count facts.
   472    AstNode* marked_source_default_children_count_id_;
   473  
   474    /// Identifier for MarkedSource add_final_list_token facts.
   475    AstNode* marked_source_add_final_list_token_id_;
   476  
   477    /// Identifier for MarkedSource link edges.
   478    AstNode* marked_source_link_id_;
   479  
   480    /// Identifier for MarkedSource true values.
   481    AstNode* marked_source_true_id_;
   482  
   483    /// Identifier for MarkedSource false values.
   484    AstNode* marked_source_false_id_;
   485  
   486    /// Maps from file content to (verified) VName.
   487    absl::flat_hash_map<Symbol, AstNode*> content_to_vname_;
   488  
   489    /// Find file vnames by examining file content.
   490    bool file_vnames_ = true;
   491  
   492    /// If file_vnames_ is true, only warn if we can't find a file's VName.
   493    bool allow_missing_file_vnames_ = false;
   494  
   495    /// Use the fast solver.
   496    bool use_fast_solver_ = false;
   497  
   498    /// Sentinel value for a known file.
   499    Symbol known_file_sym_;
   500  
   501    /// Sentinel value for a known nonfile.
   502    Symbol known_not_file_sym_;
   503  
   504    /// Maps VNames to known_file_sym_, known_not_file_sym_, or file text.
   505    absl::flat_hash_map<InternedVName, Symbol> fast_solver_files_;
   506  
   507    /// File corpus to use if none is set on a file node.
   508    AstNode* default_file_corpus_;
   509  
   510    /// The symbol for the empty string.
   511    Symbol empty_string_sym_;
   512  };
   513  
   514  }  // namespace verifier
   515  }  // namespace kythe
   516  
   517  #endif  // KYTHE_CXX_VERIFIER_H_