kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertions.h (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #ifndef KYTHE_CXX_VERIFIER_ASSERTIONS_H_
    18  #define KYTHE_CXX_VERIFIER_ASSERTIONS_H_
    19  
    20  #include <deque>
    21  #include <unordered_map>
    22  #include <vector>
    23  
    24  namespace yy {
    25  class AssertionParserImpl;
    26  }
    27  
    28  #include "kythe/cxx/verifier/assertion_ast.h"
    29  #include "kythe/cxx/verifier/parser.yy.hh"
    30  #include "re2/re2.h"
    31  
    32  namespace kythe {
    33  namespace verifier {
    34  
    35  class Verifier;
    36  
    37  /// \brief Parses logic programs.
    38  ///
    39  /// `AssertionParser` collects together all goals and data that are part of
    40  /// a verification program. This program is then combined with a database of
    41  /// facts (which are merely terms represented in a different, perhaps indexed,
    42  /// format) by the `Verifier`.
    43  class AssertionParser {
    44   public:
    45    /// \param trace_lex Dump lexing debug information
    46    /// \param trace_parse Dump parsing debug information
    47    explicit AssertionParser(Verifier* verifier, bool trace_lex = false,
    48                             bool trace_parse = false);
    49  
    50    /// \brief Loads a file containing rules in marked comments.
    51    /// \param filename The filename of the file to load
    52    /// \param goal_comment_regex Lines matching this regex are goals. Goals
    53    /// will be read from the regex's first capture group.
    54    /// \return true if there were no errors
    55    bool ParseInlineRuleFile(const std::string& filename, Symbol path,
    56                             Symbol root, Symbol corpus,
    57                             const RE2& goal_comment_regex);
    58  
    59    /// \brief Loads a string containing rules in marked comments.
    60    /// \param content The content to parse and load
    61    /// \param fake_filename Some string to use when printing errors and locations
    62    /// \param goal_comment_regex Lines matching this regex are goals. Goals
    63    /// will be read from the regex's first capture group.
    64    /// \return true if there were no errors
    65    bool ParseInlineRuleString(const std::string& content,
    66                               const std::string& fake_filename, Symbol path,
    67                               Symbol root, Symbol corpus,
    68                               const RE2& goal_comment_regex);
    69  
    70    /// \brief The name of the current file being read. It is safe to take
    71    /// the address of this string (which shares the lifetime of this object.)
    72    std::string& file() { return files_.back(); }
    73  
    74    /// \brief This `AssertionParser`'s associated `Verifier`.
    75    Verifier& verifier() { return verifier_; }
    76  
    77    /// \brief All of the goal groups in this `AssertionParser`.
    78    std::vector<GoalGroup>& groups() { return groups_; }
    79  
    80    /// \brief All of the inspections in this `AssertionParser`.
    81    std::vector<Inspection>& inspections() { return inspections_; }
    82  
    83    /// \brief Unescapes a string literal (which is expected to include
    84    /// terminating quotes).
    85    /// \param yytext literal string to escape
    86    /// \param out pointer to a string to overwrite with `yytext` unescaped.
    87    /// \return true if `yytext` was a valid literal string; false otherwise.
    88    static bool Unescape(const char* yytext, std::string* out);
    89  
    90    /// Should every EVar be added by default to the inspection list?
    91    void InspectAllEVars() { default_inspect_ = true; }
    92  
    93    /// \brief Check that there are no singleton EVars.
    94    /// \return true if there were singletons.
    95    bool CheckForSingletonEVars();
    96  
    97   private:
    98    friend class yy::AssertionParserImpl;
    99  
   100    /// \brief Sets the scan buffer to a premarked string and turns on
   101    /// tracing.
   102    /// \note Implemented in `assertions.lex`.
   103    void SetScanBuffer(const std::string& scan_buffer, bool trace_scanning);
   104  
   105    /// \brief Resets recorded source text.
   106    void ResetLine();
   107  
   108    /// \brief Records source text after determining that it does not
   109    /// begin with a goal comment marker.
   110    /// \param yytext A 1-length string containing the character to append.
   111    void AppendToLine(const char* yytext);
   112  
   113    /// \brief Called at the end of an ordinary line of source text to resolve
   114    /// available forward location references.
   115    ///
   116    /// Certain syntactic features (like `@'token`) refer to elements on the
   117    /// next line of source text. After that next line is buffered using
   118    /// `AppendToLine`, the lexer calls to `ResolveLocations` to point those
   119    /// features at the correct locations.
   120    ///
   121    /// \return true if all locations could be resolved
   122    bool ResolveLocations(const yy::location& end_of_line,
   123                          size_t offset_after_endline, bool end_of_file);
   124  
   125    /// \brief Called by the lexer to save the end location of the current file
   126    /// or buffer.
   127    void save_eof(const yy::location& eof, size_t eof_ofs) {
   128      last_eof_ = eof;
   129      last_eof_ofs_ = eof_ofs;
   130    }
   131  
   132    /// \note Implemented by generated code care of flex.
   133    static int lex(YYSTYPE*, yy::location*, AssertionParser& context);
   134  
   135    /// \brief Used by the lexer and parser to report errors.
   136    /// \param location Source location where an error occurred.
   137    /// \param message Text of the error.
   138    void Error(const yy::location& location, const std::string& message);
   139  
   140    /// \brief Used by the lexer and parser to report errors.
   141    /// \param message Text of the error.
   142    void Error(const std::string& message);
   143  
   144    /// \brief Initializes the lexer to scan from file_.
   145    /// \param goal_comment_regex regex to identify goal comments.
   146    void ScanBeginFile(const RE2& goal_comment_regex, bool trace_scanning);
   147  
   148    /// \brief Initializes the lexer to scan from a string.
   149    /// \param goal_comment_regex regex to identify goal comments.
   150    void ScanBeginString(const RE2& goal_comment_regex, const std::string& data,
   151                         bool trace_scanning);
   152  
   153    /// \brief Handles end-of-scan actions and destroys any buffers.
   154    /// \note Implemented in `assertions.lex`.
   155    void ScanEnd(const yy::location& eof_loc, size_t eof_loc_ofs);
   156    AstNode** PopNodes(size_t node_count);
   157    void PushNode(AstNode* node);
   158    void AppendGoal(size_t group_id, AstNode* goal);
   159  
   160    /// \brief Generates deduplicated `Identifier`s or `EVar`s.
   161    /// \param location Source location of the token.
   162    /// \param for_token Token to check.
   163    /// \return An `EVar` if `for_token` starts with a capital letter;
   164    /// an `Identifier` otherwise.
   165    /// \sa CreateEVar, CreateIdentifier
   166    AstNode* CreateAtom(const yy::location& location,
   167                        const std::string& for_token);
   168  
   169    /// \brief Generates an equality constraint between the lhs and the rhs.
   170    /// \param location Source location of the "=" token.
   171    /// \param lhs The lhs of the equality.
   172    /// \param rhs The rhs of the equality.
   173    AstNode* CreateEqualityConstraint(const yy::location& location, AstNode* lhs,
   174                                      AstNode* rhs);
   175  
   176    /// \brief Generates deduplicated `EVar`s.
   177    /// \param location Source location of the token.
   178    /// \param for_token Token to use.
   179    /// \return A new `EVar` if `for_token` has not yet been made into
   180    /// an `EVar` already, or the previous `EVar` returned the last
   181    /// time `CreateEVar` was called.
   182    EVar* CreateEVar(const yy::location& location, const std::string& for_token);
   183  
   184    /// \brief Generates deduplicated `Identifier`s.
   185    /// \param location Source location of the text.
   186    /// \param for_text text to use.
   187    /// \return A new `Identifier` if `for_text` has not yet been made into
   188    /// an `Identifier` already, or the previous `Identifier` returned the last
   189    /// time `CreateIdenfier` was called.
   190    Identifier* CreateIdentifier(const yy::location& location,
   191                                 const std::string& for_text);
   192  
   193    /// \brief Creates an anonymous `EVar` to implement the `_` token.
   194    /// \param location Source location of the token.
   195    AstNode* CreateDontCare(const yy::location& location);
   196  
   197    /// \brief Adds an inspect post-action to the current goal.
   198    /// \param location Source location for the inspection.
   199    /// \param for_exp Expression to inspect.
   200    /// \return An inspection record.
   201    AstNode* CreateInspect(const yy::location& location,
   202                           const std::string& inspect_id, AstNode* to_inspect);
   203  
   204    void PushLocationSpec(const std::string& for_token);
   205  
   206    /// \brief Pushes a relative location spec (@token:+2).
   207    void PushRelativeLocationSpec(const std::string& for_token,
   208                                  const std::string& relative_spec);
   209  
   210    /// \brief Pushes an absolute location spec (@token:1234).
   211    void PushAbsoluteLocationSpec(const std::string& for_token,
   212                                  const std::string& absolute);
   213  
   214    /// \brief Changes the last-pushed location spec to match the `match_spec`th
   215    /// instance of its match string.
   216    void SetTopLocationSpecMatchNumber(const std::string& number);
   217  
   218    AstNode* CreateAnchorSpec(const yy::location& location);
   219  
   220    /// \brief Generates a new offset spec (equivalent to a string literal).
   221    /// \param location The location in the goal text of this offset spec.
   222    /// \param at_end should this offset spec be at the end of the search string?
   223    AstNode* CreateOffsetSpec(const yy::location& location, bool at_end);
   224  
   225    AstNode* CreateSimpleEdgeFact(const yy::location& location, AstNode* edge_lhs,
   226                                  const std::string& literal_kind,
   227                                  AstNode* edge_rhs, AstNode* ordinal);
   228  
   229    AstNode* CreateSimpleNodeFact(const yy::location& location, AstNode* lhs,
   230                                  const std::string& literal_key, AstNode* value);
   231  
   232    Identifier* PathIdentifierFor(const yy::location& location,
   233                                  const std::string& path_fragment,
   234                                  const std::string& default_root);
   235  
   236    /// \brief Enters a new goal group.
   237    /// \param location The location for diagnostics.
   238    /// \param negated true if this group is negated.
   239    /// Only one goal group may be entered at once.
   240    void EnterGoalGroup(const yy::location& location, bool negated);
   241  
   242    /// \brief Exits the last-entered goal group.
   243    void ExitGoalGroup(const yy::location& location);
   244  
   245    /// \brief The current goal group.
   246    size_t group_id() const {
   247      if (inside_goal_group_) {
   248        return groups_.size() - 1;
   249      } else {
   250        return 0;
   251      }
   252    }
   253  
   254    Verifier& verifier_;
   255  
   256    /// The arena from the verifier; needed by the parser implementation.
   257    Arena* arena_ = nullptr;
   258  
   259    std::vector<GoalGroup> groups_;
   260    bool inside_goal_group_ = false;
   261    /// \brief A record for some text to be matched to its location.
   262    struct UnresolvedLocation {
   263      enum Kind {
   264        kAnchor,       ///< An anchor (@tok).
   265        kOffsetBegin,  ///< The offset at the start of the location (@^tok).
   266        kOffsetEnd     ///< The offset at the end of the location (@$tok).
   267      };
   268      EVar* anchor_evar;        ///< The EVar to be solved.
   269      std::string anchor_text;  ///< The text to match.
   270      size_t line_number;       ///< The line to match text on.
   271      bool use_line_number;     ///< Whether to match with `line_number` or
   272                                ///< on the next possible non-goal line.
   273      size_t group_id;  ///< The group that will own the offset goals, if any.
   274      Kind kind;        ///< The flavor of UnresolvedLocation we are.
   275      bool must_be_unambiguous;  ///< If true, anchor_text must match only once.
   276      int match_number;  ///< If !`must_be_unambiguous`, match this instance of
   277                         ///< `anchor_text`.
   278    };
   279    std::vector<UnresolvedLocation> unresolved_locations_;
   280    std::vector<AstNode*> node_stack_;
   281    struct LocationSpec {
   282      std::string spec;
   283      int line_offset;
   284      bool is_absolute;
   285      bool must_be_unambiguous;
   286      int match_number;
   287    };
   288    std::vector<LocationSpec> location_spec_stack_;
   289    bool ValidateTopLocationSpec(const yy::location& location,
   290                                 size_t* line_number, bool* use_line_number,
   291                                 bool* must_be_unambiguous, int* match_number);
   292    /// Files we've parsed or are parsing (pushed onto the back).
   293    /// Note that location records will have internal pointers to these strings.
   294    std::deque<std::string> files_;
   295    std::string line_;
   296    /// Did we encounter errors during lexing or parsing?
   297    bool had_errors_ = false;
   298    /// Save the end-of-file location from the lexer.
   299    yy::location last_eof_;
   300    size_t last_eof_ofs_ = 0;
   301    /// Inspections to be performed after the verifier stops.
   302    std::vector<Inspection> inspections_;
   303    /// Context mapping symbols to AST nodes.
   304    std::unordered_map<Symbol, Identifier*> identifier_context_;
   305    std::unordered_map<Symbol, EVar*> evar_context_;
   306    std::unordered_map<EVar*, Symbol> singleton_evars_;
   307    /// Are we dumping lexer trace information?
   308    bool trace_lex_ = false;
   309    /// Are we dumping parser trace information?
   310    bool trace_parse_ = false;
   311    /// Should we inspect every user-provided EVar?
   312    bool default_inspect_ = false;
   313    /// The current file's path.
   314    Symbol path_;
   315    /// The current file's root.
   316    Symbol root_;
   317    /// The current file's corpus.
   318    Symbol corpus_;
   319  };
   320  }  // namespace verifier
   321  }  // namespace kythe
   322  
   323  #endif  // KYTHE_CXX_VERIFIER_ASSERTIONS_H_