kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertions.h (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef KYTHE_CXX_VERIFIER_ASSERTIONS_H_ 18 #define KYTHE_CXX_VERIFIER_ASSERTIONS_H_ 19 20 #include <deque> 21 #include <unordered_map> 22 #include <vector> 23 24 namespace yy { 25 class AssertionParserImpl; 26 } 27 28 #include "kythe/cxx/verifier/assertion_ast.h" 29 #include "kythe/cxx/verifier/parser.yy.hh" 30 #include "re2/re2.h" 31 32 namespace kythe { 33 namespace verifier { 34 35 class Verifier; 36 37 /// \brief Parses logic programs. 38 /// 39 /// `AssertionParser` collects together all goals and data that are part of 40 /// a verification program. This program is then combined with a database of 41 /// facts (which are merely terms represented in a different, perhaps indexed, 42 /// format) by the `Verifier`. 43 class AssertionParser { 44 public: 45 /// \param trace_lex Dump lexing debug information 46 /// \param trace_parse Dump parsing debug information 47 explicit AssertionParser(Verifier* verifier, bool trace_lex = false, 48 bool trace_parse = false); 49 50 /// \brief Loads a file containing rules in marked comments. 51 /// \param filename The filename of the file to load 52 /// \param goal_comment_regex Lines matching this regex are goals. Goals 53 /// will be read from the regex's first capture group. 54 /// \return true if there were no errors 55 bool ParseInlineRuleFile(const std::string& filename, Symbol path, 56 Symbol root, Symbol corpus, 57 const RE2& goal_comment_regex); 58 59 /// \brief Loads a string containing rules in marked comments. 60 /// \param content The content to parse and load 61 /// \param fake_filename Some string to use when printing errors and locations 62 /// \param goal_comment_regex Lines matching this regex are goals. Goals 63 /// will be read from the regex's first capture group. 64 /// \return true if there were no errors 65 bool ParseInlineRuleString(const std::string& content, 66 const std::string& fake_filename, Symbol path, 67 Symbol root, Symbol corpus, 68 const RE2& goal_comment_regex); 69 70 /// \brief The name of the current file being read. It is safe to take 71 /// the address of this string (which shares the lifetime of this object.) 72 std::string& file() { return files_.back(); } 73 74 /// \brief This `AssertionParser`'s associated `Verifier`. 75 Verifier& verifier() { return verifier_; } 76 77 /// \brief All of the goal groups in this `AssertionParser`. 78 std::vector<GoalGroup>& groups() { return groups_; } 79 80 /// \brief All of the inspections in this `AssertionParser`. 81 std::vector<Inspection>& inspections() { return inspections_; } 82 83 /// \brief Unescapes a string literal (which is expected to include 84 /// terminating quotes). 85 /// \param yytext literal string to escape 86 /// \param out pointer to a string to overwrite with `yytext` unescaped. 87 /// \return true if `yytext` was a valid literal string; false otherwise. 88 static bool Unescape(const char* yytext, std::string* out); 89 90 /// Should every EVar be added by default to the inspection list? 91 void InspectAllEVars() { default_inspect_ = true; } 92 93 /// \brief Check that there are no singleton EVars. 94 /// \return true if there were singletons. 95 bool CheckForSingletonEVars(); 96 97 private: 98 friend class yy::AssertionParserImpl; 99 100 /// \brief Sets the scan buffer to a premarked string and turns on 101 /// tracing. 102 /// \note Implemented in `assertions.lex`. 103 void SetScanBuffer(const std::string& scan_buffer, bool trace_scanning); 104 105 /// \brief Resets recorded source text. 106 void ResetLine(); 107 108 /// \brief Records source text after determining that it does not 109 /// begin with a goal comment marker. 110 /// \param yytext A 1-length string containing the character to append. 111 void AppendToLine(const char* yytext); 112 113 /// \brief Called at the end of an ordinary line of source text to resolve 114 /// available forward location references. 115 /// 116 /// Certain syntactic features (like `@'token`) refer to elements on the 117 /// next line of source text. After that next line is buffered using 118 /// `AppendToLine`, the lexer calls to `ResolveLocations` to point those 119 /// features at the correct locations. 120 /// 121 /// \return true if all locations could be resolved 122 bool ResolveLocations(const yy::location& end_of_line, 123 size_t offset_after_endline, bool end_of_file); 124 125 /// \brief Called by the lexer to save the end location of the current file 126 /// or buffer. 127 void save_eof(const yy::location& eof, size_t eof_ofs) { 128 last_eof_ = eof; 129 last_eof_ofs_ = eof_ofs; 130 } 131 132 /// \note Implemented by generated code care of flex. 133 static int lex(YYSTYPE*, yy::location*, AssertionParser& context); 134 135 /// \brief Used by the lexer and parser to report errors. 136 /// \param location Source location where an error occurred. 137 /// \param message Text of the error. 138 void Error(const yy::location& location, const std::string& message); 139 140 /// \brief Used by the lexer and parser to report errors. 141 /// \param message Text of the error. 142 void Error(const std::string& message); 143 144 /// \brief Initializes the lexer to scan from file_. 145 /// \param goal_comment_regex regex to identify goal comments. 146 void ScanBeginFile(const RE2& goal_comment_regex, bool trace_scanning); 147 148 /// \brief Initializes the lexer to scan from a string. 149 /// \param goal_comment_regex regex to identify goal comments. 150 void ScanBeginString(const RE2& goal_comment_regex, const std::string& data, 151 bool trace_scanning); 152 153 /// \brief Handles end-of-scan actions and destroys any buffers. 154 /// \note Implemented in `assertions.lex`. 155 void ScanEnd(const yy::location& eof_loc, size_t eof_loc_ofs); 156 AstNode** PopNodes(size_t node_count); 157 void PushNode(AstNode* node); 158 void AppendGoal(size_t group_id, AstNode* goal); 159 160 /// \brief Generates deduplicated `Identifier`s or `EVar`s. 161 /// \param location Source location of the token. 162 /// \param for_token Token to check. 163 /// \return An `EVar` if `for_token` starts with a capital letter; 164 /// an `Identifier` otherwise. 165 /// \sa CreateEVar, CreateIdentifier 166 AstNode* CreateAtom(const yy::location& location, 167 const std::string& for_token); 168 169 /// \brief Generates an equality constraint between the lhs and the rhs. 170 /// \param location Source location of the "=" token. 171 /// \param lhs The lhs of the equality. 172 /// \param rhs The rhs of the equality. 173 AstNode* CreateEqualityConstraint(const yy::location& location, AstNode* lhs, 174 AstNode* rhs); 175 176 /// \brief Generates deduplicated `EVar`s. 177 /// \param location Source location of the token. 178 /// \param for_token Token to use. 179 /// \return A new `EVar` if `for_token` has not yet been made into 180 /// an `EVar` already, or the previous `EVar` returned the last 181 /// time `CreateEVar` was called. 182 EVar* CreateEVar(const yy::location& location, const std::string& for_token); 183 184 /// \brief Generates deduplicated `Identifier`s. 185 /// \param location Source location of the text. 186 /// \param for_text text to use. 187 /// \return A new `Identifier` if `for_text` has not yet been made into 188 /// an `Identifier` already, or the previous `Identifier` returned the last 189 /// time `CreateIdenfier` was called. 190 Identifier* CreateIdentifier(const yy::location& location, 191 const std::string& for_text); 192 193 /// \brief Creates an anonymous `EVar` to implement the `_` token. 194 /// \param location Source location of the token. 195 AstNode* CreateDontCare(const yy::location& location); 196 197 /// \brief Adds an inspect post-action to the current goal. 198 /// \param location Source location for the inspection. 199 /// \param for_exp Expression to inspect. 200 /// \return An inspection record. 201 AstNode* CreateInspect(const yy::location& location, 202 const std::string& inspect_id, AstNode* to_inspect); 203 204 void PushLocationSpec(const std::string& for_token); 205 206 /// \brief Pushes a relative location spec (@token:+2). 207 void PushRelativeLocationSpec(const std::string& for_token, 208 const std::string& relative_spec); 209 210 /// \brief Pushes an absolute location spec (@token:1234). 211 void PushAbsoluteLocationSpec(const std::string& for_token, 212 const std::string& absolute); 213 214 /// \brief Changes the last-pushed location spec to match the `match_spec`th 215 /// instance of its match string. 216 void SetTopLocationSpecMatchNumber(const std::string& number); 217 218 AstNode* CreateAnchorSpec(const yy::location& location); 219 220 /// \brief Generates a new offset spec (equivalent to a string literal). 221 /// \param location The location in the goal text of this offset spec. 222 /// \param at_end should this offset spec be at the end of the search string? 223 AstNode* CreateOffsetSpec(const yy::location& location, bool at_end); 224 225 AstNode* CreateSimpleEdgeFact(const yy::location& location, AstNode* edge_lhs, 226 const std::string& literal_kind, 227 AstNode* edge_rhs, AstNode* ordinal); 228 229 AstNode* CreateSimpleNodeFact(const yy::location& location, AstNode* lhs, 230 const std::string& literal_key, AstNode* value); 231 232 Identifier* PathIdentifierFor(const yy::location& location, 233 const std::string& path_fragment, 234 const std::string& default_root); 235 236 /// \brief Enters a new goal group. 237 /// \param location The location for diagnostics. 238 /// \param negated true if this group is negated. 239 /// Only one goal group may be entered at once. 240 void EnterGoalGroup(const yy::location& location, bool negated); 241 242 /// \brief Exits the last-entered goal group. 243 void ExitGoalGroup(const yy::location& location); 244 245 /// \brief The current goal group. 246 size_t group_id() const { 247 if (inside_goal_group_) { 248 return groups_.size() - 1; 249 } else { 250 return 0; 251 } 252 } 253 254 Verifier& verifier_; 255 256 /// The arena from the verifier; needed by the parser implementation. 257 Arena* arena_ = nullptr; 258 259 std::vector<GoalGroup> groups_; 260 bool inside_goal_group_ = false; 261 /// \brief A record for some text to be matched to its location. 262 struct UnresolvedLocation { 263 enum Kind { 264 kAnchor, ///< An anchor (@tok). 265 kOffsetBegin, ///< The offset at the start of the location (@^tok). 266 kOffsetEnd ///< The offset at the end of the location (@$tok). 267 }; 268 EVar* anchor_evar; ///< The EVar to be solved. 269 std::string anchor_text; ///< The text to match. 270 size_t line_number; ///< The line to match text on. 271 bool use_line_number; ///< Whether to match with `line_number` or 272 ///< on the next possible non-goal line. 273 size_t group_id; ///< The group that will own the offset goals, if any. 274 Kind kind; ///< The flavor of UnresolvedLocation we are. 275 bool must_be_unambiguous; ///< If true, anchor_text must match only once. 276 int match_number; ///< If !`must_be_unambiguous`, match this instance of 277 ///< `anchor_text`. 278 }; 279 std::vector<UnresolvedLocation> unresolved_locations_; 280 std::vector<AstNode*> node_stack_; 281 struct LocationSpec { 282 std::string spec; 283 int line_offset; 284 bool is_absolute; 285 bool must_be_unambiguous; 286 int match_number; 287 }; 288 std::vector<LocationSpec> location_spec_stack_; 289 bool ValidateTopLocationSpec(const yy::location& location, 290 size_t* line_number, bool* use_line_number, 291 bool* must_be_unambiguous, int* match_number); 292 /// Files we've parsed or are parsing (pushed onto the back). 293 /// Note that location records will have internal pointers to these strings. 294 std::deque<std::string> files_; 295 std::string line_; 296 /// Did we encounter errors during lexing or parsing? 297 bool had_errors_ = false; 298 /// Save the end-of-file location from the lexer. 299 yy::location last_eof_; 300 size_t last_eof_ofs_ = 0; 301 /// Inspections to be performed after the verifier stops. 302 std::vector<Inspection> inspections_; 303 /// Context mapping symbols to AST nodes. 304 std::unordered_map<Symbol, Identifier*> identifier_context_; 305 std::unordered_map<Symbol, EVar*> evar_context_; 306 std::unordered_map<EVar*, Symbol> singleton_evars_; 307 /// Are we dumping lexer trace information? 308 bool trace_lex_ = false; 309 /// Are we dumping parser trace information? 310 bool trace_parse_ = false; 311 /// Should we inspect every user-provided EVar? 312 bool default_inspect_ = false; 313 /// The current file's path. 314 Symbol path_; 315 /// The current file's root. 316 Symbol root_; 317 /// The current file's corpus. 318 Symbol corpus_; 319 }; 320 } // namespace verifier 321 } // namespace kythe 322 323 #endif // KYTHE_CXX_VERIFIER_ASSERTIONS_H_