kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/verifier/assertions.cc (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "assertions.h" 18 19 #include "absl/strings/str_cat.h" 20 #include "absl/strings/string_view.h" 21 #include "kythe/cxx/common/file_utils.h" 22 #include "verifier.h" 23 24 namespace kythe { 25 namespace verifier { 26 27 void EVar::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) { 28 if (AstNode* node = current()) { 29 node->Dump(symbol_table, printer); 30 } else { 31 printer->Print("<null>"); 32 } 33 } 34 35 void Identifier::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) { 36 printer->Print(symbol_table.PrettyText(symbol_)); 37 } 38 39 void Range::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) { 40 printer->Print("Range("); 41 printer->Print(symbol_table.PrettyText(corpus_)); 42 printer->Print(","); 43 printer->Print(symbol_table.PrettyText(root_)); 44 printer->Print(","); 45 printer->Print(symbol_table.PrettyText(path_)); 46 printer->Print(","); 47 printer->Print(std::to_string(begin_)); 48 printer->Print(","); 49 printer->Print(std::to_string(end_)); 50 printer->Print(")"); 51 } 52 53 void Tuple::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) { 54 printer->Print("("); 55 for (size_t v = 0; v < element_count_; ++v) { 56 elements_[v]->Dump(symbol_table, printer); 57 if (v + 1 < element_count_) { 58 printer->Print(", "); 59 } 60 } 61 printer->Print(")"); 62 } 63 64 void App::Dump(const SymbolTable& symbol_table, PrettyPrinter* printer) { 65 lhs_->Dump(symbol_table, printer); 66 // rhs_ should be a Tuple, which outputs "(...)" around itself. 67 rhs_->Dump(symbol_table, printer); 68 } 69 70 bool AssertionParser::ParseInlineRuleString(const std::string& content, 71 const std::string& fake_filename, 72 Symbol path, Symbol root, 73 Symbol corpus, 74 const RE2& goal_comment_regex) { 75 path_ = path; 76 root_ = root; 77 corpus_ = corpus; 78 had_errors_ = false; 79 files_.push_back(fake_filename); 80 ResetLine(); 81 ScanBeginString(goal_comment_regex, content, trace_lex_); 82 yy::AssertionParserImpl parser(*this); 83 parser.set_debug_level(trace_parse_); 84 int result = parser.parse(); 85 ScanEnd(last_eof_, last_eof_ofs_); 86 return result == 0 && !had_errors_; 87 } 88 89 bool AssertionParser::ParseInlineRuleFile(const std::string& filename, 90 Symbol path, Symbol root, 91 Symbol corpus, 92 const RE2& goal_comment_regex) { 93 path_ = path; 94 root_ = root; 95 corpus_ = corpus; 96 files_.push_back(filename); 97 had_errors_ = false; 98 ResetLine(); 99 ScanBeginFile(goal_comment_regex, trace_lex_); 100 yy::AssertionParserImpl parser(*this); 101 parser.set_debug_level(trace_parse_); 102 int result = parser.parse(); 103 ScanEnd(last_eof_, last_eof_ofs_); 104 return result == 0 && !had_errors_; 105 } 106 107 void AssertionParser::Error(const yy::location& location, 108 const std::string& message) { 109 // TODO(zarko): replace with a PrettyPrinter 110 std::cerr << location << ": " << message << std::endl; 111 had_errors_ = true; 112 } 113 114 void AssertionParser::Error(const std::string& message) { 115 // TODO(zarko): replace with a PrettyPrinter 116 std::cerr << "When trying " << file() << ": " << message << std::endl; 117 had_errors_ = true; 118 } 119 120 bool AssertionParser::CheckForSingletonEVars() { 121 bool old_had_errors = had_errors_; 122 for (const auto& singleton : singleton_evars_) { 123 Error(singleton.first->location(), 124 "singleton variable " + 125 verifier_.symbol_table()->text(singleton.second) + 126 " used only here"); 127 } 128 had_errors_ = old_had_errors; 129 return !singleton_evars_.empty(); 130 } 131 132 AssertionParser::AssertionParser(Verifier* verifier, bool trace_lex, 133 bool trace_parse) 134 : verifier_(*verifier), 135 arena_(verifier->arena()), 136 trace_lex_(trace_lex), 137 trace_parse_(trace_parse) { 138 groups_.push_back(GoalGroup{GoalGroup::kNoneMayFail}); 139 } 140 141 bool AssertionParser::Unescape(const char* yytext, std::string* out) { 142 if (out == nullptr || *yytext != '\"') { 143 return false; 144 } 145 ++yytext; // Skip initial ". 146 out->clear(); 147 char current = *yytext++; // yytext will always immediately follow `current`. 148 for (; current != '\0' && current != '\"'; current = *yytext++) { 149 if (current == '\\') { 150 current = *yytext++; 151 switch (current) { 152 case '\"': 153 out->push_back(current); 154 break; 155 case '\\': 156 out->push_back(current); 157 break; 158 case 'n': 159 out->push_back('\n'); 160 break; 161 default: 162 return false; 163 } 164 } else { 165 out->push_back(current); 166 } 167 } 168 return (current == '\"' && *yytext == '\0'); 169 } 170 171 void AssertionParser::ResetLine() { line_.clear(); } 172 173 void AssertionParser::PushLocationSpec(const std::string& for_token) { 174 location_spec_stack_.emplace_back(LocationSpec{for_token, -1, false, true}); 175 } 176 177 void AssertionParser::PushRelativeLocationSpec(const std::string& for_token, 178 const std::string& relative) { 179 location_spec_stack_.emplace_back( 180 LocationSpec{for_token, atoi(relative.c_str()), false, true}); 181 } 182 183 void AssertionParser::PushAbsoluteLocationSpec(const std::string& for_token, 184 const std::string& absolute) { 185 location_spec_stack_.emplace_back( 186 LocationSpec{for_token, atoi(absolute.c_str()), true, true}); 187 } 188 189 void AssertionParser::SetTopLocationSpecMatchNumber(const std::string& number) { 190 if (!location_spec_stack_.empty()) { 191 // number is "#"{blank}*{int} 192 location_spec_stack_.back().must_be_unambiguous = false; 193 location_spec_stack_.back().match_number = atoi(number.c_str() + 1); 194 } 195 } 196 197 Identifier* AssertionParser::PathIdentifierFor( 198 const yy::location& location, const std::string& path_frag, 199 const std::string& default_root) { 200 if (path_frag.empty()) { 201 return verifier_.IdentifierFor(location, "/"); 202 } 203 std::string sigil; 204 if (path_frag[0] == '#' || path_frag[0] == '%') { 205 sigil = path_frag[0]; 206 if (path_frag.size() == 1) { 207 return verifier_.IdentifierFor(location, sigil); 208 } 209 } 210 if (path_frag[sigil.size()] != '/') { 211 return verifier_.IdentifierFor( 212 location, sigil + default_root + path_frag.substr(sigil.size())); 213 } 214 return verifier_.IdentifierFor(location, path_frag); 215 } 216 217 AstNode* AssertionParser::CreateEqualityConstraint(const yy::location& location, 218 AstNode* lhs, AstNode* rhs) { 219 return verifier_.MakePredicate(location, verifier_.eq_id(), {lhs, rhs}); 220 } 221 222 AstNode* AssertionParser::CreateSimpleEdgeFact(const yy::location& location, 223 AstNode* edge_lhs, 224 const std::string& literal_kind, 225 AstNode* edge_rhs, 226 AstNode* ordinal) { 227 if (ordinal) { 228 return verifier_.MakePredicate( 229 location, verifier_.fact_id(), 230 {edge_lhs, PathIdentifierFor(location, literal_kind, "/kythe/edge/"), 231 edge_rhs, verifier_.ordinal_id(), ordinal}); 232 } else { 233 return verifier_.MakePredicate( 234 location, verifier_.fact_id(), 235 {edge_lhs, PathIdentifierFor(location, literal_kind, "/kythe/edge/"), 236 edge_rhs, verifier_.root_id(), verifier_.empty_string_id()}); 237 } 238 } 239 240 AstNode* AssertionParser::CreateSimpleNodeFact(const yy::location& location, 241 AstNode* lhs, 242 const std::string& literal_key, 243 AstNode* value) { 244 return verifier_.MakePredicate( 245 location, verifier_.fact_id(), 246 {lhs, verifier_.empty_string_id(), verifier_.empty_string_id(), 247 PathIdentifierFor(location, literal_key, "/kythe/"), value}); 248 } 249 250 AstNode* AssertionParser::CreateInspect(const yy::location& location, 251 const std::string& inspect_id, 252 AstNode* to_inspect) { 253 if (EVar* evar = to_inspect->AsEVar()) { 254 singleton_evars_.erase(evar); 255 inspections_.emplace_back(inspect_id, evar, Inspection::Kind::EXPLICIT); 256 return to_inspect; 257 } else { 258 Error(location, "Inspecting something that's not an EVar."); 259 return to_inspect; 260 } 261 } 262 263 AstNode* AssertionParser::CreateDontCare(const yy::location& location) { 264 return new (verifier_.arena()) EVar(location); 265 } 266 267 AstNode* AssertionParser::CreateAtom(const yy::location& location, 268 const std::string& for_token) { 269 if (!for_token.empty() && for_token[0] == '_') { 270 return CreateDontCare(location); 271 } else if (!for_token.empty() && isupper(for_token[0])) { 272 return CreateEVar(location, for_token); 273 } else { 274 return CreateIdentifier(location, for_token); 275 } 276 } 277 278 Identifier* AssertionParser::CreateIdentifier(const yy::location& location, 279 const std::string& for_text) { 280 Symbol symbol = verifier_.symbol_table()->intern(for_text); 281 const auto old_binding = identifier_context_.find(symbol); 282 if (old_binding == identifier_context_.end()) { 283 Identifier* new_id = new (verifier_.arena()) Identifier(location, symbol); 284 identifier_context_.emplace(symbol, new_id); 285 return new_id; 286 } else { 287 return old_binding->second; 288 } 289 } 290 291 EVar* AssertionParser::CreateEVar(const yy::location& location, 292 const std::string& for_token) { 293 Symbol symbol = verifier_.symbol_table()->intern(for_token); 294 const auto old_binding = evar_context_.find(symbol); 295 if (old_binding == evar_context_.end()) { 296 EVar* new_evar = new (verifier_.arena()) EVar(location); 297 evar_context_.emplace(symbol, new_evar); 298 if (default_inspect_) { 299 inspections_.emplace_back(for_token, new_evar, 300 Inspection::Kind::IMPLICIT); 301 } 302 singleton_evars_[new_evar] = symbol; 303 return new_evar; 304 } else { 305 singleton_evars_.erase(old_binding->second); 306 return old_binding->second; 307 } 308 } 309 310 bool AssertionParser::ValidateTopLocationSpec(const yy::location& location, 311 size_t* line_number, 312 bool* use_line_number, 313 bool* must_be_unambiguous, 314 int* match_number) { 315 if (location_spec_stack_.empty()) { 316 Error(location, "No locations on location stack."); 317 return verifier_.empty_string_id(); 318 } 319 const auto& spec = location_spec_stack_.back(); 320 *must_be_unambiguous = spec.must_be_unambiguous; 321 *match_number = spec.match_number; 322 if (spec.line_offset == 0) { 323 Error(location, "This line offset is invalid."); 324 return verifier_.empty_string_id(); 325 } else if (spec.line_offset < 0) { 326 *use_line_number = false; 327 *line_number = 0; 328 return true; 329 } 330 *use_line_number = true; 331 *line_number = spec.is_absolute ? spec.line_offset 332 : spec.line_offset + location.begin.line; 333 if (*line_number <= location.begin.line) { 334 Error(location, "This line offset points to a previous or equal line."); 335 return false; 336 } 337 return true; 338 } 339 340 AstNode* AssertionParser::CreateAnchorSpec(const yy::location& location) { 341 size_t line_number = -1; 342 bool use_line_number = false; 343 bool must_be_unambiguous = false; 344 int match_number = -1; 345 if (!ValidateTopLocationSpec(location, &line_number, &use_line_number, 346 &must_be_unambiguous, &match_number)) { 347 return verifier_.empty_string_id(); 348 } 349 const auto& spec = location_spec_stack_.back(); 350 EVar* new_evar = new (verifier_.arena()) EVar(location); 351 unresolved_locations_.push_back(UnresolvedLocation{ 352 new_evar, spec.spec, line_number, use_line_number, group_id(), 353 UnresolvedLocation::Kind::kAnchor, must_be_unambiguous, match_number}); 354 location_spec_stack_.pop_back(); 355 return new_evar; 356 } 357 358 AstNode* AssertionParser::CreateOffsetSpec(const yy::location& location, 359 bool at_end) { 360 size_t line_number = -1; 361 bool use_line_number = false; 362 bool must_be_unambiguous = false; 363 int match_number = -1; 364 if (!ValidateTopLocationSpec(location, &line_number, &use_line_number, 365 &must_be_unambiguous, &match_number)) { 366 return verifier_.empty_string_id(); 367 } 368 const auto& spec = location_spec_stack_.back(); 369 EVar* new_evar = new (verifier_.arena()) EVar(location); 370 unresolved_locations_.push_back(UnresolvedLocation{ 371 new_evar, spec.spec, line_number, use_line_number, group_id(), 372 at_end ? UnresolvedLocation::Kind::kOffsetEnd 373 : UnresolvedLocation::Kind::kOffsetBegin, 374 must_be_unambiguous, match_number}); 375 location_spec_stack_.pop_back(); 376 return new_evar; 377 } 378 379 bool AssertionParser::ResolveLocations(const yy::location& end_of_line, 380 size_t offset_after_endline, 381 bool end_of_file) { 382 bool was_ok = true; 383 std::vector<UnresolvedLocation> succ_lines; 384 for (auto& record : unresolved_locations_) { 385 EVar* evar = record.anchor_evar; 386 std::string& token = record.anchor_text; 387 yy::location location = evar->location(); 388 location.columns(token.size()); 389 if (record.use_line_number && 390 (record.line_number != end_of_line.begin.line)) { 391 if (end_of_file) { 392 Error(location, token + ":" + std::to_string(record.line_number) + 393 " not found before end of file."); 394 was_ok = false; 395 } else { 396 succ_lines.push_back(record); 397 } 398 continue; 399 } 400 size_t group_id = record.group_id; 401 auto col = line_.find(token); 402 if (col == std::string::npos) { 403 Error(location, token + " not found."); 404 was_ok = false; 405 continue; 406 } 407 if (record.must_be_unambiguous) { 408 if (line_.find(token, col + 1) != std::string::npos) { 409 Error(location, token + " is ambiguous."); 410 was_ok = false; 411 continue; 412 } 413 } else { 414 int match_number = 0; 415 while (match_number != record.match_number) { 416 col = line_.find(token, col + 1); 417 if (col == std::string::npos) { 418 break; 419 } 420 ++match_number; 421 } 422 if (match_number != record.match_number) { 423 Error(location, token + " has no match #" + 424 std::to_string(record.match_number) + "."); 425 was_ok = false; 426 continue; 427 } 428 } 429 size_t line_start = offset_after_endline - line_.size() - 1; 430 switch (record.kind) { 431 case UnresolvedLocation::Kind::kOffsetBegin: 432 if (evar->current()) { 433 Error(location, token + " already resolved."); 434 was_ok = false; 435 continue; 436 } 437 evar->set_current(verifier_.IdentifierFor( 438 location, std::to_string(line_start + col))); 439 break; 440 case UnresolvedLocation::Kind::kOffsetEnd: 441 if (evar->current()) { 442 Error(location, token + " already resolved."); 443 was_ok = false; 444 continue; 445 } 446 evar->set_current(verifier_.IdentifierFor( 447 location, std::to_string(line_start + col + token.size()))); 448 break; 449 case UnresolvedLocation::Kind::kAnchor: 450 if (default_inspect_) { 451 inspections_.emplace_back( 452 absl::StrCat("@", token, ":", location.begin.line, ".", col), 453 evar, Inspection::Kind::IMPLICIT); 454 } 455 AppendGoal(group_id, verifier_.MakePredicate( 456 location, verifier_.eq_id(), 457 {new (verifier_.arena()) 458 Range(location, line_start + col, 459 line_start + col + token.size(), 460 path_, root_, corpus_), 461 evar})); 462 break; 463 } 464 } 465 unresolved_locations_.swap(succ_lines); 466 ResetLine(); 467 return was_ok; 468 } 469 470 void AssertionParser::AppendToLine(const char* yytext) { line_.append(yytext); } 471 472 void AssertionParser::PushNode(AstNode* node) { node_stack_.push_back(node); } 473 474 AstNode** AssertionParser::PopNodes(size_t count) { 475 AstNode** nodes = (AstNode**)verifier_.arena()->New(count * sizeof(AstNode*)); 476 size_t start = node_stack_.size() - count; 477 for (size_t c = 0; c < count; ++c) { 478 nodes[c] = node_stack_[start + c]; 479 } 480 node_stack_.resize(start); 481 return nodes; 482 } 483 484 void AssertionParser::AppendGoal(size_t group_id, AstNode* goal) { 485 assert(group_id < groups_.size()); 486 groups_[group_id].goals.push_back(goal); 487 } 488 489 void AssertionParser::EnterGoalGroup(const yy::location& location, 490 bool negated) { 491 if (inside_goal_group_) { 492 Error(location, "It is not valid to enter nested goal groups."); 493 return; 494 } 495 inside_goal_group_ = true; 496 groups_.push_back( 497 GoalGroup{negated ? GoalGroup::kSomeMustFail : GoalGroup::kNoneMayFail}); 498 } 499 500 void AssertionParser::ExitGoalGroup(const yy::location& location) { 501 if (!inside_goal_group_) { 502 Error(location, "You've left a goal group before you've entered it."); 503 return; 504 } 505 inside_goal_group_ = false; 506 } 507 508 void AssertionParser::ScanBeginString(const RE2& goal_comment_regex, 509 const std::string& data, 510 bool trace_scanning) { 511 // Preprocess the input by adding a - to the left of every goal line and a 512 // . to the left of every non-goal line. From every goal line remove any 513 // character that is not part of the goal regex's capture group. This means 514 // that we don't have to push RE2 deeper into the lexer; it also preserves 515 // file locations for diagnostics (after taking into account the constant 516 // 1 offset). 517 std::string yy_buf; 518 size_t next_line_begin = 0; 519 auto append_line = [&](size_t line_end) { 520 absl::string_view match_region; 521 size_t line_length = line_end - next_line_begin; 522 auto is_goal = RE2::FullMatch( 523 absl::string_view(data.data() + next_line_begin, line_length), 524 goal_comment_regex, &match_region); 525 if (is_goal == 1) { 526 yy_buf.push_back('-'); 527 size_t pre_pad = match_region.data() - data.data() - next_line_begin; 528 for (size_t s = 0; s < pre_pad; ++s) { 529 yy_buf.push_back(' '); 530 } 531 yy_buf.append(match_region.data(), match_region.size()); 532 size_t post_pad = line_length - pre_pad - match_region.size(); 533 for (size_t s = 0; s < post_pad; ++s) { 534 yy_buf.push_back(' '); 535 } 536 } else { 537 yy_buf.push_back('.'); 538 yy_buf.append(data, next_line_begin, line_length); 539 } 540 if (line_end != data.size()) { 541 yy_buf.push_back('\n'); 542 } 543 next_line_begin = line_end + 1; 544 }; 545 auto endline = data.find('\n'); 546 while (endline != std::string::npos) { 547 append_line(endline); 548 endline = data.find('\n', next_line_begin); 549 } 550 append_line(data.size()); 551 SetScanBuffer(yy_buf, trace_scanning); 552 } 553 554 void AssertionParser::ScanBeginFile(const RE2& goal_comment_regex, 555 bool trace_scanning) { 556 if (file().empty() || file() == "-") { 557 Error("will not read goals from stdin"); 558 exit(EXIT_FAILURE); 559 } 560 std::string buffer = LoadFileOrDie(file()); 561 ScanBeginString(goal_comment_regex, buffer, trace_scanning); 562 } 563 564 } // namespace verifier 565 } // namespace kythe