kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/file_descriptor_walker.cc (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/indexer/proto/file_descriptor_walker.h" 18 19 #include <optional> 20 21 #include "absl/log/check.h" 22 #include "absl/log/log.h" 23 #include "absl/status/statusor.h" 24 #include "absl/strings/str_cat.h" 25 #include "absl/strings/str_join.h" 26 #include "absl/strings/str_split.h" 27 #include "absl/strings/string_view.h" 28 #include "google/protobuf/descriptor.h" 29 #include "google/protobuf/repeated_field.h" 30 #include "kythe/cxx/common/kythe_metadata_file.h" 31 #include "kythe/cxx/common/schema/edges.h" 32 #include "kythe/cxx/indexer/proto/marked_source.h" 33 #include "kythe/cxx/indexer/proto/offset_util.h" 34 #include "kythe/cxx/indexer/proto/proto_graph_builder.h" 35 #include "kythe/proto/generated_message_info.pb.h" 36 #include "re2/re2.h" 37 38 namespace kythe { 39 namespace lang_proto { 40 namespace { 41 42 using ::google::protobuf::Descriptor; 43 using ::google::protobuf::DescriptorProto; 44 using ::google::protobuf::EnumDescriptor; 45 using ::google::protobuf::EnumDescriptorProto; 46 using ::google::protobuf::EnumValueDescriptor; 47 using ::google::protobuf::EnumValueDescriptorProto; 48 using ::google::protobuf::FieldDescriptor; 49 using ::google::protobuf::FieldDescriptorProto; 50 using ::google::protobuf::FileDescriptorProto; 51 using ::google::protobuf::MethodDescriptor; 52 using ::google::protobuf::MethodDescriptorProto; 53 using ::google::protobuf::OneofDescriptor; 54 using ::google::protobuf::ServiceDescriptor; 55 using ::google::protobuf::ServiceDescriptorProto; 56 using ::google::protobuf::SourceCodeInfo; 57 using ::kythe::proto::VName; 58 59 // Pushes a value onto a proto location lookup path, and automatically 60 // removes it when destroyed. See the documentation for 61 // proto2.Descriptor.SourceCodeInfo.Location.path for more information 62 // on how these paths work. 63 class ScopedLookup { 64 public: 65 // Does not take ownership of lookup_path; caller must ensure it 66 // stays around until this ScopedLookup is destroyed. 67 explicit ScopedLookup(std::vector<int>* lookup_path, int component) 68 : lookup_path_(lookup_path), component_(component) { 69 lookup_path->push_back(component); 70 } 71 ~ScopedLookup() { 72 CHECK(!lookup_path_->empty()); 73 CHECK_EQ(component_, lookup_path_->back()); 74 lookup_path_->pop_back(); 75 } 76 77 private: 78 std::vector<int>* lookup_path_; 79 const int component_; 80 }; 81 82 std::optional<absl::string_view> TypeName(const EnumDescriptor& desc) { 83 return desc.name(); 84 } 85 86 std::optional<absl::string_view> TypeName(const Descriptor& desc) { 87 return desc.name(); 88 } 89 90 std::optional<absl::string_view> TypeName(const FieldDescriptor& field) { 91 if (field.is_map()) { 92 return std::nullopt; 93 } 94 if (const EnumDescriptor* desc = field.enum_type()) { 95 return TypeName(*desc); 96 } 97 if (const Descriptor* desc = field.message_type()) { 98 return TypeName(*desc); 99 } 100 return std::nullopt; 101 } 102 103 template <typename DescriptorType> 104 void TruncateLocationToTypeName(Location& location, 105 const DescriptorType& desc) { 106 std::optional<absl::string_view> type_name = TypeName(desc); 107 if (!type_name.has_value() || location.end <= location.begin || 108 (location.end - location.begin) <= type_name->size()) { 109 return; 110 } 111 location.begin = (location.end - type_name->size()); 112 } 113 114 } // namespace 115 116 int FileDescriptorWalker::ComputeByteOffset(int line_number, 117 int column_number) const { 118 int byte_offset_of_start_of_line = 119 line_index_.ComputeByteOffset(line_number, 0); 120 absl::string_view line_text = line_index_.GetLine(line_number); 121 int byte_offset_into_line = 122 ByteOffsetOfTabularColumn(line_text, column_number); 123 if (byte_offset_into_line < 0) { 124 return byte_offset_into_line; 125 } 126 return byte_offset_of_start_of_line + byte_offset_into_line; 127 } 128 129 Location FileDescriptorWalker::LocationOfLeadingComments( 130 const Location& entity_location, int entity_start_line, 131 int entity_start_column, const std::string& comments) const { 132 int line_offset_of_entity = ByteOffsetOfTabularColumn( 133 line_index_.GetLine(entity_start_line), entity_start_column); 134 if (line_offset_of_entity < 0) { 135 return entity_location; 136 } 137 Location comment_location; 138 comment_location.file = entity_location.file; 139 comment_location.begin = entity_location.begin - line_offset_of_entity; 140 comment_location.end = entity_location.begin - line_offset_of_entity - 1; 141 int next_line_number = entity_start_line - 1; 142 absl::string_view bottom_line = line_index_.GetLine(next_line_number); 143 while (RE2::FullMatch(bottom_line, R"((\s*\*/?\s*)|(\s*//\n))")) { 144 comment_location.begin -= bottom_line.size(); 145 --next_line_number; 146 bottom_line = line_index_.GetLine(next_line_number); 147 } 148 std::vector<std::string> comment_lines = absl::StrSplit(comments, '\n'); 149 while (!comment_lines.empty() && comment_lines.back().empty()) { 150 comment_lines.pop_back(); 151 } 152 while (!comment_lines.empty()) { 153 const std::string& comment_line = comment_lines.back(); 154 absl::string_view actual_line = line_index_.GetLine(next_line_number); 155 std::string comment_re = 156 absl::StrCat(R"(\s*(?://|/?\*\s*))", RE2::QuoteMeta(comment_line), 157 R"(\s*(?:\*/)?\s*)"); 158 if (!RE2::FullMatch(actual_line, comment_re)) { 159 LOG(ERROR) << "Leading comment line mismatch: [" << comment_line 160 << "] vs. [" << actual_line << "]" 161 << "(line " << next_line_number << ")"; 162 return comment_location; 163 } 164 comment_location.begin -= actual_line.size(); 165 --next_line_number; 166 comment_lines.pop_back(); 167 } 168 return comment_location; 169 } 170 171 Location FileDescriptorWalker::LocationOfTrailingComments( 172 const Location& entity_location, int entity_start_line, 173 int entity_start_column, const std::string& comments) const { 174 Location comment_location; 175 comment_location.file = entity_location.file; 176 std::vector<std::string> comment_lines = absl::StrSplit(comments, '\n'); 177 while (!comment_lines.empty() && comment_lines.back().empty()) { 178 comment_lines.pop_back(); 179 } 180 if (comment_lines.empty()) { 181 LOG(ERROR) << "Trailing comment listed as present but was empty."; 182 return entity_location; 183 } 184 std::string top_comment_line_re = absl::StrCat( 185 R"(\s*(?:/\*|//)\s*)", RE2::QuoteMeta(comment_lines.front())); 186 int line_number = entity_start_line; 187 for (; line_number <= line_index_.line_count(); ++line_number) { 188 absl::string_view entity_line = line_index_.GetLine(line_number); 189 absl::string_view comment_start; 190 if (RE2::PartialMatch(entity_line, R"((\s*(?:/\*|//)))", &comment_start)) { 191 comment_location.begin = line_index_.ComputeByteOffset(line_number, 0) + 192 (comment_start.data() - entity_line.data()); 193 comment_location.end = 194 line_index_.ComputeByteOffset(line_number + 1, 0) - 1; 195 if (RE2::PartialMatch(entity_line, top_comment_line_re)) { 196 comment_lines.erase(comment_lines.begin()); 197 } 198 break; 199 } 200 } 201 if (line_number > line_index_.line_count()) { 202 LOG(ERROR) << "Never found trailing comment \"" << comments << "\""; 203 return entity_location; 204 } 205 ++line_number; 206 for (const std::string& comment_line : comment_lines) { 207 absl::string_view actual_line = line_index_.GetLine(line_number); 208 std::string comment_re = 209 absl::StrCat(R"(\s*(?://|/?\*\s*))", RE2::QuoteMeta(comment_line), 210 R"(\s*(?:\*/)?\s*)"); 211 if (!RE2::FullMatch(actual_line, comment_re)) { 212 LOG(ERROR) << "Trailing comment line mismatch: [" << comment_line 213 << "] vs. [" << actual_line << "]" 214 << "(line " << line_number << ")"; 215 return comment_location; 216 } 217 comment_location.end += actual_line.size(); 218 ++line_number; 219 } 220 221 absl::string_view bottom_line = line_index_.GetLine(line_number); 222 while (RE2::FullMatch(bottom_line, R"(\s*\*/?\s*)")) { 223 comment_location.end += bottom_line.size(); 224 ++line_number; 225 bottom_line = line_index_.GetLine(line_number); 226 } 227 return comment_location; 228 } 229 230 absl::StatusOr<PartialLocation> FileDescriptorWalker::ParseLocation( 231 const std::vector<int>& span) const { 232 PartialLocation location; 233 if (span.size() == 4) { 234 location.start_line = span[0] + 1; 235 location.end_line = span[2] + 1; 236 location.start_column = span[1]; 237 location.end_column = span[3]; 238 } else if (span.size() == 3) { 239 location.start_line = span[0] + 1; 240 location.end_line = span[0] + 1; 241 location.start_column = span[1]; 242 location.end_column = span[2]; 243 } else { 244 return absl::UnknownError(""); 245 } 246 return location; 247 } 248 249 void FileDescriptorWalker::InitializeLocation(const std::vector<int>& span, 250 Location* loc) { 251 loc->file = file_name_; 252 absl::StatusOr<PartialLocation> possible_location = ParseLocation(span); 253 if (possible_location.ok()) { 254 PartialLocation partial_location = *possible_location; 255 loc->begin = ComputeByteOffset(partial_location.start_line, 256 partial_location.start_column); 257 loc->end = ComputeByteOffset(partial_location.end_line, 258 partial_location.end_column); 259 } else { 260 // Some error in the span, create a dummy location for now 261 // Happens in case of proto1 files 262 LOG(ERROR) << "Unexpected location vector [" << absl::StrJoin(span, ":") 263 << "] while walking " << file_name_.path(); 264 loc->begin = 0; 265 loc->end = 0; 266 } 267 } 268 269 void FileDescriptorWalker::BuildLocationMap( 270 const SourceCodeInfo& source_code_info) { 271 for (int i = 0; i < source_code_info.location_size(); i++) { 272 const SourceCodeInfo::Location& location = source_code_info.location(i); 273 std::vector<int> path(location.path().begin(), location.path().end()); 274 std::vector<int> span(location.span().begin(), location.span().end()); 275 location_map_[path] = span; 276 path_location_map_[path] = location; 277 } 278 } 279 280 void FileDescriptorWalker::VisitImports() { 281 { 282 // Direct dependencies, from `import "foo.proto"` statements. 283 std::vector<int> path = {FileDescriptorProto::kDependencyFieldNumber}; 284 for (int i = 0; i < file_descriptor_->dependency_count(); i++) { 285 ScopedLookup import_lookup(&path, i); 286 Location location; 287 InitializeLocation(location_map_[path], &location); 288 builder_->AddImport(file_descriptor_->dependency(i)->name(), location); 289 } 290 } 291 { 292 // Weak dependencies, from `import weak "foo.proto"` statements. 293 std::vector<int> path = {FileDescriptorProto::kWeakDependencyFieldNumber}; 294 for (int i = 0; i < file_descriptor_->weak_dependency_count(); i++) { 295 ScopedLookup import_lookup(&path, i); 296 Location location; 297 InitializeLocation(location_map_[path], &location); 298 builder_->AddImport(file_descriptor_->weak_dependency(i)->name(), 299 location); 300 } 301 } 302 { 303 // Public dependencies, from `import public "foo.proto"` statements 304 std::vector<int> path = {FileDescriptorProto::kPublicDependencyFieldNumber}; 305 for (int i = 0; i < file_descriptor_->public_dependency_count(); i++) { 306 ScopedLookup import_lookup(&path, i); 307 Location location; 308 InitializeLocation(location_map_[path], &location); 309 builder_->AddImport(file_descriptor_->public_dependency(i)->name(), 310 location); 311 } 312 } 313 } 314 315 namespace { 316 std::string SignAnnotation( 317 const google::protobuf::GeneratedCodeInfo::Annotation& annotation) { 318 return absl::StrJoin(annotation.path(), "."); 319 } 320 321 VName VNameForAnnotation( 322 const VName& context_vname, 323 const google::protobuf::GeneratedCodeInfo::Annotation& annotation) { 324 VName out; 325 out.set_corpus(context_vname.corpus()); 326 out.set_path(annotation.source_file()); 327 out.set_signature(SignAnnotation(annotation)); 328 out.set_language(kLanguageName); 329 return out; 330 } 331 } // anonymous namespace 332 333 void FileDescriptorWalker::VisitGeneratedProtoInfo() { 334 if (!file_descriptor_->options().HasExtension(proto::generated_proto_info)) { 335 return; 336 } 337 const google::protobuf::GeneratedCodeInfo& info = 338 file_descriptor_->options() 339 .GetExtension(proto::generated_proto_info) 340 .generated_code_info(); 341 342 std::vector<MetadataFile::Rule> rules; 343 int file_rule = -1; 344 for (const auto& annotation : info.annotation()) { 345 MetadataFile::Rule rule{}; 346 rule.whole_file = false; 347 rule.begin = annotation.begin(); 348 rule.end = annotation.end(); 349 rule.vname = VNameForAnnotation(file_name_, annotation); 350 rule.edge_in = kythe::common::schema::kDefinesBinding; 351 rule.edge_out = kythe::common::schema::kGenerates; 352 rule.reverse_edge = true; 353 rule.generate_anchor = false; 354 rule.anchor_begin = 0; 355 rule.anchor_end = 0; 356 rules.push_back(rule); 357 if (!rule.vname.path().empty()) { 358 if (file_rule < 0 || rule.begin > rules[file_rule].begin) { 359 file_rule = rules.size() - 1; 360 } 361 } 362 } 363 364 // Add a file-scoped rule for the last encountered vname. 365 if (file_rule >= 0) { 366 MetadataFile::Rule rule{}; 367 rule.whole_file = true; 368 rule.vname = rules[file_rule].vname; 369 rule.vname.set_signature(""); 370 rule.vname.set_language(""); 371 rule.edge_out = kythe::common::schema::kGenerates; 372 rule.reverse_edge = true; 373 rule.generate_anchor = false; 374 rules.push_back(rule); 375 } 376 377 auto meta = MetadataFile::LoadFromRules(file_name_.path(), rules.begin(), 378 rules.end()); 379 builder_->SetMetadata(std::move(meta)); 380 builder_->MaybeAddMetadataFileRules(file_name_); 381 } 382 383 namespace { 384 std::optional<proto::VName> VNameForBuiltinType(FieldDescriptor::Type type) { 385 // TODO(zrlk): Emit builtins. 386 return std::nullopt; 387 } 388 } // anonymous namespace 389 390 std::optional<proto::VName> FileDescriptorWalker::VNameForFieldType( 391 const FieldDescriptor* field_proto) { 392 if (field_proto->is_map()) { 393 // Maps are technically TYPE_MESSAGE, but don't have a useful VName. 394 return std::nullopt; 395 } 396 if (field_proto->type() == FieldDescriptor::TYPE_MESSAGE || 397 field_proto->type() == FieldDescriptor::TYPE_GROUP) { 398 return builder_->VNameForDescriptor(field_proto->message_type()); 399 } else if (field_proto->type() == FieldDescriptor::TYPE_ENUM) { 400 return builder_->VNameForDescriptor(field_proto->enum_type()); 401 } else { 402 return VNameForBuiltinType(field_proto->type()); 403 } 404 } 405 406 void FileDescriptorWalker::AttachMarkedSource( 407 const proto::VName& vname, const std::optional<MarkedSource>& code) { 408 if (code) { 409 builder_->AddCodeFact(vname, *code); 410 } 411 } 412 413 void FileDescriptorWalker::VisitField(const std::string* parent_name, 414 const VName* parent, 415 const std::string& message_name, 416 const VName& message, 417 const FieldDescriptor* field, 418 std::vector<int> lookup_path) { 419 std::string vname = absl::StrCat(message_name, ".", field->name()); 420 VName v_name = builder_->VNameForDescriptor(field); 421 AddComments(v_name, lookup_path); 422 423 { 424 // Get location of declaration and add as Grok binding 425 ScopedLookup name_num(&lookup_path, FieldDescriptorProto::kNameFieldNumber); 426 const std::vector<int>& span = location_map_[lookup_path]; 427 Location location; 428 InitializeLocation(span, &location); 429 430 VName oneof; 431 bool in_oneof = false; 432 if (field->containing_oneof() != nullptr) { 433 in_oneof = true; 434 oneof = builder_->VNameForDescriptor(field->containing_oneof()); 435 } 436 437 builder_->AddFieldToMessage(parent, message, in_oneof ? &oneof : nullptr, 438 v_name, location); 439 } 440 441 AttachMarkedSource(v_name, 442 GenerateMarkedSourceForDescriptor(field, builder_)); 443 444 // Check for [deprecated=true] annotations and emit deprecation tags. 445 if (field->options().deprecated()) { 446 builder_->SetDeprecated(v_name); 447 } 448 449 Location type_location; 450 { 451 ScopedLookup type_num(&lookup_path, 452 FieldDescriptorProto::kTypeNameFieldNumber); 453 if (location_map_.find(lookup_path) == location_map_.end()) { 454 // the type was primitive, ignore for now 455 return; 456 } 457 const std::vector<int>& type_span = location_map_[lookup_path]; 458 InitializeLocation(type_span, &type_location); 459 460 // If we're in a message or enum type, decorate only the span 461 // covering the type name itself, not the full package name. 462 // This is consistent with other languages and avoids the possibility 463 // of a multi-line span, which some UIs have problems with. 464 TruncateLocationToTypeName(type_location, *field); 465 } 466 if (auto type = VNameForFieldType(field)) { 467 // TODO: add value_type back in at some point. 468 // Add reference for this field's type. We assume it to be output 469 // processing a dependency, but in the worst case this might introduce 470 // an edge to no VName (presumably in turn introducing a Lost node). 471 builder_->AddReference(*type, type_location); 472 builder_->AddTyping(v_name, *type); 473 } 474 475 if (field->is_map()) { 476 // Map key/value types do not have SourceCodeInfo locations; we have to 477 // find them within the outer "map<...>" type location. 478 absl::string_view content = absl::string_view(content_); 479 absl::string_view type_name = content.substr( 480 type_location.begin, type_location.end - type_location.begin); 481 absl::string_view key, val; 482 if (RE2::FullMatch(type_name, R"(\s*map\s*<\s*(\S+)\s*,\s*(\S+)\s*>\s*)", 483 &key, &val)) { 484 // Add references to map type components. 485 if (auto key_type = VNameForFieldType(field->message_type()->field(0))) { 486 size_t key_start = key.data() - content.data(); 487 builder_->AddReference( 488 *key_type, {type_location.file, key_start, key_start + key.size()}); 489 } 490 491 if (auto val_type = VNameForFieldType(field->message_type()->field(1))) { 492 size_t val_start = val.data() - content.data(); 493 builder_->AddReference( 494 *val_type, {type_location.file, val_start, val_start + val.size()}); 495 } 496 // TODO(schroederc): emit map type node 497 } 498 } 499 500 if (field->has_default_value()) { 501 const EnumValueDescriptor* default_value = field->default_value_enum(); 502 VName value = builder_->VNameForDescriptor(default_value); 503 // Find reference location 504 ScopedLookup default_num(&lookup_path, 505 FieldDescriptorProto::kDefaultValueFieldNumber); 506 507 const std::vector<int>& value_span = location_map_[lookup_path]; 508 Location value_location; 509 InitializeLocation(value_span, &value_location); 510 builder_->AddReference(value, value_location); 511 } 512 } 513 514 void FileDescriptorWalker::VisitFields(const std::string& message_name, 515 const Descriptor* dp, 516 std::vector<int> lookup_path) { 517 VName message = VNameForProtoPath(file_name_, lookup_path); 518 if (visited_messages_.find(URI(message).ToString()) != 519 visited_messages_.end()) { 520 return; 521 } 522 visited_messages_.insert(URI(message).ToString()); 523 { 524 ScopedLookup field_num(&lookup_path, DescriptorProto::kFieldFieldNumber); 525 for (int i = 0; i < dp->field_count(); i++) { 526 ScopedLookup field_index(&lookup_path, i); 527 528 VisitField(&message_name, &message, message_name, message, dp->field(i), 529 lookup_path); 530 } 531 } 532 { 533 ScopedLookup extension_num(&lookup_path, 534 DescriptorProto::kExtensionFieldNumber); 535 for (int i = 0; i < dp->extension_count(); i++) { 536 ScopedLookup extension_index(&lookup_path, i); 537 VisitExtension(&message_name, &message, dp->extension(i), lookup_path); 538 } 539 } 540 } 541 542 void FileDescriptorWalker::VisitNestedEnumTypes(const std::string& message_name, 543 const VName* message, 544 const Descriptor* dp, 545 std::vector<int> lookup_path) { 546 ScopedLookup enum_num(&lookup_path, DescriptorProto::kEnumTypeFieldNumber); 547 for (int i = 0; i < dp->enum_type_count(); i++) { 548 const EnumDescriptor* nested_proto = dp->enum_type(i); 549 550 // Get the path that corresponds to the name of the enum 551 ScopedLookup enum_index(&lookup_path, i); 552 553 std::string vname = absl::StrCat(message_name, ".", nested_proto->name()); 554 555 VName v_name = builder_->VNameForDescriptor(nested_proto); 556 AddComments(v_name, lookup_path); 557 558 { 559 ScopedLookup name_num(&lookup_path, 560 EnumDescriptorProto::kNameFieldNumber); 561 const std::vector<int>& span = location_map_[lookup_path]; 562 Location location; 563 InitializeLocation(span, &location); 564 565 builder_->AddEnumType(message, v_name, location); 566 if (nested_proto->options().deprecated()) { 567 builder_->SetDeprecated(v_name); 568 } 569 AttachMarkedSource( 570 v_name, GenerateMarkedSourceForDescriptor(nested_proto, builder_)); 571 } 572 573 // Visit values 574 VisitEnumValues(nested_proto, &v_name, lookup_path); 575 } 576 } 577 578 void FileDescriptorWalker::VisitNestedTypes(const std::string& message_name, 579 const VName* message, 580 const Descriptor* dp, 581 std::vector<int> lookup_path) { 582 ScopedLookup nested_type_num(&lookup_path, 583 DescriptorProto::kNestedTypeFieldNumber); 584 585 for (int i = 0; i < dp->nested_type_count(); i++) { 586 ScopedLookup nested_index(&lookup_path, i); 587 const Descriptor* nested_proto = dp->nested_type(i); 588 589 // The proto compiler synthesizes types to represent map entries. For 590 // example, a "map<string, string> my_map" field would cause a type 591 // "MyMapEntry" to be generated. Because it doesn't actually exist in the 592 // source .proto file, we ignore it. 593 if (nested_proto->options().map_entry()) { 594 continue; 595 } 596 597 std::string vname = absl::StrCat(message_name, ".", nested_proto->name()); 598 599 VName v_name = VNameForProtoPath(file_name_, lookup_path); 600 AddComments(v_name, lookup_path); 601 602 { 603 // Also push kNameFieldNumber for location of declaration 604 ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber); 605 606 const std::vector<int>& span = location_map_[lookup_path]; 607 Location location; 608 InitializeLocation(span, &location); 609 610 builder_->AddMessageType(message, v_name, location); 611 if (nested_proto->options().deprecated()) { 612 builder_->SetDeprecated(v_name); 613 } 614 AttachMarkedSource( 615 v_name, GenerateMarkedSourceForDescriptor(nested_proto, builder_)); 616 } 617 618 // Need to visit nested enum and message types first! 619 VisitNestedTypes(vname, &v_name, nested_proto, lookup_path); 620 VisitNestedEnumTypes(vname, &v_name, nested_proto, lookup_path); 621 VisitOneofs(vname, v_name, nested_proto, lookup_path); 622 } 623 } 624 625 void FileDescriptorWalker::VisitOneofs(const std::string& message_name, 626 const VName& message, 627 const Descriptor* dp, 628 std::vector<int> lookup_path) { 629 ScopedLookup nested_type_num(&lookup_path, 630 DescriptorProto::kOneofDeclFieldNumber); 631 632 for (int i = 0; i < dp->oneof_decl_count(); i++) { 633 ScopedLookup nested_index(&lookup_path, i); 634 const OneofDescriptor* oneof = dp->oneof_decl(i); 635 std::string vname = absl::StrCat(message_name, ".", oneof->name()); 636 637 VName v_name = builder_->VNameForDescriptor(oneof); 638 AddComments(v_name, lookup_path); 639 640 { 641 // TODO: verify that this is correct for oneofs 642 ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber); 643 644 const std::vector<int>& span = location_map_[lookup_path]; 645 Location location; 646 InitializeLocation(span, &location); 647 648 builder_->AddOneofToMessage(message, v_name, location); 649 AttachMarkedSource(v_name, 650 GenerateMarkedSourceForDescriptor(oneof, builder_)); 651 } 652 653 // No need to add fields; they're also fields of the message 654 } 655 } 656 657 void FileDescriptorWalker::VisitMessagesAndEnums(const std::string* ns_name, 658 const VName* ns) { 659 std::vector<int> lookup_path; 660 for (int i = 0; i < file_descriptor_->message_type_count(); i++) { 661 ScopedLookup message_num(&lookup_path, 662 FileDescriptorProto::kMessageTypeFieldNumber); 663 664 const Descriptor* dp = file_descriptor_->message_type(i); 665 666 ScopedLookup message_index(&lookup_path, i); 667 668 std::string vname = dp->name(); 669 if (ns_name != nullptr) { 670 vname = absl::StrCat(*ns_name, ".", vname); 671 } 672 673 VName v_name = VNameForProtoPath(file_name_, lookup_path); 674 AddComments(v_name, lookup_path); 675 676 { 677 ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber); 678 const std::vector<int>& span = location_map_[lookup_path]; 679 Location location; 680 InitializeLocation(span, &location); 681 682 builder_->AddMessageType(ns, v_name, location); 683 AttachMarkedSource(v_name, 684 GenerateMarkedSourceForDescriptor(dp, builder_)); 685 if (dp->options().deprecated()) { 686 builder_->SetDeprecated(v_name); 687 } 688 } 689 690 // Visit nested types first and fields later for easy type resolution 691 VisitNestedTypes(vname, &v_name, dp, lookup_path); 692 VisitNestedEnumTypes(vname, &v_name, dp, lookup_path); 693 VisitOneofs(vname, v_name, dp, lookup_path); 694 } 695 696 // Add top-level ENUM bindings 697 for (int i = 0; i < file_descriptor_->enum_type_count(); i++) { 698 ScopedLookup enum_num(&lookup_path, 699 FileDescriptorProto::kEnumTypeFieldNumber); 700 const EnumDescriptor* dp = file_descriptor_->enum_type(i); 701 ScopedLookup enum_index(&lookup_path, i); 702 703 std::string vname = dp->name(); 704 if (ns_name != nullptr) { 705 vname = absl::StrCat(*ns_name, ".", vname); 706 } 707 VName v_name = builder_->VNameForDescriptor(dp); 708 AddComments(v_name, lookup_path); 709 710 { 711 ScopedLookup name_num(&lookup_path, 712 EnumDescriptorProto::kNameFieldNumber); 713 const std::vector<int>& span = location_map_[lookup_path]; 714 Location location; 715 InitializeLocation(span, &location); 716 717 builder_->AddEnumType(ns, v_name, location); 718 AttachMarkedSource(v_name, 719 GenerateMarkedSourceForDescriptor(dp, builder_)); 720 } 721 722 // Visit enum values and add kythe bindings for them 723 VisitEnumValues(dp, &v_name, lookup_path); 724 } 725 } 726 727 void FileDescriptorWalker::VisitEnumValues(const EnumDescriptor* dp, 728 const VName* enum_node, 729 std::vector<int> lookup_path) { 730 ScopedLookup value_num(&lookup_path, EnumDescriptorProto::kValueFieldNumber); 731 732 for (int j = 0; j < dp->value_count(); j++) { 733 const EnumValueDescriptor* val_dp = dp->value(j); 734 735 ScopedLookup value_index(&lookup_path, j); 736 VName v_name = builder_->VNameForDescriptor(val_dp); 737 AddComments(v_name, lookup_path); 738 739 ScopedLookup name_num(&lookup_path, 740 EnumValueDescriptorProto::kNameFieldNumber); 741 Location value_location; 742 InitializeLocation(location_map_[lookup_path], &value_location); 743 744 builder_->AddValueToEnum(*enum_node, v_name, value_location); 745 if (val_dp->options().deprecated()) { 746 builder_->SetDeprecated(v_name); 747 } 748 AttachMarkedSource(v_name, 749 GenerateMarkedSourceForDescriptor(val_dp, builder_)); 750 } 751 } 752 753 void FileDescriptorWalker::VisitAllFields(const std::string* ns_name, 754 const VName* ns) { 755 std::vector<int> lookup_path; 756 { 757 ScopedLookup message_num(&lookup_path, 758 FileDescriptorProto::kMessageTypeFieldNumber); 759 760 // For each top-level message in the file, add the field bindings 761 for (int i = 0; i < file_descriptor_->message_type_count(); i++) { 762 const Descriptor* dp = file_descriptor_->message_type(i); 763 std::string vname = dp->name(); 764 if (ns_name != nullptr) { 765 vname = *ns_name + "." + vname; 766 } 767 768 ScopedLookup message_index(&lookup_path, i); 769 770 // Visit fields within the message 771 VisitFields(vname, dp, lookup_path); 772 773 // Visit fields in nested mesages 774 VisitNestedFields(vname, dp, lookup_path); 775 } 776 } 777 778 { 779 ScopedLookup extension_num(&lookup_path, 780 FileDescriptorProto::kExtensionFieldNumber); 781 782 // For each top-level extension in the file, add the field bindings 783 for (int i = 0; i < file_descriptor_->extension_count(); i++) { 784 ScopedLookup extension_index(&lookup_path, i); 785 VisitExtension(ns_name, ns, file_descriptor_->extension(i), lookup_path); 786 } 787 } 788 } 789 790 void FileDescriptorWalker::VisitExtension(const std::string* parent_name, 791 const VName* parent, 792 const FieldDescriptor* field, 793 std::vector<int> lookup_path) { 794 std::string message_name = field->containing_type()->full_name(); 795 VName message = builder_->VNameForDescriptor(field->containing_type()); 796 { 797 // In a block like this: 798 // extend A { 799 // optional string b = 1; 800 // optional string c = 2; 801 // } 802 // 803 // Link the name of the extended message "A" to the original 804 // definition. Each of "b" and "c" will generate this reference 805 // which can result in duplicate references if more than one 806 // field is declared in a single extend block. 807 ScopedLookup extendee_num(&lookup_path, 808 FieldDescriptorProto::kExtendeeFieldNumber); 809 const std::vector<int>& extendee_span = location_map_[lookup_path]; 810 Location extendee_location; 811 InitializeLocation(extendee_span, &extendee_location); 812 builder_->AddReference(message, extendee_location); 813 } 814 815 VisitField(parent_name, parent, message_name, message, field, lookup_path); 816 } 817 818 void FileDescriptorWalker::VisitNestedFields(const std::string& name_prefix, 819 const Descriptor* dp, 820 std::vector<int> lookup_path) { 821 ScopedLookup nested_num(&lookup_path, 822 DescriptorProto::kNestedTypeFieldNumber); 823 824 for (int j = 0; j < dp->nested_type_count(); j++) { 825 const Descriptor* nested_dp = dp->nested_type(j); 826 const std::string nested_name_prefix = 827 absl::StrCat(name_prefix, ".", nested_dp->name()); 828 829 // The proto compiler synthesizes types to represent map entries. For 830 // example, a "map<string, string> my_map" field would cause a type 831 // "MyMapEntry" to be generated. Because it doesn't actually exist in the 832 // source .proto file, we ignore it. 833 if (nested_dp->options().map_entry()) { 834 continue; 835 } 836 837 ScopedLookup nested_index(&lookup_path, j); 838 839 // Visit fields within the message 840 VisitFields(nested_name_prefix, nested_dp, lookup_path); 841 842 // Visit fields in nested mesages 843 VisitNestedFields(nested_name_prefix, nested_dp, lookup_path); 844 } 845 } 846 847 void FileDescriptorWalker::AddComments(const VName& v_name, 848 const std::vector<int>& path) { 849 auto protoc_iter = path_location_map_.find(path); 850 if (protoc_iter == path_location_map_.end()) { 851 return; 852 } 853 const auto& protoc_location = protoc_iter->second; 854 absl::StatusOr<PartialLocation> readable_location = 855 ParseLocation(location_map_[path]); 856 if (!readable_location.ok()) { 857 return; 858 } 859 Location entity_location; 860 InitializeLocation(location_map_[path], &entity_location); 861 PartialLocation coordinates = *readable_location; 862 if (protoc_location.has_leading_comments()) { 863 Location comment_location = LocationOfLeadingComments( 864 entity_location, coordinates.start_line, coordinates.start_column, 865 protoc_location.leading_comments()); 866 builder_->AddDocComment(v_name, comment_location); 867 } 868 if (protoc_location.has_trailing_comments()) { 869 Location comment_location = LocationOfTrailingComments( 870 entity_location, coordinates.start_line, coordinates.start_column, 871 protoc_location.trailing_comments()); 872 builder_->AddDocComment(v_name, comment_location); 873 } 874 } 875 876 void FileDescriptorWalker::VisitRpcServices(const std::string* ns_name, 877 const VName* ns) { 878 std::vector<int> lookup_path; 879 ScopedLookup service_num(&lookup_path, 880 FileDescriptorProto::kServiceFieldNumber); 881 for (int i = 0; i < file_descriptor_->service_count(); i++) { 882 const ServiceDescriptor* dp = file_descriptor_->service(i); 883 ScopedLookup service_index(&lookup_path, i); 884 885 std::string service_vname = dp->name(); 886 if (ns_name != nullptr) { 887 service_vname = absl::StrCat(*ns_name, ".", service_vname); 888 } 889 VName v_name = builder_->VNameForDescriptor(dp); 890 AddComments(v_name, lookup_path); 891 892 { 893 ScopedLookup name_num(&lookup_path, 894 ServiceDescriptorProto::kNameFieldNumber); 895 const std::vector<int>& span = location_map_[lookup_path]; 896 Location location; 897 InitializeLocation(span, &location); 898 899 builder_->AddService(ns, v_name, location); 900 AttachMarkedSource(v_name, 901 GenerateMarkedSourceForDescriptor(dp, builder_)); 902 } 903 904 // Visit methods 905 ScopedLookup method_num(&lookup_path, 906 ServiceDescriptorProto::kMethodFieldNumber); 907 for (int j = 0; j < dp->method_count(); j++) { 908 const MethodDescriptor* method_dp = dp->method(j); 909 ScopedLookup method_index(&lookup_path, j); 910 std::string method_vname = 911 absl::StrCat(service_vname, ".", method_dp->name()); 912 VName method = builder_->VNameForDescriptor(method_dp); 913 AddComments(method, lookup_path); 914 915 { 916 // Add method as a declaration 917 ScopedLookup name_num(&lookup_path, 918 MethodDescriptorProto::kNameFieldNumber); 919 Location method_location; 920 InitializeLocation(location_map_[lookup_path], &method_location); 921 AttachMarkedSource( 922 method, GenerateMarkedSourceForDescriptor(method_dp, builder_)); 923 builder_->AddMethodToService(v_name, method, method_location); 924 } 925 926 VName input_sig; 927 { 928 // Add rpc method's input argument 929 ScopedLookup input_num(&lookup_path, 930 MethodDescriptorProto::kInputTypeFieldNumber); 931 Location input_location; 932 InitializeLocation(location_map_[lookup_path], &input_location); 933 const Descriptor* input = method_dp->input_type(); 934 // Only decorate the type name, not the full <package>.<type> span. 935 TruncateLocationToTypeName(input_location, *input); 936 937 input_sig = builder_->VNameForDescriptor(input); 938 builder_->AddArgumentToMethod(method, input_sig, input_location); 939 } 940 941 VName output_sig; 942 { 943 // Add rpc method's output argument 944 ScopedLookup output_num(&lookup_path, 945 MethodDescriptorProto::kOutputTypeFieldNumber); 946 Location output_location; 947 InitializeLocation(location_map_[lookup_path], &output_location); 948 const Descriptor* output = method_dp->output_type(); 949 // Only decorate the type name, not the full <package>.<type> span. 950 TruncateLocationToTypeName(output_location, *output); 951 952 output_sig = builder_->VNameForDescriptor(output); 953 builder_->AddArgumentToMethod(method, output_sig, output_location); 954 } 955 builder_->AddMethodType(method, input_sig, output_sig); 956 } 957 } 958 } 959 960 void FileDescriptorWalker::PopulateCodeGraph() { 961 BuildLocationMap(*source_code_info_); 962 VisitGeneratedProtoInfo(); 963 VisitImports(); 964 965 const VName* ns = nullptr; 966 const std::string* ns_name = nullptr; 967 VName v_name; 968 const std::string& package = file_descriptor_->package(); 969 if (!package.empty()) { 970 std::vector<int> lookup_path; 971 ScopedLookup package_num(&lookup_path, 972 FileDescriptorProto::kPackageFieldNumber); 973 const std::vector<int>& span = location_map_[lookup_path]; 974 Location location; 975 InitializeLocation(span, &location); 976 v_name.set_language(kLanguageName); 977 v_name.set_corpus(file_name_.corpus()); 978 v_name.set_signature(package); 979 builder_->AddNamespace(v_name, location); 980 ns = &v_name; 981 ns_name = &package; 982 } 983 984 VisitMessagesAndEnums(ns_name, ns); 985 VisitAllFields(ns_name, ns); 986 VisitRpcServices(ns_name, ns); 987 } 988 989 } // namespace lang_proto 990 } // namespace kythe