kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/file_descriptor_walker.cc (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/indexer/proto/file_descriptor_walker.h"
    18  
    19  #include <optional>
    20  
    21  #include "absl/log/check.h"
    22  #include "absl/log/log.h"
    23  #include "absl/status/statusor.h"
    24  #include "absl/strings/str_cat.h"
    25  #include "absl/strings/str_join.h"
    26  #include "absl/strings/str_split.h"
    27  #include "absl/strings/string_view.h"
    28  #include "google/protobuf/descriptor.h"
    29  #include "google/protobuf/repeated_field.h"
    30  #include "kythe/cxx/common/kythe_metadata_file.h"
    31  #include "kythe/cxx/common/schema/edges.h"
    32  #include "kythe/cxx/indexer/proto/marked_source.h"
    33  #include "kythe/cxx/indexer/proto/offset_util.h"
    34  #include "kythe/cxx/indexer/proto/proto_graph_builder.h"
    35  #include "kythe/proto/generated_message_info.pb.h"
    36  #include "re2/re2.h"
    37  
    38  namespace kythe {
    39  namespace lang_proto {
    40  namespace {
    41  
    42  using ::google::protobuf::Descriptor;
    43  using ::google::protobuf::DescriptorProto;
    44  using ::google::protobuf::EnumDescriptor;
    45  using ::google::protobuf::EnumDescriptorProto;
    46  using ::google::protobuf::EnumValueDescriptor;
    47  using ::google::protobuf::EnumValueDescriptorProto;
    48  using ::google::protobuf::FieldDescriptor;
    49  using ::google::protobuf::FieldDescriptorProto;
    50  using ::google::protobuf::FileDescriptorProto;
    51  using ::google::protobuf::MethodDescriptor;
    52  using ::google::protobuf::MethodDescriptorProto;
    53  using ::google::protobuf::OneofDescriptor;
    54  using ::google::protobuf::ServiceDescriptor;
    55  using ::google::protobuf::ServiceDescriptorProto;
    56  using ::google::protobuf::SourceCodeInfo;
    57  using ::kythe::proto::VName;
    58  
    59  // Pushes a value onto a proto location lookup path, and automatically
    60  // removes it when destroyed.  See the documentation for
    61  // proto2.Descriptor.SourceCodeInfo.Location.path for more information
    62  // on how these paths work.
    63  class ScopedLookup {
    64   public:
    65    // Does not take ownership of lookup_path; caller must ensure it
    66    // stays around until this ScopedLookup is destroyed.
    67    explicit ScopedLookup(std::vector<int>* lookup_path, int component)
    68        : lookup_path_(lookup_path), component_(component) {
    69      lookup_path->push_back(component);
    70    }
    71    ~ScopedLookup() {
    72      CHECK(!lookup_path_->empty());
    73      CHECK_EQ(component_, lookup_path_->back());
    74      lookup_path_->pop_back();
    75    }
    76  
    77   private:
    78    std::vector<int>* lookup_path_;
    79    const int component_;
    80  };
    81  
    82  std::optional<absl::string_view> TypeName(const EnumDescriptor& desc) {
    83    return desc.name();
    84  }
    85  
    86  std::optional<absl::string_view> TypeName(const Descriptor& desc) {
    87    return desc.name();
    88  }
    89  
    90  std::optional<absl::string_view> TypeName(const FieldDescriptor& field) {
    91    if (field.is_map()) {
    92      return std::nullopt;
    93    }
    94    if (const EnumDescriptor* desc = field.enum_type()) {
    95      return TypeName(*desc);
    96    }
    97    if (const Descriptor* desc = field.message_type()) {
    98      return TypeName(*desc);
    99    }
   100    return std::nullopt;
   101  }
   102  
   103  template <typename DescriptorType>
   104  void TruncateLocationToTypeName(Location& location,
   105                                  const DescriptorType& desc) {
   106    std::optional<absl::string_view> type_name = TypeName(desc);
   107    if (!type_name.has_value() || location.end <= location.begin ||
   108        (location.end - location.begin) <= type_name->size()) {
   109      return;
   110    }
   111    location.begin = (location.end - type_name->size());
   112  }
   113  
   114  }  // namespace
   115  
   116  int FileDescriptorWalker::ComputeByteOffset(int line_number,
   117                                              int column_number) const {
   118    int byte_offset_of_start_of_line =
   119        line_index_.ComputeByteOffset(line_number, 0);
   120    absl::string_view line_text = line_index_.GetLine(line_number);
   121    int byte_offset_into_line =
   122        ByteOffsetOfTabularColumn(line_text, column_number);
   123    if (byte_offset_into_line < 0) {
   124      return byte_offset_into_line;
   125    }
   126    return byte_offset_of_start_of_line + byte_offset_into_line;
   127  }
   128  
   129  Location FileDescriptorWalker::LocationOfLeadingComments(
   130      const Location& entity_location, int entity_start_line,
   131      int entity_start_column, const std::string& comments) const {
   132    int line_offset_of_entity = ByteOffsetOfTabularColumn(
   133        line_index_.GetLine(entity_start_line), entity_start_column);
   134    if (line_offset_of_entity < 0) {
   135      return entity_location;
   136    }
   137    Location comment_location;
   138    comment_location.file = entity_location.file;
   139    comment_location.begin = entity_location.begin - line_offset_of_entity;
   140    comment_location.end = entity_location.begin - line_offset_of_entity - 1;
   141    int next_line_number = entity_start_line - 1;
   142    absl::string_view bottom_line = line_index_.GetLine(next_line_number);
   143    while (RE2::FullMatch(bottom_line, R"((\s*\*/?\s*)|(\s*//\n))")) {
   144      comment_location.begin -= bottom_line.size();
   145      --next_line_number;
   146      bottom_line = line_index_.GetLine(next_line_number);
   147    }
   148    std::vector<std::string> comment_lines = absl::StrSplit(comments, '\n');
   149    while (!comment_lines.empty() && comment_lines.back().empty()) {
   150      comment_lines.pop_back();
   151    }
   152    while (!comment_lines.empty()) {
   153      const std::string& comment_line = comment_lines.back();
   154      absl::string_view actual_line = line_index_.GetLine(next_line_number);
   155      std::string comment_re =
   156          absl::StrCat(R"(\s*(?://|/?\*\s*))", RE2::QuoteMeta(comment_line),
   157                       R"(\s*(?:\*/)?\s*)");
   158      if (!RE2::FullMatch(actual_line, comment_re)) {
   159        LOG(ERROR) << "Leading comment line mismatch: [" << comment_line
   160                   << "] vs. [" << actual_line << "]"
   161                   << "(line " << next_line_number << ")";
   162        return comment_location;
   163      }
   164      comment_location.begin -= actual_line.size();
   165      --next_line_number;
   166      comment_lines.pop_back();
   167    }
   168    return comment_location;
   169  }
   170  
   171  Location FileDescriptorWalker::LocationOfTrailingComments(
   172      const Location& entity_location, int entity_start_line,
   173      int entity_start_column, const std::string& comments) const {
   174    Location comment_location;
   175    comment_location.file = entity_location.file;
   176    std::vector<std::string> comment_lines = absl::StrSplit(comments, '\n');
   177    while (!comment_lines.empty() && comment_lines.back().empty()) {
   178      comment_lines.pop_back();
   179    }
   180    if (comment_lines.empty()) {
   181      LOG(ERROR) << "Trailing comment listed as present but was empty.";
   182      return entity_location;
   183    }
   184    std::string top_comment_line_re = absl::StrCat(
   185        R"(\s*(?:/\*|//)\s*)", RE2::QuoteMeta(comment_lines.front()));
   186    int line_number = entity_start_line;
   187    for (; line_number <= line_index_.line_count(); ++line_number) {
   188      absl::string_view entity_line = line_index_.GetLine(line_number);
   189      absl::string_view comment_start;
   190      if (RE2::PartialMatch(entity_line, R"((\s*(?:/\*|//)))", &comment_start)) {
   191        comment_location.begin = line_index_.ComputeByteOffset(line_number, 0) +
   192                                 (comment_start.data() - entity_line.data());
   193        comment_location.end =
   194            line_index_.ComputeByteOffset(line_number + 1, 0) - 1;
   195        if (RE2::PartialMatch(entity_line, top_comment_line_re)) {
   196          comment_lines.erase(comment_lines.begin());
   197        }
   198        break;
   199      }
   200    }
   201    if (line_number > line_index_.line_count()) {
   202      LOG(ERROR) << "Never found trailing comment \"" << comments << "\"";
   203      return entity_location;
   204    }
   205    ++line_number;
   206    for (const std::string& comment_line : comment_lines) {
   207      absl::string_view actual_line = line_index_.GetLine(line_number);
   208      std::string comment_re =
   209          absl::StrCat(R"(\s*(?://|/?\*\s*))", RE2::QuoteMeta(comment_line),
   210                       R"(\s*(?:\*/)?\s*)");
   211      if (!RE2::FullMatch(actual_line, comment_re)) {
   212        LOG(ERROR) << "Trailing comment line mismatch: [" << comment_line
   213                   << "] vs. [" << actual_line << "]"
   214                   << "(line " << line_number << ")";
   215        return comment_location;
   216      }
   217      comment_location.end += actual_line.size();
   218      ++line_number;
   219    }
   220  
   221    absl::string_view bottom_line = line_index_.GetLine(line_number);
   222    while (RE2::FullMatch(bottom_line, R"(\s*\*/?\s*)")) {
   223      comment_location.end += bottom_line.size();
   224      ++line_number;
   225      bottom_line = line_index_.GetLine(line_number);
   226    }
   227    return comment_location;
   228  }
   229  
   230  absl::StatusOr<PartialLocation> FileDescriptorWalker::ParseLocation(
   231      const std::vector<int>& span) const {
   232    PartialLocation location;
   233    if (span.size() == 4) {
   234      location.start_line = span[0] + 1;
   235      location.end_line = span[2] + 1;
   236      location.start_column = span[1];
   237      location.end_column = span[3];
   238    } else if (span.size() == 3) {
   239      location.start_line = span[0] + 1;
   240      location.end_line = span[0] + 1;
   241      location.start_column = span[1];
   242      location.end_column = span[2];
   243    } else {
   244      return absl::UnknownError("");
   245    }
   246    return location;
   247  }
   248  
   249  void FileDescriptorWalker::InitializeLocation(const std::vector<int>& span,
   250                                                Location* loc) {
   251    loc->file = file_name_;
   252    absl::StatusOr<PartialLocation> possible_location = ParseLocation(span);
   253    if (possible_location.ok()) {
   254      PartialLocation partial_location = *possible_location;
   255      loc->begin = ComputeByteOffset(partial_location.start_line,
   256                                     partial_location.start_column);
   257      loc->end = ComputeByteOffset(partial_location.end_line,
   258                                   partial_location.end_column);
   259    } else {
   260      // Some error in the span, create a dummy location for now
   261      // Happens in case of proto1 files
   262      LOG(ERROR) << "Unexpected location vector [" << absl::StrJoin(span, ":")
   263                 << "] while walking " << file_name_.path();
   264      loc->begin = 0;
   265      loc->end = 0;
   266    }
   267  }
   268  
   269  void FileDescriptorWalker::BuildLocationMap(
   270      const SourceCodeInfo& source_code_info) {
   271    for (int i = 0; i < source_code_info.location_size(); i++) {
   272      const SourceCodeInfo::Location& location = source_code_info.location(i);
   273      std::vector<int> path(location.path().begin(), location.path().end());
   274      std::vector<int> span(location.span().begin(), location.span().end());
   275      location_map_[path] = span;
   276      path_location_map_[path] = location;
   277    }
   278  }
   279  
   280  void FileDescriptorWalker::VisitImports() {
   281    {
   282      // Direct dependencies, from `import "foo.proto"` statements.
   283      std::vector<int> path = {FileDescriptorProto::kDependencyFieldNumber};
   284      for (int i = 0; i < file_descriptor_->dependency_count(); i++) {
   285        ScopedLookup import_lookup(&path, i);
   286        Location location;
   287        InitializeLocation(location_map_[path], &location);
   288        builder_->AddImport(file_descriptor_->dependency(i)->name(), location);
   289      }
   290    }
   291    {
   292      // Weak dependencies, from `import weak "foo.proto"` statements.
   293      std::vector<int> path = {FileDescriptorProto::kWeakDependencyFieldNumber};
   294      for (int i = 0; i < file_descriptor_->weak_dependency_count(); i++) {
   295        ScopedLookup import_lookup(&path, i);
   296        Location location;
   297        InitializeLocation(location_map_[path], &location);
   298        builder_->AddImport(file_descriptor_->weak_dependency(i)->name(),
   299                            location);
   300      }
   301    }
   302    {
   303      // Public dependencies, from `import public "foo.proto"` statements
   304      std::vector<int> path = {FileDescriptorProto::kPublicDependencyFieldNumber};
   305      for (int i = 0; i < file_descriptor_->public_dependency_count(); i++) {
   306        ScopedLookup import_lookup(&path, i);
   307        Location location;
   308        InitializeLocation(location_map_[path], &location);
   309        builder_->AddImport(file_descriptor_->public_dependency(i)->name(),
   310                            location);
   311      }
   312    }
   313  }
   314  
   315  namespace {
   316  std::string SignAnnotation(
   317      const google::protobuf::GeneratedCodeInfo::Annotation& annotation) {
   318    return absl::StrJoin(annotation.path(), ".");
   319  }
   320  
   321  VName VNameForAnnotation(
   322      const VName& context_vname,
   323      const google::protobuf::GeneratedCodeInfo::Annotation& annotation) {
   324    VName out;
   325    out.set_corpus(context_vname.corpus());
   326    out.set_path(annotation.source_file());
   327    out.set_signature(SignAnnotation(annotation));
   328    out.set_language(kLanguageName);
   329    return out;
   330  }
   331  }  // anonymous namespace
   332  
   333  void FileDescriptorWalker::VisitGeneratedProtoInfo() {
   334    if (!file_descriptor_->options().HasExtension(proto::generated_proto_info)) {
   335      return;
   336    }
   337    const google::protobuf::GeneratedCodeInfo& info =
   338        file_descriptor_->options()
   339            .GetExtension(proto::generated_proto_info)
   340            .generated_code_info();
   341  
   342    std::vector<MetadataFile::Rule> rules;
   343    int file_rule = -1;
   344    for (const auto& annotation : info.annotation()) {
   345      MetadataFile::Rule rule{};
   346      rule.whole_file = false;
   347      rule.begin = annotation.begin();
   348      rule.end = annotation.end();
   349      rule.vname = VNameForAnnotation(file_name_, annotation);
   350      rule.edge_in = kythe::common::schema::kDefinesBinding;
   351      rule.edge_out = kythe::common::schema::kGenerates;
   352      rule.reverse_edge = true;
   353      rule.generate_anchor = false;
   354      rule.anchor_begin = 0;
   355      rule.anchor_end = 0;
   356      rules.push_back(rule);
   357      if (!rule.vname.path().empty()) {
   358        if (file_rule < 0 || rule.begin > rules[file_rule].begin) {
   359          file_rule = rules.size() - 1;
   360        }
   361      }
   362    }
   363  
   364    // Add a file-scoped rule for the last encountered vname.
   365    if (file_rule >= 0) {
   366      MetadataFile::Rule rule{};
   367      rule.whole_file = true;
   368      rule.vname = rules[file_rule].vname;
   369      rule.vname.set_signature("");
   370      rule.vname.set_language("");
   371      rule.edge_out = kythe::common::schema::kGenerates;
   372      rule.reverse_edge = true;
   373      rule.generate_anchor = false;
   374      rules.push_back(rule);
   375    }
   376  
   377    auto meta = MetadataFile::LoadFromRules(file_name_.path(), rules.begin(),
   378                                            rules.end());
   379    builder_->SetMetadata(std::move(meta));
   380    builder_->MaybeAddMetadataFileRules(file_name_);
   381  }
   382  
   383  namespace {
   384  std::optional<proto::VName> VNameForBuiltinType(FieldDescriptor::Type type) {
   385    // TODO(zrlk): Emit builtins.
   386    return std::nullopt;
   387  }
   388  }  // anonymous namespace
   389  
   390  std::optional<proto::VName> FileDescriptorWalker::VNameForFieldType(
   391      const FieldDescriptor* field_proto) {
   392    if (field_proto->is_map()) {
   393      // Maps are technically TYPE_MESSAGE, but don't have a useful VName.
   394      return std::nullopt;
   395    }
   396    if (field_proto->type() == FieldDescriptor::TYPE_MESSAGE ||
   397        field_proto->type() == FieldDescriptor::TYPE_GROUP) {
   398      return builder_->VNameForDescriptor(field_proto->message_type());
   399    } else if (field_proto->type() == FieldDescriptor::TYPE_ENUM) {
   400      return builder_->VNameForDescriptor(field_proto->enum_type());
   401    } else {
   402      return VNameForBuiltinType(field_proto->type());
   403    }
   404  }
   405  
   406  void FileDescriptorWalker::AttachMarkedSource(
   407      const proto::VName& vname, const std::optional<MarkedSource>& code) {
   408    if (code) {
   409      builder_->AddCodeFact(vname, *code);
   410    }
   411  }
   412  
   413  void FileDescriptorWalker::VisitField(const std::string* parent_name,
   414                                        const VName* parent,
   415                                        const std::string& message_name,
   416                                        const VName& message,
   417                                        const FieldDescriptor* field,
   418                                        std::vector<int> lookup_path) {
   419    std::string vname = absl::StrCat(message_name, ".", field->name());
   420    VName v_name = builder_->VNameForDescriptor(field);
   421    AddComments(v_name, lookup_path);
   422  
   423    {
   424      // Get location of declaration and add as Grok binding
   425      ScopedLookup name_num(&lookup_path, FieldDescriptorProto::kNameFieldNumber);
   426      const std::vector<int>& span = location_map_[lookup_path];
   427      Location location;
   428      InitializeLocation(span, &location);
   429  
   430      VName oneof;
   431      bool in_oneof = false;
   432      if (field->containing_oneof() != nullptr) {
   433        in_oneof = true;
   434        oneof = builder_->VNameForDescriptor(field->containing_oneof());
   435      }
   436  
   437      builder_->AddFieldToMessage(parent, message, in_oneof ? &oneof : nullptr,
   438                                  v_name, location);
   439    }
   440  
   441    AttachMarkedSource(v_name,
   442                       GenerateMarkedSourceForDescriptor(field, builder_));
   443  
   444    // Check for [deprecated=true] annotations and emit deprecation tags.
   445    if (field->options().deprecated()) {
   446      builder_->SetDeprecated(v_name);
   447    }
   448  
   449    Location type_location;
   450    {
   451      ScopedLookup type_num(&lookup_path,
   452                            FieldDescriptorProto::kTypeNameFieldNumber);
   453      if (location_map_.find(lookup_path) == location_map_.end()) {
   454        // the type was primitive, ignore for now
   455        return;
   456      }
   457      const std::vector<int>& type_span = location_map_[lookup_path];
   458      InitializeLocation(type_span, &type_location);
   459  
   460      // If we're in a message or enum type, decorate only the span
   461      // covering the type name itself, not the full package name.
   462      // This is consistent with other languages and avoids the possibility
   463      // of a multi-line span, which some UIs have problems with.
   464      TruncateLocationToTypeName(type_location, *field);
   465    }
   466    if (auto type = VNameForFieldType(field)) {
   467      // TODO: add value_type back in at some point.
   468      // Add reference for this field's type.  We assume it to be output
   469      // processing a dependency, but in the worst case this might introduce
   470      // an edge to no VName (presumably in turn introducing a Lost node).
   471      builder_->AddReference(*type, type_location);
   472      builder_->AddTyping(v_name, *type);
   473    }
   474  
   475    if (field->is_map()) {
   476      // Map key/value types do not have SourceCodeInfo locations; we have to
   477      // find them within the outer "map<...>" type location.
   478      absl::string_view content = absl::string_view(content_);
   479      absl::string_view type_name = content.substr(
   480          type_location.begin, type_location.end - type_location.begin);
   481      absl::string_view key, val;
   482      if (RE2::FullMatch(type_name, R"(\s*map\s*<\s*(\S+)\s*,\s*(\S+)\s*>\s*)",
   483                         &key, &val)) {
   484        // Add references to map type components.
   485        if (auto key_type = VNameForFieldType(field->message_type()->field(0))) {
   486          size_t key_start = key.data() - content.data();
   487          builder_->AddReference(
   488              *key_type, {type_location.file, key_start, key_start + key.size()});
   489        }
   490  
   491        if (auto val_type = VNameForFieldType(field->message_type()->field(1))) {
   492          size_t val_start = val.data() - content.data();
   493          builder_->AddReference(
   494              *val_type, {type_location.file, val_start, val_start + val.size()});
   495        }
   496        // TODO(schroederc): emit map type node
   497      }
   498    }
   499  
   500    if (field->has_default_value()) {
   501      const EnumValueDescriptor* default_value = field->default_value_enum();
   502      VName value = builder_->VNameForDescriptor(default_value);
   503      // Find reference location
   504      ScopedLookup default_num(&lookup_path,
   505                               FieldDescriptorProto::kDefaultValueFieldNumber);
   506  
   507      const std::vector<int>& value_span = location_map_[lookup_path];
   508      Location value_location;
   509      InitializeLocation(value_span, &value_location);
   510      builder_->AddReference(value, value_location);
   511    }
   512  }
   513  
   514  void FileDescriptorWalker::VisitFields(const std::string& message_name,
   515                                         const Descriptor* dp,
   516                                         std::vector<int> lookup_path) {
   517    VName message = VNameForProtoPath(file_name_, lookup_path);
   518    if (visited_messages_.find(URI(message).ToString()) !=
   519        visited_messages_.end()) {
   520      return;
   521    }
   522    visited_messages_.insert(URI(message).ToString());
   523    {
   524      ScopedLookup field_num(&lookup_path, DescriptorProto::kFieldFieldNumber);
   525      for (int i = 0; i < dp->field_count(); i++) {
   526        ScopedLookup field_index(&lookup_path, i);
   527  
   528        VisitField(&message_name, &message, message_name, message, dp->field(i),
   529                   lookup_path);
   530      }
   531    }
   532    {
   533      ScopedLookup extension_num(&lookup_path,
   534                                 DescriptorProto::kExtensionFieldNumber);
   535      for (int i = 0; i < dp->extension_count(); i++) {
   536        ScopedLookup extension_index(&lookup_path, i);
   537        VisitExtension(&message_name, &message, dp->extension(i), lookup_path);
   538      }
   539    }
   540  }
   541  
   542  void FileDescriptorWalker::VisitNestedEnumTypes(const std::string& message_name,
   543                                                  const VName* message,
   544                                                  const Descriptor* dp,
   545                                                  std::vector<int> lookup_path) {
   546    ScopedLookup enum_num(&lookup_path, DescriptorProto::kEnumTypeFieldNumber);
   547    for (int i = 0; i < dp->enum_type_count(); i++) {
   548      const EnumDescriptor* nested_proto = dp->enum_type(i);
   549  
   550      // Get the path that corresponds to the name of the enum
   551      ScopedLookup enum_index(&lookup_path, i);
   552  
   553      std::string vname = absl::StrCat(message_name, ".", nested_proto->name());
   554  
   555      VName v_name = builder_->VNameForDescriptor(nested_proto);
   556      AddComments(v_name, lookup_path);
   557  
   558      {
   559        ScopedLookup name_num(&lookup_path,
   560                              EnumDescriptorProto::kNameFieldNumber);
   561        const std::vector<int>& span = location_map_[lookup_path];
   562        Location location;
   563        InitializeLocation(span, &location);
   564  
   565        builder_->AddEnumType(message, v_name, location);
   566        if (nested_proto->options().deprecated()) {
   567          builder_->SetDeprecated(v_name);
   568        }
   569        AttachMarkedSource(
   570            v_name, GenerateMarkedSourceForDescriptor(nested_proto, builder_));
   571      }
   572  
   573      // Visit values
   574      VisitEnumValues(nested_proto, &v_name, lookup_path);
   575    }
   576  }
   577  
   578  void FileDescriptorWalker::VisitNestedTypes(const std::string& message_name,
   579                                              const VName* message,
   580                                              const Descriptor* dp,
   581                                              std::vector<int> lookup_path) {
   582    ScopedLookup nested_type_num(&lookup_path,
   583                                 DescriptorProto::kNestedTypeFieldNumber);
   584  
   585    for (int i = 0; i < dp->nested_type_count(); i++) {
   586      ScopedLookup nested_index(&lookup_path, i);
   587      const Descriptor* nested_proto = dp->nested_type(i);
   588  
   589      // The proto compiler synthesizes types to represent map entries. For
   590      // example, a "map<string, string> my_map" field would cause a type
   591      // "MyMapEntry" to be generated. Because it doesn't actually exist in the
   592      // source .proto file, we ignore it.
   593      if (nested_proto->options().map_entry()) {
   594        continue;
   595      }
   596  
   597      std::string vname = absl::StrCat(message_name, ".", nested_proto->name());
   598  
   599      VName v_name = VNameForProtoPath(file_name_, lookup_path);
   600      AddComments(v_name, lookup_path);
   601  
   602      {
   603        // Also push kNameFieldNumber for location of declaration
   604        ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber);
   605  
   606        const std::vector<int>& span = location_map_[lookup_path];
   607        Location location;
   608        InitializeLocation(span, &location);
   609  
   610        builder_->AddMessageType(message, v_name, location);
   611        if (nested_proto->options().deprecated()) {
   612          builder_->SetDeprecated(v_name);
   613        }
   614        AttachMarkedSource(
   615            v_name, GenerateMarkedSourceForDescriptor(nested_proto, builder_));
   616      }
   617  
   618      // Need to visit nested enum and message types first!
   619      VisitNestedTypes(vname, &v_name, nested_proto, lookup_path);
   620      VisitNestedEnumTypes(vname, &v_name, nested_proto, lookup_path);
   621      VisitOneofs(vname, v_name, nested_proto, lookup_path);
   622    }
   623  }
   624  
   625  void FileDescriptorWalker::VisitOneofs(const std::string& message_name,
   626                                         const VName& message,
   627                                         const Descriptor* dp,
   628                                         std::vector<int> lookup_path) {
   629    ScopedLookup nested_type_num(&lookup_path,
   630                                 DescriptorProto::kOneofDeclFieldNumber);
   631  
   632    for (int i = 0; i < dp->oneof_decl_count(); i++) {
   633      ScopedLookup nested_index(&lookup_path, i);
   634      const OneofDescriptor* oneof = dp->oneof_decl(i);
   635      std::string vname = absl::StrCat(message_name, ".", oneof->name());
   636  
   637      VName v_name = builder_->VNameForDescriptor(oneof);
   638      AddComments(v_name, lookup_path);
   639  
   640      {
   641        // TODO: verify that this is correct for oneofs
   642        ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber);
   643  
   644        const std::vector<int>& span = location_map_[lookup_path];
   645        Location location;
   646        InitializeLocation(span, &location);
   647  
   648        builder_->AddOneofToMessage(message, v_name, location);
   649        AttachMarkedSource(v_name,
   650                           GenerateMarkedSourceForDescriptor(oneof, builder_));
   651      }
   652  
   653      // No need to add fields; they're also fields of the message
   654    }
   655  }
   656  
   657  void FileDescriptorWalker::VisitMessagesAndEnums(const std::string* ns_name,
   658                                                   const VName* ns) {
   659    std::vector<int> lookup_path;
   660    for (int i = 0; i < file_descriptor_->message_type_count(); i++) {
   661      ScopedLookup message_num(&lookup_path,
   662                               FileDescriptorProto::kMessageTypeFieldNumber);
   663  
   664      const Descriptor* dp = file_descriptor_->message_type(i);
   665  
   666      ScopedLookup message_index(&lookup_path, i);
   667  
   668      std::string vname = dp->name();
   669      if (ns_name != nullptr) {
   670        vname = absl::StrCat(*ns_name, ".", vname);
   671      }
   672  
   673      VName v_name = VNameForProtoPath(file_name_, lookup_path);
   674      AddComments(v_name, lookup_path);
   675  
   676      {
   677        ScopedLookup name_num(&lookup_path, DescriptorProto::kNameFieldNumber);
   678        const std::vector<int>& span = location_map_[lookup_path];
   679        Location location;
   680        InitializeLocation(span, &location);
   681  
   682        builder_->AddMessageType(ns, v_name, location);
   683        AttachMarkedSource(v_name,
   684                           GenerateMarkedSourceForDescriptor(dp, builder_));
   685        if (dp->options().deprecated()) {
   686          builder_->SetDeprecated(v_name);
   687        }
   688      }
   689  
   690      // Visit nested types first and fields later for easy type resolution
   691      VisitNestedTypes(vname, &v_name, dp, lookup_path);
   692      VisitNestedEnumTypes(vname, &v_name, dp, lookup_path);
   693      VisitOneofs(vname, v_name, dp, lookup_path);
   694    }
   695  
   696    // Add top-level ENUM bindings
   697    for (int i = 0; i < file_descriptor_->enum_type_count(); i++) {
   698      ScopedLookup enum_num(&lookup_path,
   699                            FileDescriptorProto::kEnumTypeFieldNumber);
   700      const EnumDescriptor* dp = file_descriptor_->enum_type(i);
   701      ScopedLookup enum_index(&lookup_path, i);
   702  
   703      std::string vname = dp->name();
   704      if (ns_name != nullptr) {
   705        vname = absl::StrCat(*ns_name, ".", vname);
   706      }
   707      VName v_name = builder_->VNameForDescriptor(dp);
   708      AddComments(v_name, lookup_path);
   709  
   710      {
   711        ScopedLookup name_num(&lookup_path,
   712                              EnumDescriptorProto::kNameFieldNumber);
   713        const std::vector<int>& span = location_map_[lookup_path];
   714        Location location;
   715        InitializeLocation(span, &location);
   716  
   717        builder_->AddEnumType(ns, v_name, location);
   718        AttachMarkedSource(v_name,
   719                           GenerateMarkedSourceForDescriptor(dp, builder_));
   720      }
   721  
   722      // Visit enum values and add kythe bindings for them
   723      VisitEnumValues(dp, &v_name, lookup_path);
   724    }
   725  }
   726  
   727  void FileDescriptorWalker::VisitEnumValues(const EnumDescriptor* dp,
   728                                             const VName* enum_node,
   729                                             std::vector<int> lookup_path) {
   730    ScopedLookup value_num(&lookup_path, EnumDescriptorProto::kValueFieldNumber);
   731  
   732    for (int j = 0; j < dp->value_count(); j++) {
   733      const EnumValueDescriptor* val_dp = dp->value(j);
   734  
   735      ScopedLookup value_index(&lookup_path, j);
   736      VName v_name = builder_->VNameForDescriptor(val_dp);
   737      AddComments(v_name, lookup_path);
   738  
   739      ScopedLookup name_num(&lookup_path,
   740                            EnumValueDescriptorProto::kNameFieldNumber);
   741      Location value_location;
   742      InitializeLocation(location_map_[lookup_path], &value_location);
   743  
   744      builder_->AddValueToEnum(*enum_node, v_name, value_location);
   745      if (val_dp->options().deprecated()) {
   746        builder_->SetDeprecated(v_name);
   747      }
   748      AttachMarkedSource(v_name,
   749                         GenerateMarkedSourceForDescriptor(val_dp, builder_));
   750    }
   751  }
   752  
   753  void FileDescriptorWalker::VisitAllFields(const std::string* ns_name,
   754                                            const VName* ns) {
   755    std::vector<int> lookup_path;
   756    {
   757      ScopedLookup message_num(&lookup_path,
   758                               FileDescriptorProto::kMessageTypeFieldNumber);
   759  
   760      // For each top-level message in the file, add the field bindings
   761      for (int i = 0; i < file_descriptor_->message_type_count(); i++) {
   762        const Descriptor* dp = file_descriptor_->message_type(i);
   763        std::string vname = dp->name();
   764        if (ns_name != nullptr) {
   765          vname = *ns_name + "." + vname;
   766        }
   767  
   768        ScopedLookup message_index(&lookup_path, i);
   769  
   770        // Visit fields within the message
   771        VisitFields(vname, dp, lookup_path);
   772  
   773        // Visit fields in nested mesages
   774        VisitNestedFields(vname, dp, lookup_path);
   775      }
   776    }
   777  
   778    {
   779      ScopedLookup extension_num(&lookup_path,
   780                                 FileDescriptorProto::kExtensionFieldNumber);
   781  
   782      // For each top-level extension in the file, add the field bindings
   783      for (int i = 0; i < file_descriptor_->extension_count(); i++) {
   784        ScopedLookup extension_index(&lookup_path, i);
   785        VisitExtension(ns_name, ns, file_descriptor_->extension(i), lookup_path);
   786      }
   787    }
   788  }
   789  
   790  void FileDescriptorWalker::VisitExtension(const std::string* parent_name,
   791                                            const VName* parent,
   792                                            const FieldDescriptor* field,
   793                                            std::vector<int> lookup_path) {
   794    std::string message_name = field->containing_type()->full_name();
   795    VName message = builder_->VNameForDescriptor(field->containing_type());
   796    {
   797      // In a block like this:
   798      // extend A {
   799      //    optional string b = 1;
   800      //    optional string c = 2;
   801      // }
   802      //
   803      // Link the name of the extended message "A" to the original
   804      // definition.  Each of "b" and "c" will generate this reference
   805      // which can result in duplicate references if more than one
   806      // field is declared in a single extend block.
   807      ScopedLookup extendee_num(&lookup_path,
   808                                FieldDescriptorProto::kExtendeeFieldNumber);
   809      const std::vector<int>& extendee_span = location_map_[lookup_path];
   810      Location extendee_location;
   811      InitializeLocation(extendee_span, &extendee_location);
   812      builder_->AddReference(message, extendee_location);
   813    }
   814  
   815    VisitField(parent_name, parent, message_name, message, field, lookup_path);
   816  }
   817  
   818  void FileDescriptorWalker::VisitNestedFields(const std::string& name_prefix,
   819                                               const Descriptor* dp,
   820                                               std::vector<int> lookup_path) {
   821    ScopedLookup nested_num(&lookup_path,
   822                            DescriptorProto::kNestedTypeFieldNumber);
   823  
   824    for (int j = 0; j < dp->nested_type_count(); j++) {
   825      const Descriptor* nested_dp = dp->nested_type(j);
   826      const std::string nested_name_prefix =
   827          absl::StrCat(name_prefix, ".", nested_dp->name());
   828  
   829      // The proto compiler synthesizes types to represent map entries. For
   830      // example, a "map<string, string> my_map" field would cause a type
   831      // "MyMapEntry" to be generated. Because it doesn't actually exist in the
   832      // source .proto file, we ignore it.
   833      if (nested_dp->options().map_entry()) {
   834        continue;
   835      }
   836  
   837      ScopedLookup nested_index(&lookup_path, j);
   838  
   839      // Visit fields within the message
   840      VisitFields(nested_name_prefix, nested_dp, lookup_path);
   841  
   842      // Visit fields in nested mesages
   843      VisitNestedFields(nested_name_prefix, nested_dp, lookup_path);
   844    }
   845  }
   846  
   847  void FileDescriptorWalker::AddComments(const VName& v_name,
   848                                         const std::vector<int>& path) {
   849    auto protoc_iter = path_location_map_.find(path);
   850    if (protoc_iter == path_location_map_.end()) {
   851      return;
   852    }
   853    const auto& protoc_location = protoc_iter->second;
   854    absl::StatusOr<PartialLocation> readable_location =
   855        ParseLocation(location_map_[path]);
   856    if (!readable_location.ok()) {
   857      return;
   858    }
   859    Location entity_location;
   860    InitializeLocation(location_map_[path], &entity_location);
   861    PartialLocation coordinates = *readable_location;
   862    if (protoc_location.has_leading_comments()) {
   863      Location comment_location = LocationOfLeadingComments(
   864          entity_location, coordinates.start_line, coordinates.start_column,
   865          protoc_location.leading_comments());
   866      builder_->AddDocComment(v_name, comment_location);
   867    }
   868    if (protoc_location.has_trailing_comments()) {
   869      Location comment_location = LocationOfTrailingComments(
   870          entity_location, coordinates.start_line, coordinates.start_column,
   871          protoc_location.trailing_comments());
   872      builder_->AddDocComment(v_name, comment_location);
   873    }
   874  }
   875  
   876  void FileDescriptorWalker::VisitRpcServices(const std::string* ns_name,
   877                                              const VName* ns) {
   878    std::vector<int> lookup_path;
   879    ScopedLookup service_num(&lookup_path,
   880                             FileDescriptorProto::kServiceFieldNumber);
   881    for (int i = 0; i < file_descriptor_->service_count(); i++) {
   882      const ServiceDescriptor* dp = file_descriptor_->service(i);
   883      ScopedLookup service_index(&lookup_path, i);
   884  
   885      std::string service_vname = dp->name();
   886      if (ns_name != nullptr) {
   887        service_vname = absl::StrCat(*ns_name, ".", service_vname);
   888      }
   889      VName v_name = builder_->VNameForDescriptor(dp);
   890      AddComments(v_name, lookup_path);
   891  
   892      {
   893        ScopedLookup name_num(&lookup_path,
   894                              ServiceDescriptorProto::kNameFieldNumber);
   895        const std::vector<int>& span = location_map_[lookup_path];
   896        Location location;
   897        InitializeLocation(span, &location);
   898  
   899        builder_->AddService(ns, v_name, location);
   900        AttachMarkedSource(v_name,
   901                           GenerateMarkedSourceForDescriptor(dp, builder_));
   902      }
   903  
   904      // Visit methods
   905      ScopedLookup method_num(&lookup_path,
   906                              ServiceDescriptorProto::kMethodFieldNumber);
   907      for (int j = 0; j < dp->method_count(); j++) {
   908        const MethodDescriptor* method_dp = dp->method(j);
   909        ScopedLookup method_index(&lookup_path, j);
   910        std::string method_vname =
   911            absl::StrCat(service_vname, ".", method_dp->name());
   912        VName method = builder_->VNameForDescriptor(method_dp);
   913        AddComments(method, lookup_path);
   914  
   915        {
   916          // Add method as a declaration
   917          ScopedLookup name_num(&lookup_path,
   918                                MethodDescriptorProto::kNameFieldNumber);
   919          Location method_location;
   920          InitializeLocation(location_map_[lookup_path], &method_location);
   921          AttachMarkedSource(
   922              method, GenerateMarkedSourceForDescriptor(method_dp, builder_));
   923          builder_->AddMethodToService(v_name, method, method_location);
   924        }
   925  
   926        VName input_sig;
   927        {
   928          // Add rpc method's input argument
   929          ScopedLookup input_num(&lookup_path,
   930                                 MethodDescriptorProto::kInputTypeFieldNumber);
   931          Location input_location;
   932          InitializeLocation(location_map_[lookup_path], &input_location);
   933          const Descriptor* input = method_dp->input_type();
   934          // Only decorate the type name, not the full <package>.<type> span.
   935          TruncateLocationToTypeName(input_location, *input);
   936  
   937          input_sig = builder_->VNameForDescriptor(input);
   938          builder_->AddArgumentToMethod(method, input_sig, input_location);
   939        }
   940  
   941        VName output_sig;
   942        {
   943          // Add rpc method's output argument
   944          ScopedLookup output_num(&lookup_path,
   945                                  MethodDescriptorProto::kOutputTypeFieldNumber);
   946          Location output_location;
   947          InitializeLocation(location_map_[lookup_path], &output_location);
   948          const Descriptor* output = method_dp->output_type();
   949          // Only decorate the type name, not the full <package>.<type> span.
   950          TruncateLocationToTypeName(output_location, *output);
   951  
   952          output_sig = builder_->VNameForDescriptor(output);
   953          builder_->AddArgumentToMethod(method, output_sig, output_location);
   954        }
   955        builder_->AddMethodType(method, input_sig, output_sig);
   956      }
   957    }
   958  }
   959  
   960  void FileDescriptorWalker::PopulateCodeGraph() {
   961    BuildLocationMap(*source_code_info_);
   962    VisitGeneratedProtoInfo();
   963    VisitImports();
   964  
   965    const VName* ns = nullptr;
   966    const std::string* ns_name = nullptr;
   967    VName v_name;
   968    const std::string& package = file_descriptor_->package();
   969    if (!package.empty()) {
   970      std::vector<int> lookup_path;
   971      ScopedLookup package_num(&lookup_path,
   972                               FileDescriptorProto::kPackageFieldNumber);
   973      const std::vector<int>& span = location_map_[lookup_path];
   974      Location location;
   975      InitializeLocation(span, &location);
   976      v_name.set_language(kLanguageName);
   977      v_name.set_corpus(file_name_.corpus());
   978      v_name.set_signature(package);
   979      builder_->AddNamespace(v_name, location);
   980      ns = &v_name;
   981      ns_name = &package;
   982    }
   983  
   984    VisitMessagesAndEnums(ns_name, ns);
   985    VisitAllFields(ns_name, ns);
   986    VisitRpcServices(ns_name, ns);
   987  }
   988  
   989  }  // namespace lang_proto
   990  }  // namespace kythe