kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/file_descriptor_walker.h (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #ifndef KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_
    18  #define KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_
    19  
    20  #include <map>
    21  #include <memory>
    22  #include <optional>
    23  #include <set>
    24  #include <string>
    25  #include <vector>
    26  
    27  #include "absl/log/log.h"
    28  #include "absl/status/statusor.h"
    29  #include "absl/strings/string_view.h"
    30  #include "kythe/cxx/common/file_vname_generator.h"
    31  #include "kythe/cxx/common/indexing/KytheOutputStream.h"
    32  #include "kythe/cxx/common/kythe_uri.h"
    33  #include "kythe/cxx/common/utf8_line_index.h"
    34  #include "kythe/cxx/indexer/proto/proto_analyzer.h"
    35  #include "kythe/cxx/indexer/proto/proto_graph_builder.h"
    36  #include "kythe/proto/common.pb.h"
    37  #include "kythe/proto/storage.pb.h"
    38  #include "kythe/proto/xref.pb.h"
    39  
    40  namespace proto2 {
    41  class Descriptor;
    42  class DescriptorPool;
    43  class EnumDescriptor;
    44  class FileDescriptor;
    45  }  // namespace proto2
    46  
    47  namespace kythe {
    48  namespace lang_proto {
    49  
    50  // A human-readable mediator between 3/4 element "span" vectors and the proto
    51  // compiler's SourceLocations (which contain extra info we don't always want
    52  // to pass around).
    53  //
    54  // Line numbers start at 1, but column numbers start at 0. Column numbers
    55  // correspond with byte offsets into the file except in the case of tabs,
    56  // which advance the column number to the next multiple of 8.
    57  struct PartialLocation {
    58    int start_line;
    59    int end_line;
    60    int start_column;
    61    int end_column;
    62  };
    63  
    64  // Class for walking a file descriptor and its messages, enums, etc.
    65  // Mainly just a place to keep track of state between related methods.
    66  class FileDescriptorWalker {
    67   public:
    68    FileDescriptorWalker(const google::protobuf::FileDescriptor* file_descriptor,
    69                         const google::protobuf::SourceCodeInfo& source_code_info,
    70                         const proto::VName& file_name,
    71                         const std::string& content, ProtoGraphBuilder* builder,
    72                         ProtoAnalyzer* analyzer)
    73        : file_descriptor_(file_descriptor),
    74          source_code_info_(&source_code_info),
    75          file_name_(file_name),
    76          content_(content),
    77          line_index_(kythe::UTF8LineIndex(content_)),
    78          builder_(builder),
    79          uri_(file_name_) {}
    80  
    81    // disallow copy and assign
    82    FileDescriptorWalker(const FileDescriptorWalker&) = delete;
    83    void operator=(const FileDescriptorWalker&) = delete;
    84  
    85    // Takes in a span -- as defined by SourceCodeInfo.Location.span -- and
    86    // converts it into a Location.
    87    void InitializeLocation(const std::vector<int>& span, Location* loc);
    88  
    89    // Adds path and span from source_code_info to location_map_ as key and value
    90    // respectively.
    91    void BuildLocationMap(
    92        const google::protobuf::SourceCodeInfo& source_code_info);
    93  
    94    // Walks through all of the imports in the descriptor and adds them to the
    95    // graph. Imports includes all of dependencies, weak dependencies and public
    96    // dependencies.
    97    void VisitImports();
    98  
    99    // Walks through the fields and declared extensions of the input
   100    // DescriptorProto and adds Kythe childof edges. Also looks for the type name.
   101    // of the field and adds a Kythe ref edge if the type name can be resolved.
   102    // For example, consider the field:
   103    // Foo bar = 2;
   104    // ^   ^
   105    // We look for the location of typename (Foo) and save that in Kythe as
   106    // reference location. We look for the location of the name (bar) and save in
   107    // Kythe as a declaration.
   108    // `lookup_path` is expected to point to the parent message (all of it).
   109    void VisitFields(const std::string& message_name,
   110                     const google::protobuf::Descriptor* dp,
   111                     std::vector<int> lookup_path);
   112  
   113    // Processes the declaration of an individual field.
   114    // `parent_name`/`parent` refer to the context this field is declared in
   115    // (null for top-level extensions in a package-less file).
   116    // `message_name`/`message` refer to the message this ticket is a part of.
   117    // These only differ when processing extensions.
   118    // `lookup_path` is expected to point to the FieldDescriptorProto being
   119    // processed.
   120    void VisitField(const std::string* parent_name, const proto::VName* parent,
   121                    const std::string& message_name, const proto::VName& message,
   122                    const google::protobuf::FieldDescriptor* field,
   123                    std::vector<int> lookup_path);
   124  
   125    // Processes the declaration of an extended field, and adds a reference
   126    // to the message being extended (in the "extend X {" line).
   127    // `parent_name`/`parent` refers to the context this field is declared in
   128    // (null for top-level extensions in a package-less file).
   129    // `lookup_path` is expected to point to the FieldDescriptorProto of the
   130    // extension being processed.
   131    void VisitExtension(const std::string* parent_name,
   132                        const proto::VName* parent,
   133                        const google::protobuf::FieldDescriptor* field,
   134                        std::vector<int> lookup_path);
   135  
   136    // Visits all the nested message types in the given DescriptorProto.
   137    // The nested messages are added to the codegraph.
   138    // `lookup_path` is used to fetch the location of declaration.
   139    void VisitNestedEnumTypes(const std::string& message_name,
   140                              const proto::VName* message,
   141                              const google::protobuf::Descriptor* dp,
   142                              std::vector<int> lookup_path);
   143  
   144    // Visits all the nested message types in the given DescriptorProto.
   145    // The nested messages are added to the codegraph.
   146    // `lookup_path` must point to the given DescriptorProto.
   147    // The lookup path is used to fetch the location of declaration.
   148    void VisitNestedTypes(const std::string& message_name,
   149                          const proto::VName* message,
   150                          const google::protobuf::Descriptor* dp,
   151                          std::vector<int> lookup_path);
   152  
   153    // Visits all the oneofs within a message and adds them to the codegraph.
   154    // `lookup_path` must point to the given DescriptorProto.
   155    // The lookup path is used to fetch the location of declaration; although we
   156    // modify the lookup path, it is left in its original state after we return.
   157    void VisitOneofs(const std::string& message_name, const proto::VName& message,
   158                     const google::protobuf::Descriptor* dp,
   159                     std::vector<int> lookup_path);
   160  
   161    // Visits all the messages and enums within a namespace. All messages and
   162    // enums, along with their associated fields, oneofs, and values, are added
   163    // to the graph.
   164    void VisitMessagesAndEnums(const std::string* ns_name,
   165                               const proto::VName* ns);
   166  
   167    // Visit all values in a given enum (either top-level or nested) and add
   168    // Kythe nodes and edges.
   169    // `lookup_path` must point to the enum.
   170    void VisitEnumValues(const google::protobuf::EnumDescriptor* dp,
   171                         const proto::VName* e, std::vector<int> lookup_path);
   172  
   173    // Method to add declarations and references for all fields.
   174    // We do this after all messages and enums (both top-level and nested)
   175    // are added to Kythe.
   176    void VisitAllFields(const std::string* ns_name, const proto::VName* ns);
   177  
   178    // Visit stubby services and input/output methods.
   179    void VisitRpcServices(const std::string* ns_name, const proto::VName* ns);
   180  
   181    // This function invokes all the Visit* functions and also adds the
   182    // namespace as a Kythe binding.
   183    void PopulateCodeGraph();
   184  
   185   private:
   186    // Converts from a proto line/column (both 0 based, and where column counts
   187    // bytes except that tabs move to the next multiple of 8) to a byte offset
   188    // from the start of the current file.  Returns -1 on error.
   189    int ComputeByteOffset(int line_number, int column_number) const;
   190  
   191    // Computes the bytes prior to the start of the element starting on
   192    // `entity_start_line` at `entity_start_column` that make up `comment`.
   193    Location LocationOfLeadingComments(const Location& entity_location,
   194                                       int entity_start_line,
   195                                       int entity_start_column,
   196                                       const std::string& comment) const;
   197  
   198    // Compute the bytes following to the end of the element starting on
   199    // `entity_start_line` at `entity_start_column` that make up `comment`.
   200    Location LocationOfTrailingComments(const Location& entity_location,
   201                                        int entity_start_line,
   202                                        int entity_start_column,
   203                                        const std::string& comment) const;
   204  
   205    // Parses a location span vector (three or four integers that protoc uses to
   206    // represent a location in a file) and return a sensible PartialLocation or
   207    // Status::INVALID_ARGUMENT if the vector cannot be properly interpreted.
   208    absl::StatusOr<PartialLocation> ParseLocation(
   209        const std::vector<int>& span) const;
   210  
   211    std::optional<proto::VName> VNameForFieldType(
   212        const google::protobuf::FieldDescriptor* field);
   213  
   214    /// \brief Attach marked source (if not None) to `vname`.
   215    void AttachMarkedSource(const proto::VName& vname,
   216                            const std::optional<MarkedSource>& code);
   217  
   218    const google::protobuf::FileDescriptor* file_descriptor_;
   219    const google::protobuf::SourceCodeInfo* source_code_info_;
   220    const proto::VName file_name_;
   221    const std::string content_;
   222    const kythe::UTF8LineIndex line_index_;
   223    ProtoGraphBuilder* builder_;
   224    URI uri_;
   225    std::map<std::vector<int>, std::vector<int> > location_map_;
   226    std::map<std::vector<int>, google::protobuf::SourceCodeInfo::Location>
   227        path_location_map_;
   228  
   229    // Set of messages for which their fields are already visited.
   230    // There are two functions from which 'VisitFields' gets called;
   231    // 'VisitAllFields' and 'VisitNestedTypes'. This causes analyzer to create
   232    // duplicate entries for some nodes. This set helps us avoid processing
   233    // fields more than once.
   234    std::set<std::string> visited_messages_;
   235  
   236    // Adds leading and trailing comments for the element specified by ticket and
   237    // path. `v_name` is the name of the element in question; `path` is used
   238    // to look up the SourceCodeInfo::Location and the retrieve comment locations.
   239    void AddComments(const proto::VName& v_name, const std::vector<int>& path);
   240  
   241    // This recursively visits nested fields for VisitAllFields, with the current
   242    // parent scope specified by name_prefix, message-descriptor 'dp' and
   243    // lookup_path for source information lookup.
   244    void VisitNestedFields(const std::string& name_prefix,
   245                           const google::protobuf::Descriptor* dp,
   246                           std::vector<int> lookup_path);
   247  
   248    // Checks for generated proto info
   249    void VisitGeneratedProtoInfo();
   250  };
   251  
   252  }  // namespace lang_proto
   253  }  // namespace kythe
   254  
   255  #endif  // KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_