kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/file_descriptor_walker.h (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_ 18 #define KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_ 19 20 #include <map> 21 #include <memory> 22 #include <optional> 23 #include <set> 24 #include <string> 25 #include <vector> 26 27 #include "absl/log/log.h" 28 #include "absl/status/statusor.h" 29 #include "absl/strings/string_view.h" 30 #include "kythe/cxx/common/file_vname_generator.h" 31 #include "kythe/cxx/common/indexing/KytheOutputStream.h" 32 #include "kythe/cxx/common/kythe_uri.h" 33 #include "kythe/cxx/common/utf8_line_index.h" 34 #include "kythe/cxx/indexer/proto/proto_analyzer.h" 35 #include "kythe/cxx/indexer/proto/proto_graph_builder.h" 36 #include "kythe/proto/common.pb.h" 37 #include "kythe/proto/storage.pb.h" 38 #include "kythe/proto/xref.pb.h" 39 40 namespace proto2 { 41 class Descriptor; 42 class DescriptorPool; 43 class EnumDescriptor; 44 class FileDescriptor; 45 } // namespace proto2 46 47 namespace kythe { 48 namespace lang_proto { 49 50 // A human-readable mediator between 3/4 element "span" vectors and the proto 51 // compiler's SourceLocations (which contain extra info we don't always want 52 // to pass around). 53 // 54 // Line numbers start at 1, but column numbers start at 0. Column numbers 55 // correspond with byte offsets into the file except in the case of tabs, 56 // which advance the column number to the next multiple of 8. 57 struct PartialLocation { 58 int start_line; 59 int end_line; 60 int start_column; 61 int end_column; 62 }; 63 64 // Class for walking a file descriptor and its messages, enums, etc. 65 // Mainly just a place to keep track of state between related methods. 66 class FileDescriptorWalker { 67 public: 68 FileDescriptorWalker(const google::protobuf::FileDescriptor* file_descriptor, 69 const google::protobuf::SourceCodeInfo& source_code_info, 70 const proto::VName& file_name, 71 const std::string& content, ProtoGraphBuilder* builder, 72 ProtoAnalyzer* analyzer) 73 : file_descriptor_(file_descriptor), 74 source_code_info_(&source_code_info), 75 file_name_(file_name), 76 content_(content), 77 line_index_(kythe::UTF8LineIndex(content_)), 78 builder_(builder), 79 uri_(file_name_) {} 80 81 // disallow copy and assign 82 FileDescriptorWalker(const FileDescriptorWalker&) = delete; 83 void operator=(const FileDescriptorWalker&) = delete; 84 85 // Takes in a span -- as defined by SourceCodeInfo.Location.span -- and 86 // converts it into a Location. 87 void InitializeLocation(const std::vector<int>& span, Location* loc); 88 89 // Adds path and span from source_code_info to location_map_ as key and value 90 // respectively. 91 void BuildLocationMap( 92 const google::protobuf::SourceCodeInfo& source_code_info); 93 94 // Walks through all of the imports in the descriptor and adds them to the 95 // graph. Imports includes all of dependencies, weak dependencies and public 96 // dependencies. 97 void VisitImports(); 98 99 // Walks through the fields and declared extensions of the input 100 // DescriptorProto and adds Kythe childof edges. Also looks for the type name. 101 // of the field and adds a Kythe ref edge if the type name can be resolved. 102 // For example, consider the field: 103 // Foo bar = 2; 104 // ^ ^ 105 // We look for the location of typename (Foo) and save that in Kythe as 106 // reference location. We look for the location of the name (bar) and save in 107 // Kythe as a declaration. 108 // `lookup_path` is expected to point to the parent message (all of it). 109 void VisitFields(const std::string& message_name, 110 const google::protobuf::Descriptor* dp, 111 std::vector<int> lookup_path); 112 113 // Processes the declaration of an individual field. 114 // `parent_name`/`parent` refer to the context this field is declared in 115 // (null for top-level extensions in a package-less file). 116 // `message_name`/`message` refer to the message this ticket is a part of. 117 // These only differ when processing extensions. 118 // `lookup_path` is expected to point to the FieldDescriptorProto being 119 // processed. 120 void VisitField(const std::string* parent_name, const proto::VName* parent, 121 const std::string& message_name, const proto::VName& message, 122 const google::protobuf::FieldDescriptor* field, 123 std::vector<int> lookup_path); 124 125 // Processes the declaration of an extended field, and adds a reference 126 // to the message being extended (in the "extend X {" line). 127 // `parent_name`/`parent` refers to the context this field is declared in 128 // (null for top-level extensions in a package-less file). 129 // `lookup_path` is expected to point to the FieldDescriptorProto of the 130 // extension being processed. 131 void VisitExtension(const std::string* parent_name, 132 const proto::VName* parent, 133 const google::protobuf::FieldDescriptor* field, 134 std::vector<int> lookup_path); 135 136 // Visits all the nested message types in the given DescriptorProto. 137 // The nested messages are added to the codegraph. 138 // `lookup_path` is used to fetch the location of declaration. 139 void VisitNestedEnumTypes(const std::string& message_name, 140 const proto::VName* message, 141 const google::protobuf::Descriptor* dp, 142 std::vector<int> lookup_path); 143 144 // Visits all the nested message types in the given DescriptorProto. 145 // The nested messages are added to the codegraph. 146 // `lookup_path` must point to the given DescriptorProto. 147 // The lookup path is used to fetch the location of declaration. 148 void VisitNestedTypes(const std::string& message_name, 149 const proto::VName* message, 150 const google::protobuf::Descriptor* dp, 151 std::vector<int> lookup_path); 152 153 // Visits all the oneofs within a message and adds them to the codegraph. 154 // `lookup_path` must point to the given DescriptorProto. 155 // The lookup path is used to fetch the location of declaration; although we 156 // modify the lookup path, it is left in its original state after we return. 157 void VisitOneofs(const std::string& message_name, const proto::VName& message, 158 const google::protobuf::Descriptor* dp, 159 std::vector<int> lookup_path); 160 161 // Visits all the messages and enums within a namespace. All messages and 162 // enums, along with their associated fields, oneofs, and values, are added 163 // to the graph. 164 void VisitMessagesAndEnums(const std::string* ns_name, 165 const proto::VName* ns); 166 167 // Visit all values in a given enum (either top-level or nested) and add 168 // Kythe nodes and edges. 169 // `lookup_path` must point to the enum. 170 void VisitEnumValues(const google::protobuf::EnumDescriptor* dp, 171 const proto::VName* e, std::vector<int> lookup_path); 172 173 // Method to add declarations and references for all fields. 174 // We do this after all messages and enums (both top-level and nested) 175 // are added to Kythe. 176 void VisitAllFields(const std::string* ns_name, const proto::VName* ns); 177 178 // Visit stubby services and input/output methods. 179 void VisitRpcServices(const std::string* ns_name, const proto::VName* ns); 180 181 // This function invokes all the Visit* functions and also adds the 182 // namespace as a Kythe binding. 183 void PopulateCodeGraph(); 184 185 private: 186 // Converts from a proto line/column (both 0 based, and where column counts 187 // bytes except that tabs move to the next multiple of 8) to a byte offset 188 // from the start of the current file. Returns -1 on error. 189 int ComputeByteOffset(int line_number, int column_number) const; 190 191 // Computes the bytes prior to the start of the element starting on 192 // `entity_start_line` at `entity_start_column` that make up `comment`. 193 Location LocationOfLeadingComments(const Location& entity_location, 194 int entity_start_line, 195 int entity_start_column, 196 const std::string& comment) const; 197 198 // Compute the bytes following to the end of the element starting on 199 // `entity_start_line` at `entity_start_column` that make up `comment`. 200 Location LocationOfTrailingComments(const Location& entity_location, 201 int entity_start_line, 202 int entity_start_column, 203 const std::string& comment) const; 204 205 // Parses a location span vector (three or four integers that protoc uses to 206 // represent a location in a file) and return a sensible PartialLocation or 207 // Status::INVALID_ARGUMENT if the vector cannot be properly interpreted. 208 absl::StatusOr<PartialLocation> ParseLocation( 209 const std::vector<int>& span) const; 210 211 std::optional<proto::VName> VNameForFieldType( 212 const google::protobuf::FieldDescriptor* field); 213 214 /// \brief Attach marked source (if not None) to `vname`. 215 void AttachMarkedSource(const proto::VName& vname, 216 const std::optional<MarkedSource>& code); 217 218 const google::protobuf::FileDescriptor* file_descriptor_; 219 const google::protobuf::SourceCodeInfo* source_code_info_; 220 const proto::VName file_name_; 221 const std::string content_; 222 const kythe::UTF8LineIndex line_index_; 223 ProtoGraphBuilder* builder_; 224 URI uri_; 225 std::map<std::vector<int>, std::vector<int> > location_map_; 226 std::map<std::vector<int>, google::protobuf::SourceCodeInfo::Location> 227 path_location_map_; 228 229 // Set of messages for which their fields are already visited. 230 // There are two functions from which 'VisitFields' gets called; 231 // 'VisitAllFields' and 'VisitNestedTypes'. This causes analyzer to create 232 // duplicate entries for some nodes. This set helps us avoid processing 233 // fields more than once. 234 std::set<std::string> visited_messages_; 235 236 // Adds leading and trailing comments for the element specified by ticket and 237 // path. `v_name` is the name of the element in question; `path` is used 238 // to look up the SourceCodeInfo::Location and the retrieve comment locations. 239 void AddComments(const proto::VName& v_name, const std::vector<int>& path); 240 241 // This recursively visits nested fields for VisitAllFields, with the current 242 // parent scope specified by name_prefix, message-descriptor 'dp' and 243 // lookup_path for source information lookup. 244 void VisitNestedFields(const std::string& name_prefix, 245 const google::protobuf::Descriptor* dp, 246 std::vector<int> lookup_path); 247 248 // Checks for generated proto info 249 void VisitGeneratedProtoInfo(); 250 }; 251 252 } // namespace lang_proto 253 } // namespace kythe 254 255 #endif // KYTHE_CXX_INDEXER_PROTO_FILE_DESCRIPTOR_WALKER_H_