kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/proto/proto_extractor.cc (about) 1 /* 2 * Copyright 2019 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "proto_extractor.h" 18 19 #include <cstdlib> 20 #include <cstring> 21 #include <memory> 22 #include <set> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 #include "absl/log/check.h" 28 #include "absl/log/log.h" 29 #include "absl/strings/string_view.h" 30 #include "google/protobuf/compiler/importer.h" 31 #include "kythe/cxx/common/file_utils.h" 32 #include "kythe/cxx/common/file_vname_generator.h" 33 #include "kythe/cxx/common/index_writer.h" 34 #include "kythe/cxx/common/path_utils.h" 35 #include "kythe/cxx/indexer/proto/search_path.h" 36 #include "kythe/proto/analysis.pb.h" 37 #include "kythe/proto/storage.pb.h" 38 39 namespace kythe { 40 namespace lang_proto { 41 namespace { 42 43 using ::google::protobuf::compiler::DiskSourceTree; 44 45 // Error "collector" that just writes messages to log output. 46 class LoggingMultiFileErrorCollector 47 : public google::protobuf::compiler::MultiFileErrorCollector { 48 public: 49 void AddError(const std::string& filename, int line, int column, 50 const std::string& message) override { 51 LOG(ERROR) << filename << "@" << line << ":" << column << ": " << message; 52 } 53 54 void AddWarning(const std::string& filename, int line, int column, 55 const std::string& message) override { 56 LOG(ERROR) << filename << "@" << line << ":" << column << ": " << message; 57 } 58 }; 59 60 // DiskSourceTree that records which proto files are opened while parsing the 61 // toplevel proto(s), allowing us to get a list of transitive dependencies. 62 class RecordingDiskSourceTree : public DiskSourceTree { 63 public: 64 google::protobuf::io::ZeroCopyInputStream* Open( 65 absl::string_view filename) override { 66 // Record resolved/canonical path because the same proto may be Open()'d via 67 // multiple relative paths and we only want to record it once. 68 std::string canonical_path; 69 if (!DiskSourceTree::VirtualFileToDiskFile(filename, &canonical_path)) { 70 return nullptr; 71 } 72 if (opened_files_.find(canonical_path) == opened_files_.end()) { 73 opened_files_.insert(canonical_path); 74 } 75 76 return DiskSourceTree::Open(filename); 77 } 78 79 // A set of unique file paths that have been passed to Open(). 80 const std::set<std::string>& opened_files() const { return opened_files_; } 81 82 private: 83 std::set<std::string> opened_files_; 84 }; 85 86 } // namespace 87 88 proto::CompilationUnit ProtoExtractor::ExtractProtos( 89 const std::vector<std::string>& proto_filenames, 90 IndexWriter* index_writer) const { 91 proto::CompilationUnit unit; 92 93 unit.set_working_directory(GetCurrentDirectory().value()); 94 95 for (const std::string& proto : proto_filenames) { 96 unit.add_argument(proto); 97 } 98 99 // Add path substitutions to src_tree. 100 RecordingDiskSourceTree src_tree; 101 src_tree.MapPath("", ""); // Add current directory to VFS. 102 for (const auto& sub : path_substitutions) { 103 src_tree.MapPath(sub.first, sub.second); 104 } 105 106 // Add protoc args to output. 107 if (!path_substitutions.empty()) { 108 unit.add_argument("--"); 109 for (auto& arg : PathSubstitutionsToArgs(path_substitutions)) { 110 unit.add_argument(arg); 111 } 112 } 113 114 // Import the toplevel proto(s), which will record paths of any transitive 115 // dependencies to src_tree. 116 { 117 LoggingMultiFileErrorCollector err_collector; 118 for (const std::string& fname : proto_filenames) { 119 // Note that a separate importer instance is used for each top-level 120 // import to avoid double-importing any subprotos, which would happen if 121 // two top-level protos share any transitive dependencies. 122 google::protobuf::compiler::Importer importer(&src_tree, &err_collector); 123 CHECK(importer.Import(fname) != nullptr) 124 << "Failed to import file: " << fname; 125 126 unit.add_source_file(RelativizePath(fname, root_directory)); 127 } 128 } 129 130 // Write each toplevel proto and its transitive dependencies into the kzip. 131 for (const std::string& abspath : src_tree.opened_files()) { 132 // Resolve path relative to the proto compiler's search paths. 133 std::string relpath, shadow; 134 CHECK(DiskSourceTree::SUCCESS == 135 src_tree.DiskFileToVirtualFile(abspath, &relpath, &shadow)); 136 CHECK(shadow.empty()) << "Filepath shadows a real file: " << relpath; 137 // Read file contents 138 std::string file_contents; 139 { 140 std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> in_stream( 141 src_tree.Open(relpath)); 142 CHECK(in_stream != nullptr) << "Can't open file: " << relpath; 143 144 const void* data = nullptr; 145 int size = 0; 146 while (in_stream->Next(&data, &size)) { 147 file_contents.append(static_cast<const char*>(data), size); 148 } 149 } 150 151 // Make path relative to KYTHE_ROOT_DIRECTORY. 152 const std::string final_path = RelativizePath(abspath, root_directory); 153 154 // Write file to index. 155 auto digest = index_writer->WriteFile(file_contents); 156 CHECK(digest.ok()) << digest.status(); 157 158 // Record file info to compilation unit. 159 proto::CompilationUnit::FileInput* file_input = unit.add_required_input(); 160 proto::VName vname = vname_gen.LookupVName(final_path); 161 if (vname.corpus().empty()) { 162 vname.set_corpus(std::string(corpus)); 163 } 164 *file_input->mutable_v_name() = std::move(vname); 165 file_input->mutable_info()->set_path(final_path); 166 file_input->mutable_info()->set_digest(*digest); 167 } 168 169 return unit; 170 } 171 172 void ProtoExtractor::ConfigureFromEnv() { 173 if (const char* env_corpus = getenv("KYTHE_CORPUS")) { 174 corpus = env_corpus; 175 } 176 177 // File paths in the output kzip should be relative to this directory. 178 if (const char* env_root_directory = getenv("KYTHE_ROOT_DIRECTORY")) { 179 root_directory = env_root_directory; 180 } 181 182 // Configure VName generator. 183 const char* vname_path = getenv("KYTHE_VNAMES"); 184 if (vname_path && strlen(vname_path) > 0) { 185 std::string json = LoadFileOrDie(vname_path); 186 std::string error_text; 187 CHECK(vname_gen.LoadJsonString(json, &error_text)) 188 << "Could not parse vname generator configuration: " << error_text; 189 } 190 } 191 192 } // namespace lang_proto 193 } // namespace kythe