kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/kythe_indexer_main.cc (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Allows the Kythe Proto indexer to be invoked from the command line. By 18 // default, this program reads a single Proto compilation unit from stdin and 19 // emits binary Kythe artifacts to stdout as a sequence of Entity protos. 20 // 21 // eg: indexer foo.proto -o foo.bin 22 // indexer foo.proto | verifier foo.proto 23 // indexer -index_file some/file.kzip 24 // cat foo.proto | indexer | verifier foo.proto 25 26 #include <fcntl.h> 27 #include <sys/stat.h> 28 #include <unistd.h> 29 30 #include <string> 31 32 #include "absl/flags/flag.h" 33 #include "absl/flags/parse.h" 34 #include "absl/flags/usage.h" 35 #include "absl/log/check.h" 36 #include "absl/log/log.h" 37 #include "absl/status/statusor.h" 38 #include "absl/strings/match.h" 39 #include "absl/strings/str_cat.h" 40 #include "absl/strings/string_view.h" 41 #include "google/protobuf/io/coded_stream.h" 42 #include "google/protobuf/io/gzip_stream.h" 43 #include "google/protobuf/io/zero_copy_stream.h" 44 #include "google/protobuf/io/zero_copy_stream_impl.h" 45 #include "kythe/cxx/common/file_vname_generator.h" 46 #include "kythe/cxx/common/indexing/KytheCachingOutput.h" 47 #include "kythe/cxx/common/indexing/KytheGraphRecorder.h" 48 #include "kythe/cxx/common/init.h" 49 #include "kythe/cxx/common/json_proto.h" 50 #include "kythe/cxx/common/kzip_reader.h" 51 #include "kythe/cxx/common/path_utils.h" 52 #include "kythe/cxx/indexer/proto/indexer_frontend.h" 53 #include "kythe/proto/analysis.pb.h" 54 #include "kythe/proto/buildinfo.pb.h" 55 56 ABSL_FLAG(std::string, o, "-", "Output filename."); 57 ABSL_FLAG(bool, flush_after_each_entry, false, 58 "Flush output after writing each entry."); 59 ABSL_FLAG(std::string, index_file, "", 60 ".kzip file containing compilation unit."); 61 ABSL_FLAG(std::string, default_corpus, "", "Default corpus for VNames."); 62 ABSL_FLAG(std::string, default_root, "", "Default root for VNames."); 63 64 namespace kythe { 65 namespace { 66 67 /// Callback function to process a single compilation unit. 68 using CompilationVisitCallback = std::function<void( 69 const proto::CompilationUnit&, std::vector<proto::FileData> file_data)>; 70 71 /// \brief Reads all compilations from a .kzip file into memory. 72 /// \param path The path from which the file should be read. 73 /// \param visit Callback function called for each compiliation unit within the 74 /// kzip. 75 // TODO(justbuchanan): Refactor so that this function is shared with the cxx 76 // indexer. It was initially copied from cxx/indexer/frontend.cc. 77 void DecodeKzipFile(const std::string& path, 78 const CompilationVisitCallback& visit) { 79 // This forces the BuildDetails proto descriptor to be added to the pool so 80 // we can deserialize it. 81 proto::BuildDetails needed_for_proto_deserialization; 82 83 absl::StatusOr<IndexReader> reader = kythe::KzipReader::Open(path); 84 CHECK(reader.ok()) << "Couldn't open kzip from " << path << ": " 85 << reader.status(); 86 bool compilation_read = false; 87 auto status = reader->Scan([&](absl::string_view digest) { 88 std::vector<proto::FileData> virtual_files; 89 auto compilation = reader->ReadUnit(digest); 90 CHECK(compilation.ok()) << compilation.status(); 91 for (const auto& file : compilation->unit().required_input()) { 92 auto content = reader->ReadFile(file.info().digest()); 93 CHECK(content.ok()) << "Unable to read file with digest: " 94 << file.info().digest() << ": " << content.status(); 95 proto::FileData file_data; 96 file_data.set_content(*content); 97 file_data.mutable_info()->set_path(file.info().path()); 98 file_data.mutable_info()->set_digest(file.info().digest()); 99 virtual_files.push_back(std::move(file_data)); 100 } 101 102 visit(compilation->unit(), std::move(virtual_files)); 103 104 compilation_read = true; 105 return true; 106 }); 107 CHECK(status.ok()) << status.ToString(); 108 CHECK(compilation_read) << "Missing compilation in " << path; 109 } 110 111 bool ReadProtoFile(int fd, const std::string& relative_path, 112 const proto::VName& file_vname, 113 std::vector<proto::FileData>* files, 114 proto::CompilationUnit* unit) { 115 char buf[1024]; 116 std::string source_data; 117 ssize_t amount_read; 118 while ((amount_read = ::read(fd, buf, sizeof buf)) > 0) { 119 absl::StrAppend(&source_data, absl::string_view(buf, amount_read)); 120 } 121 if (amount_read < 0) { 122 LOG(ERROR) << "Error reading input file"; 123 return false; 124 } 125 proto::FileData file_data; 126 file_data.set_content(source_data); 127 file_data.mutable_info()->set_path(CleanPath(relative_path)); 128 proto::CompilationUnit::FileInput* file_input = unit->add_required_input(); 129 *file_input->mutable_v_name() = file_vname; 130 *file_input->mutable_info() = file_data.info(); 131 // keep the filename as entered on the command line in the argument list 132 unit->add_argument(relative_path); 133 unit->add_source_file(file_data.info().path()); 134 files->push_back(std::move(file_data)); 135 return true; 136 } 137 138 int main(int argc, char* argv[]) { 139 kythe::InitializeProgram(argv[0]); 140 absl::SetProgramUsageMessage( 141 R"(Command-line frontend for the Kythe Proto indexer. 142 Invokes the Kythe Proto indexer on compilation unit(s). By default writes binary 143 Kythe artifacts to STDOUT as a sequence of Entity protos; this destination can 144 be overridden with the argument of -o. 145 146 If -index_file is specified, input will be read from its argument (which will 147 typically end in .kzip). No other positional parameters may be specified, nor 148 may an additional input parameter be specified. 149 150 If -index_file is not specified, all positional parameters (and any flags 151 following "--") are taken as arguments to the Proto compiler. Those ending in 152 .proto are taken as the filenames of the compilation unit to be analyzed; all 153 arguments are added to the compilation unit's "arguments" field. If no .proto 154 filenames are among these arguments, or if "-" is supplied, then source text 155 will be read from STDIN (and named "stdin.proto"). 156 157 Examples: 158 indexer -index_file index.kzip 159 indexer -o foo.bin -- -Isome/path -Isome/other/path foo.proto 160 indexer foo.proto bar.proto | verifier foo.proto bar.proto")"); 161 std::vector<char*> remain = absl::ParseCommandLine(argc, argv); 162 std::vector<std::string> final_args(remain.begin() + 1, remain.end()); 163 164 std::string kzip_file; 165 if (!absl::GetFlag(FLAGS_index_file).empty()) { 166 CHECK(final_args.empty()) 167 << "No positional arguments are allowed when reading " 168 << "from an index file."; 169 kzip_file = absl::GetFlag(FLAGS_index_file); 170 } 171 172 int write_fd = STDOUT_FILENO; 173 if (absl::GetFlag(FLAGS_o) != "-") { 174 write_fd = 175 ::open(absl::GetFlag(FLAGS_o).c_str(), O_WRONLY | O_CREAT | O_TRUNC, 176 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 177 CHECK(write_fd != -1) << "Can't open output file"; 178 } 179 180 bool had_error = false; 181 182 { 183 google::protobuf::io::FileOutputStream raw_output(write_fd); 184 kythe::FileOutputStream kythe_output(&raw_output); 185 kythe_output.set_flush_after_each_entry( 186 absl::GetFlag(FLAGS_flush_after_each_entry)); 187 188 if (!kzip_file.empty()) { 189 DecodeKzipFile(kzip_file, [&](const proto::CompilationUnit& unit, 190 std::vector<proto::FileData> file_data) { 191 std::string err = 192 IndexProtoCompilationUnit(unit, file_data, &kythe_output); 193 if (!err.empty()) { 194 had_error = true; 195 LOG(ERROR) << "Error: " << err; 196 } 197 }); 198 } else { 199 std::vector<proto::FileData> files; 200 proto::CompilationUnit unit; 201 202 unit.set_working_directory(GetCurrentDirectory().value()); 203 FileVNameGenerator file_vnames; 204 kythe::proto::VName default_vname; 205 default_vname.set_corpus(absl::GetFlag(FLAGS_default_corpus)); 206 default_vname.set_root(absl::GetFlag(FLAGS_default_root)); 207 file_vnames.set_default_base_vname(default_vname); 208 bool stdin_requested = false; 209 for (const std::string& arg : final_args) { 210 if (arg == "-") { 211 stdin_requested = true; 212 } else if (absl::EndsWith(arg, ".proto")) { 213 int read_fd = ::open(arg.c_str(), O_RDONLY); 214 proto::VName file_vname = file_vnames.LookupVName(arg); 215 file_vname.set_path(CleanPath(file_vname.path())); 216 CHECK(ReadProtoFile(read_fd, arg, file_vname, &files, &unit)) 217 << "Read error for " << arg; 218 ::close(read_fd); 219 } else { 220 LOG(ERROR) << "Adding protoc argument: " << arg; 221 unit.add_argument(arg); 222 } 223 } 224 if (stdin_requested || files.empty()) { 225 proto::VName stdin_vname; 226 stdin_vname.set_corpus(unit.v_name().corpus()); 227 stdin_vname.set_root(unit.v_name().root()); 228 stdin_vname.set_path("stdin.proto"); 229 CHECK(ReadProtoFile(STDIN_FILENO, "stdin.proto", stdin_vname, &files, 230 &unit)) 231 << "Read error for protobuf on STDIN"; 232 } 233 234 std::string err = IndexProtoCompilationUnit(unit, files, &kythe_output); 235 if (!err.empty()) { 236 had_error = true; 237 LOG(ERROR) << "Error: " << err; 238 } 239 } 240 } 241 242 CHECK(::close(write_fd) == 0) << "Error closing output file"; 243 244 return had_error ? 1 : 0; 245 } 246 } // anonymous namespace 247 } // namespace kythe 248 249 int main(int argc, char* argv[]) { return kythe::main(argc, argv); }