kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/textproto/textproto_extractor.cc (about) 1 /* 2 * Copyright 2019 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Standalone extractor for text-format protobuf (textproto) files. Given a 18 // textproto file, builds a kzip containing it and all proto files it depends 19 // on. 20 // 21 // Usage: 22 // export KYTHE_OUTPUT_FILE=foo.kzip 23 // textproto_extractor foo.pbtxt 24 // textproto_extractor foo.pbtxt -- --proto_path dir/with/proto/deps 25 26 #include <cstdlib> 27 #include <string> 28 #include <utility> 29 #include <vector> 30 31 #include "absl/flags/flag.h" 32 #include "absl/flags/parse.h" 33 #include "absl/flags/usage.h" 34 #include "absl/log/check.h" 35 #include "absl/strings/string_view.h" 36 #include "kythe/cxx/common/file_utils.h" 37 #include "kythe/cxx/common/index_writer.h" 38 #include "kythe/cxx/common/init.h" 39 #include "kythe/cxx/common/kzip_writer.h" 40 #include "kythe/cxx/common/path_utils.h" 41 #include "kythe/cxx/extractor/proto/proto_extractor.h" 42 #include "kythe/cxx/extractor/textproto/textproto_schema.h" 43 #include "kythe/cxx/indexer/proto/search_path.h" 44 #include "kythe/proto/analysis.pb.h" 45 #include "kythe/proto/storage.pb.h" 46 47 ABSL_FLAG(std::string, proto_message, "", 48 "namespace-qualified message name for the textproto."); 49 ABSL_FLAG(std::vector<std::string>, proto_files, {}, 50 "A comma-separated list of proto files needed to fully define " 51 "the textproto's schema."); 52 ABSL_FLAG(std::string, record_separator, "", 53 "Delimitates each record within a file. Presence of this" 54 "indicates this is a recordio textformat file."); 55 56 namespace kythe { 57 namespace lang_textproto { 58 namespace { 59 60 /// \brief Returns a kzip-based IndexWriter or dies. 61 IndexWriter OpenKzipWriterOrDie(absl::string_view path) { 62 auto writer = KzipWriter::Create(path); 63 CHECK(writer.ok()) << "Failed to open KzipWriter: " << writer.status(); 64 return std::move(*writer); 65 } 66 67 int main(int argc, char* argv[]) { 68 kythe::InitializeProgram(argv[0]); 69 absl::SetProgramUsageMessage( 70 R"(Standalone extractor for the Kythe textproto indexer. 71 Creates a kzip containing the textproto and all proto files it depends on. 72 73 In order to make sense of the textproto, the extractor must know what proto 74 message describes it and what file that proto message comes from. This 75 information can be supplied with the --proto_message and --proto_files flags or 76 with specially-formatted comments in the textproto itself: 77 78 # proto-file: some/file.proto 79 # proto-message: some_namespace.MyMessage 80 # proto-import: some/proto/with/extensions.proto 81 82 Examples: 83 export KYTHE_OUTPUT_FILE=foo.kzip 84 textproto_extractor foo.pbtxt 85 textproto_extractor foo.pbtxt --proto_message MyMessage --proto_files foo.proto,bar.proto 86 textproto_extractor foo.pbtxt --proto_message MyMessage --proto_files foo.proto -- --proto_path dir/with/my/deps 87 textproto_extractor foo.recordiotxt --proto_message MyMessage --proto_files foo.proto --record_separator @@@ -- --proto_path dir/with/my/deps")"); 88 std::vector<char*> remain = absl::ParseCommandLine(argc, argv); 89 std::vector<std::string> final_args(remain.begin() + 1, remain.end()); 90 91 lang_proto::ProtoExtractor proto_extractor; 92 proto_extractor.ConfigureFromEnv(); 93 94 // Parse --proto_path and -I args into a set of path substitution (search 95 // paths). 96 std::vector<std::string> textproto_args; 97 ::kythe::lang_proto::ParsePathSubstitutions( 98 final_args, &proto_extractor.path_substitutions, &textproto_args); 99 100 // Load textproto. 101 CHECK(textproto_args.size() == 1) 102 << "Expected 1 textproto file, got " << textproto_args.size(); 103 std::string textproto_filename = textproto_args[0]; 104 const std::string textproto = ::kythe::LoadFileOrDie(textproto_filename); 105 106 const char* output_file = getenv("KYTHE_OUTPUT_FILE"); 107 CHECK(output_file != nullptr) 108 << "Please specify an output kzip file with the KYTHE_OUTPUT_FILE " 109 "environment variable."; 110 IndexWriter kzip_writer = OpenKzipWriterOrDie(output_file); 111 112 // Info about the textproto's corresponding proto can come from comments in 113 // the textproto itself or as command line flags to the extractor. Note that 114 // if metadata is specified both in the textproto and via flags, flags take 115 // precedence. 116 TextprotoSchema schema = ParseTextprotoSchemaComments(textproto); 117 std::vector<std::string> proto_filenames; 118 if (!absl::GetFlag(FLAGS_proto_files).empty()) { 119 proto_filenames = absl::GetFlag(FLAGS_proto_files); 120 } else { 121 proto_filenames.push_back(std::string(schema.proto_file)); 122 for (const auto& extra_import : schema.proto_imports) { 123 proto_filenames.push_back(std::string(extra_import)); 124 } 125 } 126 std::string proto_message = absl::GetFlag(FLAGS_proto_message); 127 if (!proto_message.empty()) { 128 schema.proto_message = proto_message; 129 } 130 CHECK(!proto_filenames.empty()) 131 << "Proto file must be specified either with --proto_files flag or in " 132 "textproto comments"; 133 CHECK(!schema.proto_message.empty()) 134 << "Proto message must be specified either with --proto_message flag or " 135 "in textproto comments"; 136 137 // Call the proto extractor. This adds proto_file and all of its dependencies 138 // into the kzip/unit, which we'll later need when indexing the textproto. 139 proto::IndexedCompilation compilation; 140 *compilation.mutable_unit() = 141 proto_extractor.ExtractProtos(proto_filenames, &kzip_writer); 142 143 // Relativize path before writing to kzip. 144 const std::string textproto_rel_filename = 145 RelativizePath(textproto_filename, proto_extractor.root_directory); 146 147 // Replace the proto extractor's source file list with our textproto. 148 compilation.mutable_unit()->clear_source_file(); 149 compilation.mutable_unit()->add_source_file(textproto_rel_filename); 150 151 // Re-build compilation unit's arguments list. Add --proto_message and any 152 // protoc args. 153 compilation.mutable_unit()->clear_argument(); 154 compilation.mutable_unit()->add_argument(textproto_filename); 155 compilation.mutable_unit()->add_argument("--proto_message"); 156 compilation.mutable_unit()->add_argument(std::string(schema.proto_message)); 157 std::string record_separator = absl::GetFlag(FLAGS_record_separator); 158 if (!record_separator.empty()) { 159 compilation.mutable_unit()->add_argument("--record_separator"); 160 compilation.mutable_unit()->add_argument(record_separator); 161 } 162 // Add protoc args. 163 if (!proto_extractor.path_substitutions.empty()) { 164 compilation.mutable_unit()->add_argument("--"); 165 for (auto& arg : lang_proto::PathSubstitutionsToArgs( 166 proto_extractor.path_substitutions)) { 167 compilation.mutable_unit()->add_argument(arg); 168 } 169 } 170 171 // Add textproto file to kzip. 172 { 173 auto digest = kzip_writer.WriteFile(textproto); 174 CHECK(digest.ok()) << digest.status(); 175 176 proto::CompilationUnit::FileInput* file_input = 177 compilation.mutable_unit()->add_required_input(); 178 proto::VName vname = 179 proto_extractor.vname_gen.LookupVName(textproto_rel_filename); 180 if (vname.corpus().empty()) { 181 vname.set_corpus(proto_extractor.corpus); 182 } 183 *file_input->mutable_v_name() = std::move(vname); 184 file_input->mutable_info()->set_path(textproto_rel_filename); 185 file_input->mutable_info()->set_digest(*digest); 186 } 187 188 // Save compilation unit. 189 auto digest = kzip_writer.WriteUnit(compilation); 190 CHECK(digest.ok()) << "Error writing unit to kzip: " << digest.status(); 191 CHECK(kzip_writer.Close().ok()); 192 193 return 0; 194 } 195 196 } // namespace 197 } // namespace lang_textproto 198 } // namespace kythe 199 200 int main(int argc, char* argv[]) { 201 return kythe::lang_textproto::main(argc, argv); 202 }