kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/textproto/textproto_extractor.cc (about)

     1  /*
     2   * Copyright 2019 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Standalone extractor for text-format protobuf (textproto) files. Given a
    18  // textproto file, builds a kzip containing it and all proto files it depends
    19  // on.
    20  //
    21  // Usage:
    22  //   export KYTHE_OUTPUT_FILE=foo.kzip
    23  //   textproto_extractor foo.pbtxt
    24  //   textproto_extractor foo.pbtxt -- --proto_path dir/with/proto/deps
    25  
    26  #include <cstdlib>
    27  #include <string>
    28  #include <utility>
    29  #include <vector>
    30  
    31  #include "absl/flags/flag.h"
    32  #include "absl/flags/parse.h"
    33  #include "absl/flags/usage.h"
    34  #include "absl/log/check.h"
    35  #include "absl/strings/string_view.h"
    36  #include "kythe/cxx/common/file_utils.h"
    37  #include "kythe/cxx/common/index_writer.h"
    38  #include "kythe/cxx/common/init.h"
    39  #include "kythe/cxx/common/kzip_writer.h"
    40  #include "kythe/cxx/common/path_utils.h"
    41  #include "kythe/cxx/extractor/proto/proto_extractor.h"
    42  #include "kythe/cxx/extractor/textproto/textproto_schema.h"
    43  #include "kythe/cxx/indexer/proto/search_path.h"
    44  #include "kythe/proto/analysis.pb.h"
    45  #include "kythe/proto/storage.pb.h"
    46  
    47  ABSL_FLAG(std::string, proto_message, "",
    48            "namespace-qualified message name for the textproto.");
    49  ABSL_FLAG(std::vector<std::string>, proto_files, {},
    50            "A comma-separated list of proto files needed to fully define "
    51            "the textproto's schema.");
    52  ABSL_FLAG(std::string, record_separator, "",
    53            "Delimitates each record within a file. Presence of this"
    54            "indicates this is a recordio textformat file.");
    55  
    56  namespace kythe {
    57  namespace lang_textproto {
    58  namespace {
    59  
    60  /// \brief Returns a kzip-based IndexWriter or dies.
    61  IndexWriter OpenKzipWriterOrDie(absl::string_view path) {
    62    auto writer = KzipWriter::Create(path);
    63    CHECK(writer.ok()) << "Failed to open KzipWriter: " << writer.status();
    64    return std::move(*writer);
    65  }
    66  
    67  int main(int argc, char* argv[]) {
    68    kythe::InitializeProgram(argv[0]);
    69    absl::SetProgramUsageMessage(
    70        R"(Standalone extractor for the Kythe textproto indexer.
    71  Creates a kzip containing the textproto and all proto files it depends on.
    72  
    73  In order to make sense of the textproto, the extractor must know what proto
    74  message describes it and what file that proto message comes from. This
    75  information can be supplied with the --proto_message and --proto_files flags or
    76  with specially-formatted comments in the textproto itself:
    77  
    78    # proto-file: some/file.proto
    79    # proto-message: some_namespace.MyMessage
    80    # proto-import: some/proto/with/extensions.proto
    81  
    82  Examples:
    83    export KYTHE_OUTPUT_FILE=foo.kzip
    84    textproto_extractor foo.pbtxt
    85    textproto_extractor foo.pbtxt --proto_message MyMessage --proto_files foo.proto,bar.proto
    86    textproto_extractor foo.pbtxt --proto_message MyMessage --proto_files foo.proto -- --proto_path dir/with/my/deps
    87    textproto_extractor foo.recordiotxt --proto_message MyMessage --proto_files foo.proto --record_separator @@@ -- --proto_path dir/with/my/deps")");
    88    std::vector<char*> remain = absl::ParseCommandLine(argc, argv);
    89    std::vector<std::string> final_args(remain.begin() + 1, remain.end());
    90  
    91    lang_proto::ProtoExtractor proto_extractor;
    92    proto_extractor.ConfigureFromEnv();
    93  
    94    // Parse --proto_path and -I args into a set of path substitution (search
    95    // paths).
    96    std::vector<std::string> textproto_args;
    97    ::kythe::lang_proto::ParsePathSubstitutions(
    98        final_args, &proto_extractor.path_substitutions, &textproto_args);
    99  
   100    // Load textproto.
   101    CHECK(textproto_args.size() == 1)
   102        << "Expected 1 textproto file, got " << textproto_args.size();
   103    std::string textproto_filename = textproto_args[0];
   104    const std::string textproto = ::kythe::LoadFileOrDie(textproto_filename);
   105  
   106    const char* output_file = getenv("KYTHE_OUTPUT_FILE");
   107    CHECK(output_file != nullptr)
   108        << "Please specify an output kzip file with the KYTHE_OUTPUT_FILE "
   109           "environment variable.";
   110    IndexWriter kzip_writer = OpenKzipWriterOrDie(output_file);
   111  
   112    // Info about the textproto's corresponding proto can come from comments in
   113    // the textproto itself or as command line flags to the extractor. Note that
   114    // if metadata is specified both in the textproto and via flags, flags take
   115    // precedence.
   116    TextprotoSchema schema = ParseTextprotoSchemaComments(textproto);
   117    std::vector<std::string> proto_filenames;
   118    if (!absl::GetFlag(FLAGS_proto_files).empty()) {
   119      proto_filenames = absl::GetFlag(FLAGS_proto_files);
   120    } else {
   121      proto_filenames.push_back(std::string(schema.proto_file));
   122      for (const auto& extra_import : schema.proto_imports) {
   123        proto_filenames.push_back(std::string(extra_import));
   124      }
   125    }
   126    std::string proto_message = absl::GetFlag(FLAGS_proto_message);
   127    if (!proto_message.empty()) {
   128      schema.proto_message = proto_message;
   129    }
   130    CHECK(!proto_filenames.empty())
   131        << "Proto file must be specified either with --proto_files flag or in "
   132           "textproto comments";
   133    CHECK(!schema.proto_message.empty())
   134        << "Proto message must be specified either with --proto_message flag or "
   135           "in textproto comments";
   136  
   137    // Call the proto extractor. This adds proto_file and all of its dependencies
   138    // into the kzip/unit, which we'll later need when indexing the textproto.
   139    proto::IndexedCompilation compilation;
   140    *compilation.mutable_unit() =
   141        proto_extractor.ExtractProtos(proto_filenames, &kzip_writer);
   142  
   143    // Relativize path before writing to kzip.
   144    const std::string textproto_rel_filename =
   145        RelativizePath(textproto_filename, proto_extractor.root_directory);
   146  
   147    // Replace the proto extractor's source file list with our textproto.
   148    compilation.mutable_unit()->clear_source_file();
   149    compilation.mutable_unit()->add_source_file(textproto_rel_filename);
   150  
   151    // Re-build compilation unit's arguments list. Add --proto_message and any
   152    // protoc args.
   153    compilation.mutable_unit()->clear_argument();
   154    compilation.mutable_unit()->add_argument(textproto_filename);
   155    compilation.mutable_unit()->add_argument("--proto_message");
   156    compilation.mutable_unit()->add_argument(std::string(schema.proto_message));
   157    std::string record_separator = absl::GetFlag(FLAGS_record_separator);
   158    if (!record_separator.empty()) {
   159      compilation.mutable_unit()->add_argument("--record_separator");
   160      compilation.mutable_unit()->add_argument(record_separator);
   161    }
   162    // Add protoc args.
   163    if (!proto_extractor.path_substitutions.empty()) {
   164      compilation.mutable_unit()->add_argument("--");
   165      for (auto& arg : lang_proto::PathSubstitutionsToArgs(
   166               proto_extractor.path_substitutions)) {
   167        compilation.mutable_unit()->add_argument(arg);
   168      }
   169    }
   170  
   171    // Add textproto file to kzip.
   172    {
   173      auto digest = kzip_writer.WriteFile(textproto);
   174      CHECK(digest.ok()) << digest.status();
   175  
   176      proto::CompilationUnit::FileInput* file_input =
   177          compilation.mutable_unit()->add_required_input();
   178      proto::VName vname =
   179          proto_extractor.vname_gen.LookupVName(textproto_rel_filename);
   180      if (vname.corpus().empty()) {
   181        vname.set_corpus(proto_extractor.corpus);
   182      }
   183      *file_input->mutable_v_name() = std::move(vname);
   184      file_input->mutable_info()->set_path(textproto_rel_filename);
   185      file_input->mutable_info()->set_digest(*digest);
   186    }
   187  
   188    // Save compilation unit.
   189    auto digest = kzip_writer.WriteUnit(compilation);
   190    CHECK(digest.ok()) << "Error writing unit to kzip: " << digest.status();
   191    CHECK(kzip_writer.Close().ok());
   192  
   193    return 0;
   194  }
   195  
   196  }  // namespace
   197  }  // namespace lang_textproto
   198  }  // namespace kythe
   199  
   200  int main(int argc, char* argv[]) {
   201    return kythe::lang_textproto::main(argc, argv);
   202  }