kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/proto/kythe_indexer_main.cc (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Allows the Kythe Proto indexer to be invoked from the command line. By
    18  // default, this program reads a single Proto compilation unit from stdin and
    19  // emits binary Kythe artifacts to stdout as a sequence of Entity protos.
    20  //
    21  //   eg: indexer foo.proto -o foo.bin
    22  //       indexer foo.proto | verifier foo.proto
    23  //       indexer -index_file some/file.kzip
    24  //       cat foo.proto | indexer | verifier foo.proto
    25  
    26  #include <fcntl.h>
    27  #include <sys/stat.h>
    28  #include <unistd.h>
    29  
    30  #include <string>
    31  
    32  #include "absl/flags/flag.h"
    33  #include "absl/flags/parse.h"
    34  #include "absl/flags/usage.h"
    35  #include "absl/log/check.h"
    36  #include "absl/log/log.h"
    37  #include "absl/status/statusor.h"
    38  #include "absl/strings/match.h"
    39  #include "absl/strings/str_cat.h"
    40  #include "absl/strings/string_view.h"
    41  #include "google/protobuf/io/coded_stream.h"
    42  #include "google/protobuf/io/gzip_stream.h"
    43  #include "google/protobuf/io/zero_copy_stream.h"
    44  #include "google/protobuf/io/zero_copy_stream_impl.h"
    45  #include "kythe/cxx/common/file_vname_generator.h"
    46  #include "kythe/cxx/common/indexing/KytheCachingOutput.h"
    47  #include "kythe/cxx/common/indexing/KytheGraphRecorder.h"
    48  #include "kythe/cxx/common/init.h"
    49  #include "kythe/cxx/common/json_proto.h"
    50  #include "kythe/cxx/common/kzip_reader.h"
    51  #include "kythe/cxx/common/path_utils.h"
    52  #include "kythe/cxx/indexer/proto/indexer_frontend.h"
    53  #include "kythe/proto/analysis.pb.h"
    54  #include "kythe/proto/buildinfo.pb.h"
    55  
    56  ABSL_FLAG(std::string, o, "-", "Output filename.");
    57  ABSL_FLAG(bool, flush_after_each_entry, false,
    58            "Flush output after writing each entry.");
    59  ABSL_FLAG(std::string, index_file, "",
    60            ".kzip file containing compilation unit.");
    61  ABSL_FLAG(std::string, default_corpus, "", "Default corpus for VNames.");
    62  ABSL_FLAG(std::string, default_root, "", "Default root for VNames.");
    63  
    64  namespace kythe {
    65  namespace {
    66  
    67  /// Callback function to process a single compilation unit.
    68  using CompilationVisitCallback = std::function<void(
    69      const proto::CompilationUnit&, std::vector<proto::FileData> file_data)>;
    70  
    71  /// \brief Reads all compilations from a .kzip file into memory.
    72  /// \param path The path from which the file should be read.
    73  /// \param visit Callback function called for each compiliation unit within the
    74  /// kzip.
    75  // TODO(justbuchanan): Refactor so that this function is shared with the cxx
    76  // indexer. It was initially copied from cxx/indexer/frontend.cc.
    77  void DecodeKzipFile(const std::string& path,
    78                      const CompilationVisitCallback& visit) {
    79    // This forces the BuildDetails proto descriptor to be added to the pool so
    80    // we can deserialize it.
    81    proto::BuildDetails needed_for_proto_deserialization;
    82  
    83    absl::StatusOr<IndexReader> reader = kythe::KzipReader::Open(path);
    84    CHECK(reader.ok()) << "Couldn't open kzip from " << path << ": "
    85                       << reader.status();
    86    bool compilation_read = false;
    87    auto status = reader->Scan([&](absl::string_view digest) {
    88      std::vector<proto::FileData> virtual_files;
    89      auto compilation = reader->ReadUnit(digest);
    90      CHECK(compilation.ok()) << compilation.status();
    91      for (const auto& file : compilation->unit().required_input()) {
    92        auto content = reader->ReadFile(file.info().digest());
    93        CHECK(content.ok()) << "Unable to read file with digest: "
    94                            << file.info().digest() << ": " << content.status();
    95        proto::FileData file_data;
    96        file_data.set_content(*content);
    97        file_data.mutable_info()->set_path(file.info().path());
    98        file_data.mutable_info()->set_digest(file.info().digest());
    99        virtual_files.push_back(std::move(file_data));
   100      }
   101  
   102      visit(compilation->unit(), std::move(virtual_files));
   103  
   104      compilation_read = true;
   105      return true;
   106    });
   107    CHECK(status.ok()) << status.ToString();
   108    CHECK(compilation_read) << "Missing compilation in " << path;
   109  }
   110  
   111  bool ReadProtoFile(int fd, const std::string& relative_path,
   112                     const proto::VName& file_vname,
   113                     std::vector<proto::FileData>* files,
   114                     proto::CompilationUnit* unit) {
   115    char buf[1024];
   116    std::string source_data;
   117    ssize_t amount_read;
   118    while ((amount_read = ::read(fd, buf, sizeof buf)) > 0) {
   119      absl::StrAppend(&source_data, absl::string_view(buf, amount_read));
   120    }
   121    if (amount_read < 0) {
   122      LOG(ERROR) << "Error reading input file";
   123      return false;
   124    }
   125    proto::FileData file_data;
   126    file_data.set_content(source_data);
   127    file_data.mutable_info()->set_path(CleanPath(relative_path));
   128    proto::CompilationUnit::FileInput* file_input = unit->add_required_input();
   129    *file_input->mutable_v_name() = file_vname;
   130    *file_input->mutable_info() = file_data.info();
   131    // keep the filename as entered on the command line in the argument list
   132    unit->add_argument(relative_path);
   133    unit->add_source_file(file_data.info().path());
   134    files->push_back(std::move(file_data));
   135    return true;
   136  }
   137  
   138  int main(int argc, char* argv[]) {
   139    kythe::InitializeProgram(argv[0]);
   140    absl::SetProgramUsageMessage(
   141        R"(Command-line frontend for the Kythe Proto indexer.
   142  Invokes the Kythe Proto indexer on compilation unit(s). By default writes binary
   143  Kythe artifacts to STDOUT as a sequence of Entity protos; this destination can
   144  be overridden with the argument of -o.
   145  
   146  If -index_file is specified, input will be read from its argument (which will
   147  typically end in .kzip). No other positional parameters may be specified, nor
   148  may an additional input parameter be specified.
   149  
   150  If -index_file is not specified, all positional parameters (and any flags
   151  following "--") are taken as arguments to the Proto compiler. Those ending in
   152  .proto are taken as the filenames of the compilation unit to be analyzed; all
   153  arguments are added to the compilation unit's "arguments" field. If no .proto
   154  filenames are among these arguments, or if "-" is supplied, then source text
   155  will be read from STDIN (and named "stdin.proto").
   156  
   157  Examples:
   158    indexer -index_file index.kzip
   159    indexer -o foo.bin -- -Isome/path -Isome/other/path foo.proto
   160    indexer foo.proto bar.proto | verifier foo.proto bar.proto")");
   161    std::vector<char*> remain = absl::ParseCommandLine(argc, argv);
   162    std::vector<std::string> final_args(remain.begin() + 1, remain.end());
   163  
   164    std::string kzip_file;
   165    if (!absl::GetFlag(FLAGS_index_file).empty()) {
   166      CHECK(final_args.empty())
   167          << "No positional arguments are allowed when reading "
   168          << "from an index file.";
   169      kzip_file = absl::GetFlag(FLAGS_index_file);
   170    }
   171  
   172    int write_fd = STDOUT_FILENO;
   173    if (absl::GetFlag(FLAGS_o) != "-") {
   174      write_fd =
   175          ::open(absl::GetFlag(FLAGS_o).c_str(), O_WRONLY | O_CREAT | O_TRUNC,
   176                 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
   177      CHECK(write_fd != -1) << "Can't open output file";
   178    }
   179  
   180    bool had_error = false;
   181  
   182    {
   183      google::protobuf::io::FileOutputStream raw_output(write_fd);
   184      kythe::FileOutputStream kythe_output(&raw_output);
   185      kythe_output.set_flush_after_each_entry(
   186          absl::GetFlag(FLAGS_flush_after_each_entry));
   187  
   188      if (!kzip_file.empty()) {
   189        DecodeKzipFile(kzip_file, [&](const proto::CompilationUnit& unit,
   190                                      std::vector<proto::FileData> file_data) {
   191          std::string err =
   192              IndexProtoCompilationUnit(unit, file_data, &kythe_output);
   193          if (!err.empty()) {
   194            had_error = true;
   195            LOG(ERROR) << "Error: " << err;
   196          }
   197        });
   198      } else {
   199        std::vector<proto::FileData> files;
   200        proto::CompilationUnit unit;
   201  
   202        unit.set_working_directory(GetCurrentDirectory().value());
   203        FileVNameGenerator file_vnames;
   204        kythe::proto::VName default_vname;
   205        default_vname.set_corpus(absl::GetFlag(FLAGS_default_corpus));
   206        default_vname.set_root(absl::GetFlag(FLAGS_default_root));
   207        file_vnames.set_default_base_vname(default_vname);
   208        bool stdin_requested = false;
   209        for (const std::string& arg : final_args) {
   210          if (arg == "-") {
   211            stdin_requested = true;
   212          } else if (absl::EndsWith(arg, ".proto")) {
   213            int read_fd = ::open(arg.c_str(), O_RDONLY);
   214            proto::VName file_vname = file_vnames.LookupVName(arg);
   215            file_vname.set_path(CleanPath(file_vname.path()));
   216            CHECK(ReadProtoFile(read_fd, arg, file_vname, &files, &unit))
   217                << "Read error for " << arg;
   218            ::close(read_fd);
   219          } else {
   220            LOG(ERROR) << "Adding protoc argument: " << arg;
   221            unit.add_argument(arg);
   222          }
   223        }
   224        if (stdin_requested || files.empty()) {
   225          proto::VName stdin_vname;
   226          stdin_vname.set_corpus(unit.v_name().corpus());
   227          stdin_vname.set_root(unit.v_name().root());
   228          stdin_vname.set_path("stdin.proto");
   229          CHECK(ReadProtoFile(STDIN_FILENO, "stdin.proto", stdin_vname, &files,
   230                              &unit))
   231              << "Read error for protobuf on STDIN";
   232        }
   233  
   234        std::string err = IndexProtoCompilationUnit(unit, files, &kythe_output);
   235        if (!err.empty()) {
   236          had_error = true;
   237          LOG(ERROR) << "Error: " << err;
   238        }
   239      }
   240    }
   241  
   242    CHECK(::close(write_fd) == 0) << "Error closing output file";
   243  
   244    return had_error ? 1 : 0;
   245  }
   246  }  // anonymous namespace
   247  }  // namespace kythe
   248  
   249  int main(int argc, char* argv[]) { return kythe::main(argc, argv); }