kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/proto/proto_extractor.cc (about)

     1  /*
     2   * Copyright 2019 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "proto_extractor.h"
    18  
    19  #include <cstdlib>
    20  #include <cstring>
    21  #include <memory>
    22  #include <set>
    23  #include <string>
    24  #include <utility>
    25  #include <vector>
    26  
    27  #include "absl/log/check.h"
    28  #include "absl/log/log.h"
    29  #include "absl/strings/string_view.h"
    30  #include "google/protobuf/compiler/importer.h"
    31  #include "kythe/cxx/common/file_utils.h"
    32  #include "kythe/cxx/common/file_vname_generator.h"
    33  #include "kythe/cxx/common/index_writer.h"
    34  #include "kythe/cxx/common/path_utils.h"
    35  #include "kythe/cxx/indexer/proto/search_path.h"
    36  #include "kythe/proto/analysis.pb.h"
    37  #include "kythe/proto/storage.pb.h"
    38  
    39  namespace kythe {
    40  namespace lang_proto {
    41  namespace {
    42  
    43  using ::google::protobuf::compiler::DiskSourceTree;
    44  
    45  // Error "collector" that just writes messages to log output.
    46  class LoggingMultiFileErrorCollector
    47      : public google::protobuf::compiler::MultiFileErrorCollector {
    48   public:
    49    void AddError(const std::string& filename, int line, int column,
    50                  const std::string& message) override {
    51      LOG(ERROR) << filename << "@" << line << ":" << column << ": " << message;
    52    }
    53  
    54    void AddWarning(const std::string& filename, int line, int column,
    55                    const std::string& message) override {
    56      LOG(ERROR) << filename << "@" << line << ":" << column << ": " << message;
    57    }
    58  };
    59  
    60  // DiskSourceTree that records which proto files are opened while parsing the
    61  // toplevel proto(s), allowing us to get a list of transitive dependencies.
    62  class RecordingDiskSourceTree : public DiskSourceTree {
    63   public:
    64    google::protobuf::io::ZeroCopyInputStream* Open(
    65        absl::string_view filename) override {
    66      // Record resolved/canonical path because the same proto may be Open()'d via
    67      // multiple relative paths and we only want to record it once.
    68      std::string canonical_path;
    69      if (!DiskSourceTree::VirtualFileToDiskFile(filename, &canonical_path)) {
    70        return nullptr;
    71      }
    72      if (opened_files_.find(canonical_path) == opened_files_.end()) {
    73        opened_files_.insert(canonical_path);
    74      }
    75  
    76      return DiskSourceTree::Open(filename);
    77    }
    78  
    79    // A set of unique file paths that have been passed to Open().
    80    const std::set<std::string>& opened_files() const { return opened_files_; }
    81  
    82   private:
    83    std::set<std::string> opened_files_;
    84  };
    85  
    86  }  // namespace
    87  
    88  proto::CompilationUnit ProtoExtractor::ExtractProtos(
    89      const std::vector<std::string>& proto_filenames,
    90      IndexWriter* index_writer) const {
    91    proto::CompilationUnit unit;
    92  
    93    unit.set_working_directory(GetCurrentDirectory().value());
    94  
    95    for (const std::string& proto : proto_filenames) {
    96      unit.add_argument(proto);
    97    }
    98  
    99    // Add path substitutions to src_tree.
   100    RecordingDiskSourceTree src_tree;
   101    src_tree.MapPath("", "");  // Add current directory to VFS.
   102    for (const auto& sub : path_substitutions) {
   103      src_tree.MapPath(sub.first, sub.second);
   104    }
   105  
   106    // Add protoc args to output.
   107    if (!path_substitutions.empty()) {
   108      unit.add_argument("--");
   109      for (auto& arg : PathSubstitutionsToArgs(path_substitutions)) {
   110        unit.add_argument(arg);
   111      }
   112    }
   113  
   114    // Import the toplevel proto(s), which will record paths of any transitive
   115    // dependencies to src_tree.
   116    {
   117      LoggingMultiFileErrorCollector err_collector;
   118      for (const std::string& fname : proto_filenames) {
   119        // Note that a separate importer instance is used for each top-level
   120        // import to avoid double-importing any subprotos, which would happen if
   121        // two top-level protos share any transitive dependencies.
   122        google::protobuf::compiler::Importer importer(&src_tree, &err_collector);
   123        CHECK(importer.Import(fname) != nullptr)
   124            << "Failed to import file: " << fname;
   125  
   126        unit.add_source_file(RelativizePath(fname, root_directory));
   127      }
   128    }
   129  
   130    // Write each toplevel proto and its transitive dependencies into the kzip.
   131    for (const std::string& abspath : src_tree.opened_files()) {
   132      // Resolve path relative to the proto compiler's search paths.
   133      std::string relpath, shadow;
   134      CHECK(DiskSourceTree::SUCCESS ==
   135            src_tree.DiskFileToVirtualFile(abspath, &relpath, &shadow));
   136      CHECK(shadow.empty()) << "Filepath shadows a real file: " << relpath;
   137      // Read file contents
   138      std::string file_contents;
   139      {
   140        std::unique_ptr<google::protobuf::io::ZeroCopyInputStream> in_stream(
   141            src_tree.Open(relpath));
   142        CHECK(in_stream != nullptr) << "Can't open file: " << relpath;
   143  
   144        const void* data = nullptr;
   145        int size = 0;
   146        while (in_stream->Next(&data, &size)) {
   147          file_contents.append(static_cast<const char*>(data), size);
   148        }
   149      }
   150  
   151      // Make path relative to KYTHE_ROOT_DIRECTORY.
   152      const std::string final_path = RelativizePath(abspath, root_directory);
   153  
   154      // Write file to index.
   155      auto digest = index_writer->WriteFile(file_contents);
   156      CHECK(digest.ok()) << digest.status();
   157  
   158      // Record file info to compilation unit.
   159      proto::CompilationUnit::FileInput* file_input = unit.add_required_input();
   160      proto::VName vname = vname_gen.LookupVName(final_path);
   161      if (vname.corpus().empty()) {
   162        vname.set_corpus(std::string(corpus));
   163      }
   164      *file_input->mutable_v_name() = std::move(vname);
   165      file_input->mutable_info()->set_path(final_path);
   166      file_input->mutable_info()->set_digest(*digest);
   167    }
   168  
   169    return unit;
   170  }
   171  
   172  void ProtoExtractor::ConfigureFromEnv() {
   173    if (const char* env_corpus = getenv("KYTHE_CORPUS")) {
   174      corpus = env_corpus;
   175    }
   176  
   177    // File paths in the output kzip should be relative to this directory.
   178    if (const char* env_root_directory = getenv("KYTHE_ROOT_DIRECTORY")) {
   179      root_directory = env_root_directory;
   180    }
   181  
   182    // Configure VName generator.
   183    const char* vname_path = getenv("KYTHE_VNAMES");
   184    if (vname_path && strlen(vname_path) > 0) {
   185      std::string json = LoadFileOrDie(vname_path);
   186      std::string error_text;
   187      CHECK(vname_gen.LoadJsonString(json, &error_text))
   188          << "Could not parse vname generator configuration: " << error_text;
   189    }
   190  }
   191  
   192  }  // namespace lang_proto
   193  }  // namespace kythe