kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/tools/static_claim_main.cc (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  // static_claim: a tool to assign ownership for indexing dependencies
    17  //
    18  // static_claim
    19  //   reads the names of .kzip files from standard input and emits a static claim
    20  //   assignment to standard output
    21  
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cerrno>
#include <cstdio>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/flags/usage.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_format.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/gzip_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "kythe/cxx/common/init.h"
#include "kythe/cxx/common/kzip_reader.h"
#include "kythe/cxx/common/re2_flag.h"
#include "kythe/cxx/common/vname_ordering.h"
#include "kythe/proto/analysis.pb.h"
#include "kythe/proto/claim.pb.h"
#include "kythe/proto/filecontext.pb.h"
    47  
    48  using kythe::proto::ClaimAssignment;
    49  using kythe::proto::CompilationUnit;
    50  using kythe::proto::VName;
    51  
// --text: selects the output format used by ClaimTool::WriteClaimFile. When
// false (the default) the claims are written as a gzip-compressed stream of
// length-prefixed ClaimAssignment messages.
ABSL_FLAG(bool, text, false, "Dump output as text instead of protobuf.");
// --show_stats: when set, main prints claimable/claimant/input counters after
// the claim file has been written.
ABSL_FLAG(bool, show_stats, false, "Show some statistics.");
// --include_files: optional filter applied in ClaimTool::Claim. When set, only
// inputs whose VName path fully matches the pattern become claimables; the
// rest are counted as skipped.
ABSL_FLAG(kythe::RE2Flag, include_files, {},
          "If set, a RE2 pattern of file VName paths to claim.");
    56  
    57  struct Claimable;
    58  
/// \brief Something (like a compilation unit) that can take responsibility for
/// a claimable object.
///
/// This is a plain aggregate; `vname` must stay the first member because
/// callers brace-initialize it as `Claimant{vname}`.
struct Claimant {
  /// \brief This Claimant's VName.
  VName vname;
  /// \brief The set of confirmed claims that this Claimant has. Non-owning.
  ///
  /// The set is keyed on pointer value; the claim-assignment heuristic only
  /// consults its size.
  std::set<Claimable*> claims;
};
    67  
    68  /// \brief Stably compares `Claimants` by vname.
    69  struct ClaimantPointerLess {
    70    bool operator()(const Claimant* lhs, const Claimant* rhs) const {
    71      return kythe::VNameLess()(lhs->vname, rhs->vname);
    72    }
    73  };
    74  
    75  /// \brief An object (like a header transcript) that a Claimant can take
    76  /// responsibility for.
    77  struct Claimable {
    78    /// \brief This Claimable's VName.
    79    VName vname;
    80    /// \brief Of the `claimants`, which one has responsibility. Non-owning.
    81    Claimant* elected_claimant;
    82    /// \brief All of the Claimants that can possibly be given responsibility.
    83    std::set<Claimant*, ClaimantPointerLess> claimants;
    84  };
    85  
    86  /// \brief Populates the compilation units from a kzip.
    87  /// \param path Path to the .kzip file.
    88  /// \return Vector of collected CompilationUnits.
    89  static std::vector<CompilationUnit> ReadCompilationUnits(
    90      const std::string& path) {
    91    kythe::IndexReader reader = kythe::KzipReader::Open(path).value();
    92    std::vector<CompilationUnit> result;
    93    auto status = reader.Scan([&](const auto digest) {
    94      const auto compilation = reader.ReadUnit(digest);
    95      CHECK(compilation.ok()) << compilation.status();
    96      result.push_back(compilation->unit());
    97      return true;
    98    });
    99    return result;
   100  }
   101  
/// \brief Maps from vnames to claimants (like compilation units).
///
/// Keys are ordered with kythe::VNameLess, so iteration order is determined by
/// the vnames alone.
using ClaimantMap = std::map<VName, Claimant, kythe::VNameLess>;

/// \brief Maps from vnames to claimables.
///
/// The vname for a claimable with a transcript (like a header file)
/// is formed from the underlying vname with its signature changed to
/// include the transcript as a prefix.
///
/// Keys are ordered with kythe::VNameLess, like ClaimantMap.
using ClaimableMap = std::map<VName, Claimable, kythe::VNameLess>;
   111  
   112  /// \brief Range wrapper around unpacked ContextDependentVersion rows.
   113  class FileContextRows {
   114   public:
   115    using iterator = decltype(
   116        std::declval<kythe::proto::ContextDependentVersion>().row().begin());
   117  
   118    explicit FileContextRows(
   119        const kythe::proto::CompilationUnit::FileInput& file_input) {
   120      for (const google::protobuf::Any& detail : file_input.details()) {
   121        if (detail.UnpackTo(&context_)) break;
   122      }
   123    }
   124  
   125    iterator begin() const { return context_.row().begin(); }
   126    iterator end() const { return context_.row().end(); }
   127    bool empty() const { return context_.row().empty(); }
   128  
   129   private:
   130    kythe::proto::ContextDependentVersion context_;
   131  };
   132  
   133  /// \brief Generates and exports a mapping from claimants to claimables.
   134  class ClaimTool {
   135   public:
   136    /// \brief Selects a claimant for every claimable.
   137    ///
   138    /// We apply a simple heuristic: for every claimable, for every possible
   139    /// claimant, we choose the claimant with the fewest claimables assigned to
   140    /// it when trying to assign a new claimable.
   141    void AssignClaims() {
   142      // claimables_ is sorted by VName.
   143      for (auto& claimable : claimables_) {
   144        CHECK(!claimable.second.claimants.empty());
   145        Claimant* emptiest_claimant = *claimable.second.claimants.begin();
   146        // claimants is also sorted by VName, so this assignment should be stable.
   147        for (auto& claimant : claimable.second.claimants) {
   148          if (claimant->claims.size() < emptiest_claimant->claims.size()) {
   149            emptiest_claimant = claimant;
   150          }
   151        }
   152        emptiest_claimant->claims.insert(&claimable.second);
   153        claimable.second.elected_claimant = emptiest_claimant;
   154      }
   155    }
   156  
   157    /// \brief Export claim data to `out_fd` in the format specified by
   158    /// `FLAGS_text`.
   159    void WriteClaimFile(int out_fd) {
   160      if (absl::GetFlag(FLAGS_text)) {
   161        for (auto& claimable : claimables_) {
   162          if (claimable.second.elected_claimant) {
   163            ClaimAssignment claim;
   164            claim.mutable_compilation_v_name()->CopyFrom(
   165                claimable.second.elected_claimant->vname);
   166            claim.mutable_dependency_v_name()->CopyFrom(claimable.second.vname);
   167            absl::PrintF("%v", claim);
   168          }
   169        }
   170        return;
   171      }
   172      {
   173        namespace io = google::protobuf::io;
   174        io::FileOutputStream file_output_stream(out_fd);
   175        io::GzipOutputStream::Options options;
   176        options.format = io::GzipOutputStream::GZIP;
   177        io::GzipOutputStream gzip_stream(&file_output_stream, options);
   178        io::CodedOutputStream coded_stream(&gzip_stream);
   179        for (auto& claimable : claimables_) {
   180          const auto& elected_claimant = claimable.second.elected_claimant;
   181          if (elected_claimant) {
   182            ClaimAssignment claim;
   183            claim.mutable_compilation_v_name()->CopyFrom(elected_claimant->vname);
   184            claim.mutable_dependency_v_name()->CopyFrom(claimable.second.vname);
   185            coded_stream.WriteVarint32(claim.ByteSizeLong());
   186            CHECK(claim.SerializeToCodedStream(&coded_stream));
   187          }
   188        }
   189        CHECK(!coded_stream.HadError());
   190      }
   191      CHECK(::close(out_fd) == 0) << "errno was: " << errno;
   192    }
   193  
   194    /// \brief Add `unit` as a possible claimant and remember all of its
   195    /// dependencies (and their different transcripts) as claimables.
   196    void HandleCompilationUnit(const CompilationUnit& unit) {
   197      auto insert_result =
   198          claimants_.emplace(unit.v_name(), Claimant{unit.v_name()});
   199      if (!insert_result.second) {
   200        LOG(WARNING) << "Compilation unit with name " << unit.v_name()
   201                     << " had the same VName as another previous unit.";
   202      }
   203      for (auto& input : unit.required_input()) {
   204        ++total_input_count_;
   205        FileContextRows context_rows(input);
   206        if (!context_rows.empty()) {
   207          VName input_vname = input.v_name();
   208          if (!input_vname.signature().empty()) {
   209            // We generally expect that file vnames have no signature.
   210            // If this happens, we'll emit a warning, but we'll also be sure to
   211            // keep the signature around as a suffix when building vnames for
   212            // contexts.
   213            LOG(WARNING) << "Input " << input_vname
   214                         << " has a nonempty signature.\n";
   215          }
   216          for (const auto& row : context_rows) {
   217            // If we have a (r, h, c) entry, we'd better have an input entry for
   218            // the file included at h with context c (otherwise the index file
   219            // isn't well-formed). We therefore only need to claim each unique
   220            // row.
   221            VName cxt_vname = input_vname;
   222            cxt_vname.set_signature(row.source_context() +
   223                                    input_vname.signature());
   224            Claim(cxt_vname, &insert_result.first->second);
   225          }
   226        } else {
   227          Claim(input.v_name(), &insert_result.first->second);
   228        }
   229      }
   230    }
   231  
   232    const ClaimantMap& claimants() const { return claimants_; }
   233    const ClaimableMap& claimables() const { return claimables_; }
   234    size_t total_include_count() const { return total_include_count_; }
   235    size_t total_input_count() const { return total_input_count_; }
   236    size_t skipped_input_count() const { return skipped_input_count_; }
   237  
   238   private:
   239    void Claim(const VName& vname, Claimant* claimant) {
   240      ++total_include_count_;
   241      if (auto accept = absl::GetFlag(FLAGS_include_files);
   242          accept.value != nullptr &&
   243          !RE2::FullMatch(vname.path(), *accept.value)) {
   244        ++skipped_input_count_;
   245        return;
   246      }
   247  
   248      auto input_insert_result =
   249          claimables_.emplace(vname, Claimable{vname, nullptr});
   250      input_insert_result.first->second.claimants.insert(claimant);
   251    }
   252    /// Objects that may claim resources.
   253    ClaimantMap claimants_;
   254    /// Resources that may be claimed.
   255    ClaimableMap claimables_;
   256    /// Number of required inputs.
   257    size_t total_include_count_ = 0;
   258    /// Number of #includes.
   259    size_t total_input_count_ = 0;
   260    /// Number of required inputs skipped due for failure to match.
   261    size_t skipped_input_count_ = 0;
   262  };
   263  
   264  int main(int argc, char* argv[]) {
   265    GOOGLE_PROTOBUF_VERIFY_VERSION;
   266    kythe::InitializeProgram(argv[0]);
   267    absl::SetProgramUsageMessage("static_claim: assign ownership for analysis");
   268    absl::ParseCommandLine(argc, argv);
   269    std::string next_index_file;
   270    ClaimTool tool;
   271    while (std::getline(std::cin, next_index_file)) {
   272      if (next_index_file.empty()) {
   273        continue;
   274      }
   275      for (const auto& unit : ReadCompilationUnits(next_index_file)) {
   276        tool.HandleCompilationUnit(unit);
   277      }
   278    }
   279    if (!std::cin.eof()) {
   280      absl::FPrintF(stderr, "Error reading from standard input.\n");
   281      return 1;
   282    }
   283    tool.AssignClaims();
   284    tool.WriteClaimFile(STDOUT_FILENO);
   285    if (absl::GetFlag(FLAGS_show_stats)) {
   286      absl::PrintF("Number of claimables: %lu\n", tool.claimables().size());
   287      absl::PrintF(" Number of claimants: %lu\n", tool.claimants().size());
   288      absl::PrintF("   Total input count: %lu\n", tool.total_input_count());
   289      absl::PrintF(" Total include count: %lu\n", tool.total_include_count());
   290      absl::PrintF(" Skipped input count: %lu\n", tool.skipped_input_count());
   291      absl::PrintF("%%claimables/includes: %f\n",
   292                   tool.claimables().size() * 100.0 / tool.total_include_count());
   293    }
   294    return 0;
   295  }