kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/tools/static_claim_main.cc (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 // static_claim: a tool to assign ownership for indexing dependencies 17 // 18 // static_claim 19 // reads the names of .kzip files from standard input and emits a static claim 20 // assignment to standard output 21 22 #include <fcntl.h> 23 #include <sys/stat.h> 24 25 #include <iostream> 26 #include <map> 27 #include <set> 28 #include <string> 29 #include <vector> 30 31 #include "absl/flags/flag.h" 32 #include "absl/flags/parse.h" 33 #include "absl/flags/usage.h" 34 #include "absl/log/check.h" 35 #include "absl/log/log.h" 36 #include "absl/strings/str_format.h" 37 #include "google/protobuf/io/coded_stream.h" 38 #include "google/protobuf/io/gzip_stream.h" 39 #include "google/protobuf/io/zero_copy_stream_impl.h" 40 #include "kythe/cxx/common/init.h" 41 #include "kythe/cxx/common/kzip_reader.h" 42 #include "kythe/cxx/common/re2_flag.h" 43 #include "kythe/cxx/common/vname_ordering.h" 44 #include "kythe/proto/analysis.pb.h" 45 #include "kythe/proto/claim.pb.h" 46 #include "kythe/proto/filecontext.pb.h" 47 48 using kythe::proto::ClaimAssignment; 49 using kythe::proto::CompilationUnit; 50 using kythe::proto::VName; 51 52 ABSL_FLAG(bool, text, false, "Dump output as text instead of protobuf."); 53 ABSL_FLAG(bool, show_stats, false, "Show some statistics."); 54 ABSL_FLAG(kythe::RE2Flag, include_files, {}, 55 "If set, a RE2 pattern of file VName paths to claim."); 56 57 struct Claimable; 58 59 /// \brief Something (like a compilation unit) that can take responsibility for 60 /// a claimable object. 61 struct Claimant { 62 /// \brief This Claimant's VName. 63 VName vname; 64 /// \brief The set of confirmed claims that this Claimant has. Non-owning. 65 std::set<Claimable*> claims; 66 }; 67 68 /// \brief Stably compares `Claimants` by vname. 69 struct ClaimantPointerLess { 70 bool operator()(const Claimant* lhs, const Claimant* rhs) const { 71 return kythe::VNameLess()(lhs->vname, rhs->vname); 72 } 73 }; 74 75 /// \brief An object (like a header transcript) that a Claimant can take 76 /// responsibility for. 77 struct Claimable { 78 /// \brief This Claimable's VName. 79 VName vname; 80 /// \brief Of the `claimants`, which one has responsibility. Non-owning. 81 Claimant* elected_claimant; 82 /// \brief All of the Claimants that can possibly be given responsibility. 83 std::set<Claimant*, ClaimantPointerLess> claimants; 84 }; 85 86 /// \brief Populates the compilation units from a kzip. 87 /// \param path Path to the .kzip file. 88 /// \return Vector of collected CompilationUnits. 89 static std::vector<CompilationUnit> ReadCompilationUnits( 90 const std::string& path) { 91 kythe::IndexReader reader = kythe::KzipReader::Open(path).value(); 92 std::vector<CompilationUnit> result; 93 auto status = reader.Scan([&](const auto digest) { 94 const auto compilation = reader.ReadUnit(digest); 95 CHECK(compilation.ok()) << compilation.status(); 96 result.push_back(compilation->unit()); 97 return true; 98 }); 99 return result; 100 } 101 102 /// \brief Maps from vnames to claimants (like compilation units). 103 using ClaimantMap = std::map<VName, Claimant, kythe::VNameLess>; 104 105 /// \brief Maps from vnames to claimables. 106 /// 107 /// The vname for a claimable with a transcript (like a header file) 108 /// is formed from the underlying vname with its signature changed to 109 /// include the transcript as a prefix. 110 using ClaimableMap = std::map<VName, Claimable, kythe::VNameLess>; 111 112 /// \brief Range wrapper around unpacked ContextDependentVersion rows. 113 class FileContextRows { 114 public: 115 using iterator = decltype( 116 std::declval<kythe::proto::ContextDependentVersion>().row().begin()); 117 118 explicit FileContextRows( 119 const kythe::proto::CompilationUnit::FileInput& file_input) { 120 for (const google::protobuf::Any& detail : file_input.details()) { 121 if (detail.UnpackTo(&context_)) break; 122 } 123 } 124 125 iterator begin() const { return context_.row().begin(); } 126 iterator end() const { return context_.row().end(); } 127 bool empty() const { return context_.row().empty(); } 128 129 private: 130 kythe::proto::ContextDependentVersion context_; 131 }; 132 133 /// \brief Generates and exports a mapping from claimants to claimables. 134 class ClaimTool { 135 public: 136 /// \brief Selects a claimant for every claimable. 137 /// 138 /// We apply a simple heuristic: for every claimable, for every possible 139 /// claimant, we choose the claimant with the fewest claimables assigned to 140 /// it when trying to assign a new claimable. 141 void AssignClaims() { 142 // claimables_ is sorted by VName. 143 for (auto& claimable : claimables_) { 144 CHECK(!claimable.second.claimants.empty()); 145 Claimant* emptiest_claimant = *claimable.second.claimants.begin(); 146 // claimants is also sorted by VName, so this assignment should be stable. 147 for (auto& claimant : claimable.second.claimants) { 148 if (claimant->claims.size() < emptiest_claimant->claims.size()) { 149 emptiest_claimant = claimant; 150 } 151 } 152 emptiest_claimant->claims.insert(&claimable.second); 153 claimable.second.elected_claimant = emptiest_claimant; 154 } 155 } 156 157 /// \brief Export claim data to `out_fd` in the format specified by 158 /// `FLAGS_text`. 159 void WriteClaimFile(int out_fd) { 160 if (absl::GetFlag(FLAGS_text)) { 161 for (auto& claimable : claimables_) { 162 if (claimable.second.elected_claimant) { 163 ClaimAssignment claim; 164 claim.mutable_compilation_v_name()->CopyFrom( 165 claimable.second.elected_claimant->vname); 166 claim.mutable_dependency_v_name()->CopyFrom(claimable.second.vname); 167 absl::PrintF("%v", claim); 168 } 169 } 170 return; 171 } 172 { 173 namespace io = google::protobuf::io; 174 io::FileOutputStream file_output_stream(out_fd); 175 io::GzipOutputStream::Options options; 176 options.format = io::GzipOutputStream::GZIP; 177 io::GzipOutputStream gzip_stream(&file_output_stream, options); 178 io::CodedOutputStream coded_stream(&gzip_stream); 179 for (auto& claimable : claimables_) { 180 const auto& elected_claimant = claimable.second.elected_claimant; 181 if (elected_claimant) { 182 ClaimAssignment claim; 183 claim.mutable_compilation_v_name()->CopyFrom(elected_claimant->vname); 184 claim.mutable_dependency_v_name()->CopyFrom(claimable.second.vname); 185 coded_stream.WriteVarint32(claim.ByteSizeLong()); 186 CHECK(claim.SerializeToCodedStream(&coded_stream)); 187 } 188 } 189 CHECK(!coded_stream.HadError()); 190 } 191 CHECK(::close(out_fd) == 0) << "errno was: " << errno; 192 } 193 194 /// \brief Add `unit` as a possible claimant and remember all of its 195 /// dependencies (and their different transcripts) as claimables. 196 void HandleCompilationUnit(const CompilationUnit& unit) { 197 auto insert_result = 198 claimants_.emplace(unit.v_name(), Claimant{unit.v_name()}); 199 if (!insert_result.second) { 200 LOG(WARNING) << "Compilation unit with name " << unit.v_name() 201 << " had the same VName as another previous unit."; 202 } 203 for (auto& input : unit.required_input()) { 204 ++total_input_count_; 205 FileContextRows context_rows(input); 206 if (!context_rows.empty()) { 207 VName input_vname = input.v_name(); 208 if (!input_vname.signature().empty()) { 209 // We generally expect that file vnames have no signature. 210 // If this happens, we'll emit a warning, but we'll also be sure to 211 // keep the signature around as a suffix when building vnames for 212 // contexts. 213 LOG(WARNING) << "Input " << input_vname 214 << " has a nonempty signature.\n"; 215 } 216 for (const auto& row : context_rows) { 217 // If we have a (r, h, c) entry, we'd better have an input entry for 218 // the file included at h with context c (otherwise the index file 219 // isn't well-formed). We therefore only need to claim each unique 220 // row. 221 VName cxt_vname = input_vname; 222 cxt_vname.set_signature(row.source_context() + 223 input_vname.signature()); 224 Claim(cxt_vname, &insert_result.first->second); 225 } 226 } else { 227 Claim(input.v_name(), &insert_result.first->second); 228 } 229 } 230 } 231 232 const ClaimantMap& claimants() const { return claimants_; } 233 const ClaimableMap& claimables() const { return claimables_; } 234 size_t total_include_count() const { return total_include_count_; } 235 size_t total_input_count() const { return total_input_count_; } 236 size_t skipped_input_count() const { return skipped_input_count_; } 237 238 private: 239 void Claim(const VName& vname, Claimant* claimant) { 240 ++total_include_count_; 241 if (auto accept = absl::GetFlag(FLAGS_include_files); 242 accept.value != nullptr && 243 !RE2::FullMatch(vname.path(), *accept.value)) { 244 ++skipped_input_count_; 245 return; 246 } 247 248 auto input_insert_result = 249 claimables_.emplace(vname, Claimable{vname, nullptr}); 250 input_insert_result.first->second.claimants.insert(claimant); 251 } 252 /// Objects that may claim resources. 253 ClaimantMap claimants_; 254 /// Resources that may be claimed. 255 ClaimableMap claimables_; 256 /// Number of required inputs. 257 size_t total_include_count_ = 0; 258 /// Number of #includes. 259 size_t total_input_count_ = 0; 260 /// Number of required inputs skipped due for failure to match. 261 size_t skipped_input_count_ = 0; 262 }; 263 264 int main(int argc, char* argv[]) { 265 GOOGLE_PROTOBUF_VERIFY_VERSION; 266 kythe::InitializeProgram(argv[0]); 267 absl::SetProgramUsageMessage("static_claim: assign ownership for analysis"); 268 absl::ParseCommandLine(argc, argv); 269 std::string next_index_file; 270 ClaimTool tool; 271 while (std::getline(std::cin, next_index_file)) { 272 if (next_index_file.empty()) { 273 continue; 274 } 275 for (const auto& unit : ReadCompilationUnits(next_index_file)) { 276 tool.HandleCompilationUnit(unit); 277 } 278 } 279 if (!std::cin.eof()) { 280 absl::FPrintF(stderr, "Error reading from standard input.\n"); 281 return 1; 282 } 283 tool.AssignClaims(); 284 tool.WriteClaimFile(STDOUT_FILENO); 285 if (absl::GetFlag(FLAGS_show_stats)) { 286 absl::PrintF("Number of claimables: %lu\n", tool.claimables().size()); 287 absl::PrintF(" Number of claimants: %lu\n", tool.claimants().size()); 288 absl::PrintF(" Total input count: %lu\n", tool.total_input_count()); 289 absl::PrintF(" Total include count: %lu\n", tool.total_include_count()); 290 absl::PrintF(" Skipped input count: %lu\n", tool.skipped_input_count()); 291 absl::PrintF("%%claimables/includes: %f\n", 292 tool.claimables().size() * 100.0 / tool.total_include_count()); 293 } 294 return 0; 295 }