kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/bazel_artifact_selector.cc (about) 1 /* 2 * Copyright 2020 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "kythe/cxx/extractor/bazel_artifact_selector.h" 17 18 #include <cstddef> 19 #include <cstdint> 20 #include <functional> 21 #include <optional> 22 #include <string> 23 #include <tuple> 24 #include <type_traits> 25 #include <utility> 26 #include <vector> 27 28 #include "absl/base/attributes.h" 29 #include "absl/container/flat_hash_map.h" 30 #include "absl/container/flat_hash_set.h" 31 #include "absl/log/check.h" 32 #include "absl/log/die_if_null.h" 33 #include "absl/log/log.h" 34 #include "absl/status/status.h" 35 #include "absl/strings/ascii.h" 36 #include "absl/strings/escaping.h" 37 #include "absl/strings/numbers.h" 38 #include "absl/strings/str_cat.h" 39 #include "absl/strings/str_join.h" 40 #include "absl/strings/string_view.h" 41 #include "absl/types/span.h" 42 #include "google/protobuf/any.pb.h" 43 #include "kythe/cxx/extractor/bazel_artifact.h" 44 #include "kythe/proto/bazel_artifact_selector.pb.h" 45 #include "kythe/proto/bazel_artifact_selector_v2.pb.h" 46 #include "re2/re2.h" 47 #include "third_party/bazel/src/main/java/com/google/devtools/build/lib/buildeventstream/proto/build_event_stream.pb.h" 48 49 namespace kythe { 50 namespace { 51 52 std::optional<std::string> ToUri(const build_event_stream::File& file) { 53 switch (file.file_case()) { 54 case build_event_stream::File::kUri: 55 return file.uri(); 56 case build_event_stream::File::kContents: 57 // We expect inline data to be rare and small, so always base64 encode it. 58 return absl::StrCat( 59 "data:base64,", 60 // data URIs use regular base64, not "web safe" base64. 61 absl::Base64Escape(file.contents())); 62 case build_event_stream::File::kSymlinkTargetPath: 63 return std::nullopt; 64 default: 65 break; 66 } 67 LOG(ERROR) << "Unexpected build_event_stream::File case!" << file.file_case(); 68 return std::nullopt; 69 } 70 71 std::string ToLocalPath(const build_event_stream::File& file) { 72 std::vector<std::string> parts(file.path_prefix().begin(), 73 file.path_prefix().end()); 74 parts.push_back(file.name()); 75 return absl::StrJoin(parts, "/"); 76 } 77 78 std::optional<BazelArtifactFile> ToBazelArtifactFile( 79 const build_event_stream::File& file, const RegexSet& allowlist) { 80 if (!allowlist.Match(file.name())) { 81 return std::nullopt; 82 } 83 std::optional<std::string> uri = ToUri(file); 84 if (!uri.has_value()) return std::nullopt; 85 return BazelArtifactFile{ 86 .local_path = ToLocalPath(file), 87 .uri = *std::move(uri), 88 }; 89 } 90 91 template <typename T> 92 T& GetOrConstruct(std::optional<T>& value) { 93 return value.has_value() ? *value : value.emplace(); 94 } 95 96 template <typename T> 97 const T& AsConstRef(const T& value) { 98 return value; 99 } 100 101 template <typename T> 102 const T& AsConstRef(const T* value) { 103 return *value; 104 } 105 106 template <typename T, typename U> 107 absl::Status DeserializeInternal(T& selector, const U& container) { 108 absl::Status error; 109 for (const auto& any : container) { 110 switch (auto status = selector.DeserializeFrom(AsConstRef(any)); 111 status.code()) { 112 case absl::StatusCode::kOk: 113 case absl::StatusCode::kUnimplemented: 114 return absl::OkStatus(); 115 case absl::StatusCode::kInvalidArgument: 116 return status; 117 case absl::StatusCode::kFailedPrecondition: 118 error = status; 119 continue; 120 default: 121 error = status; 122 LOG(WARNING) << "Unrecognized status code: " << status; 123 } 124 } 125 return error.ok() ? absl::NotFoundError("No state found") 126 : absl::NotFoundError( 127 absl::StrCat("No state found: ", error.ToString())); 128 } 129 bool StrictAtoI(absl::string_view value, int64_t* out) { 130 if (value == "0") { 131 *out = 0; 132 return true; 133 } 134 if (value.empty() || value.front() == '0') { 135 // We need to ignore leading zeros as they don't contribute to the integral 136 // value. 137 return false; 138 } 139 for (char ch : value) { 140 if (!absl::ascii_isdigit(ch)) { 141 return false; 142 } 143 } 144 return absl::SimpleAtoi(value, out); 145 } 146 } // namespace 147 148 absl::Status BazelArtifactSelector::Deserialize( 149 absl::Span<const google::protobuf::Any> state) { 150 return DeserializeInternal(*this, state); 151 } 152 153 absl::Status BazelArtifactSelector::Deserialize( 154 absl::Span<const google::protobuf::Any* const> state) { 155 return DeserializeInternal(*this, state); 156 } 157 158 std::optional<BazelArtifact> AspectArtifactSelector::Select( 159 const build_event_stream::BuildEvent& event) { 160 std::optional<BazelArtifact> result = std::nullopt; 161 if (event.id().has_named_set()) { 162 result = 163 SelectFileSet(event.id().named_set().id(), event.named_set_of_files()); 164 } else if (event.id().has_target_completed()) { 165 result = 166 SelectTargetCompleted(event.id().target_completed(), event.completed()); 167 } 168 if (event.last_message()) { 169 state_ = {}; 170 } 171 return result; 172 } 173 174 class AspectArtifactSelectorSerializationHelper { 175 public: 176 using FileId = AspectArtifactSelector::FileId; 177 using ProtoFile = ::kythe::proto::BazelAspectArtifactSelectorStateV2::File; 178 using FileSet = AspectArtifactSelector::FileSet; 179 using ProtoFileSet = 180 ::kythe::proto::BazelAspectArtifactSelectorStateV2::FileSet; 181 using FileSetId = AspectArtifactSelector::FileSetId; 182 using State = AspectArtifactSelector::State; 183 184 static bool SerializeInto( 185 const State& state, 186 kythe::proto::BazelAspectArtifactSelectorStateV2& result) { 187 return Serializer(&state, result).Serialize(); 188 } 189 190 static absl::Status DeserializeFrom( 191 const kythe::proto::BazelAspectArtifactSelectorStateV2& state, 192 State& result) { 193 return Deserializer(&state, result).Deserialize(); 194 } 195 196 private: 197 class Serializer { 198 public: 199 explicit Serializer(const State* state ABSL_ATTRIBUTE_LIFETIME_BOUND, 200 kythe::proto::BazelAspectArtifactSelectorStateV2& result 201 ABSL_ATTRIBUTE_LIFETIME_BOUND) 202 : state_(*ABSL_DIE_IF_NULL(state)), result_(result) {} 203 204 bool Serialize() { 205 for (const auto& [id, file_set] : state_.file_sets.file_sets()) { 206 SerializeFileSet(id, file_set); 207 } 208 for (FileSetId id : state_.file_sets.disposed()) { 209 SerializeDisposed(id); 210 } 211 for (const auto& [id, target] : state_.pending) { 212 SerializePending(id, target); 213 } 214 return true; 215 } 216 217 private: 218 static int64_t ToSerializationId(FileSetId id, size_t other) { 219 if (const auto [unpacked] = id; unpacked >= 0) { 220 return unpacked; 221 } 222 // 0 is reserved for the integral ids, so start at -1. 223 return -1 - static_cast<int64_t>(other); 224 } 225 226 int64_t SerializeFileSetId(FileSetId id) { 227 auto [iter, inserted] = set_id_map_.try_emplace( 228 id, ToSerializationId(id, result_.file_set_ids().size())); 229 if (inserted && iter->second < 0) { 230 result_.add_file_set_ids(state_.file_sets.ToString(id)); 231 } 232 return iter->second; 233 } 234 235 void SerializeFileSet(FileSetId id, const FileSet& file_set) { 236 auto& entry = (*result_.mutable_file_sets())[SerializeFileSetId(id)]; 237 for (FileId file_id : file_set.files) { 238 if (std::optional<uint64_t> index = SerializeFile(file_id)) { 239 entry.add_files(*index); 240 } 241 } 242 for (FileSetId child_id : file_set.file_sets) { 243 entry.add_file_sets(SerializeFileSetId(child_id)); 244 } 245 } 246 247 std::optional<uint64_t> SerializeFile(FileId id) { 248 const BazelArtifactFile* file = state_.files.Find(id); 249 if (file == nullptr) { 250 LOG(INFO) << "Omitting extracted FileId from serialization: " 251 << std::get<0>(id); 252 // FileSets may still reference files which have already been selected. 253 // If so, don't keep them when serializing. 254 return std::nullopt; 255 } 256 auto [iter, inserted] = 257 file_id_map_.try_emplace(id, result_.files().size()); 258 if (!inserted) { 259 return iter->second; 260 } 261 262 auto* entry = result_.add_files(); 263 entry->set_local_path(file->local_path); 264 entry->set_uri(file->uri); 265 return iter->second; 266 } 267 268 void SerializeDisposed(FileSetId id) { 269 result_.add_disposed(SerializeFileSetId(id)); 270 } 271 272 void SerializePending(FileSetId id, absl::string_view target) { 273 (*result_.mutable_pending())[SerializeFileSetId(id)] = target; 274 } 275 276 const State& state_; 277 kythe::proto::BazelAspectArtifactSelectorStateV2& result_; 278 279 absl::flat_hash_map<FileId, uint64_t> file_id_map_; 280 absl::flat_hash_map<FileSetId, int64_t> set_id_map_; 281 }; 282 283 class Deserializer { 284 public: 285 explicit Deserializer( 286 const kythe::proto::BazelAspectArtifactSelectorStateV2* state 287 ABSL_ATTRIBUTE_LIFETIME_BOUND, 288 State& result ABSL_ATTRIBUTE_LIFETIME_BOUND) 289 : state_(*ABSL_DIE_IF_NULL(state)), result_(result) {} 290 291 absl::Status Deserialize() { 292 // First, deserialize all of the disposed sets to help check consistency 293 // during the rest of deserialization. 294 for (int64_t id : state_.disposed()) { 295 absl::StatusOr<FileSetId> real_id = DeserializeFileSetId(id); 296 if (!real_id.ok()) return real_id.status(); 297 result_.file_sets.Dispose(*real_id); 298 } 299 { 300 // Then check the file_set_ids list for uniqueness: 301 absl::flat_hash_set<std::string> non_integer_ids( 302 state_.file_set_ids().begin(), state_.file_set_ids().end()); 303 if (non_integer_ids.size() != state_.file_set_ids().size()) { 304 return absl::InvalidArgumentError("Inconsistent file_set_ids map"); 305 } 306 } 307 308 for (const auto& [id, file_set] : state_.file_sets()) { 309 // Ensure pending and live file sets are distinct. 310 if (state_.pending().contains(id)) { 311 return absl::InvalidArgumentError( 312 absl::StrCat("FileSet ", id, " is both pending and live")); 313 } 314 absl::Status status = DeserializeFileSet(id, file_set); 315 if (!status.ok()) return status; 316 } 317 for (const auto& [id, target] : state_.pending()) { 318 absl::Status status = DeserializePending(id, target); 319 if (!status.ok()) return status; 320 } 321 return absl::OkStatus(); 322 } 323 324 private: 325 static constexpr FileSetId kDummy{0}; 326 327 static absl::StatusOr<std::string> ToDeserializationId( 328 const kythe::proto::BazelAspectArtifactSelectorStateV2& state, 329 int64_t id) { 330 if (id < 0) { 331 // Normalize the -1 based index. 332 size_t index = -(id + 1); 333 if (index > state.file_set_ids().size()) { 334 return absl::InvalidArgumentError(absl::StrCat( 335 "Non-integral FileSetId index out of range: ", index)); 336 } 337 return state.file_set_ids(index); 338 } 339 return absl::StrCat(id); 340 } 341 342 absl::StatusOr<FileSetId> DeserializeFileSetId(int64_t id) { 343 auto [iter, inserted] = set_id_map_.try_emplace(id, kDummy); 344 if (inserted) { 345 absl::StatusOr<std::string> string_id = ToDeserializationId(state_, id); 346 if (!string_id.ok()) return string_id.status(); 347 348 std::optional<FileSetId> file_set_id = 349 result_.file_sets.InternUnlessDisposed(*string_id); 350 if (!file_set_id.has_value()) { 351 return absl::InvalidArgumentError( 352 "Encountered disposed FileSetId during deserialization"); 353 } 354 iter->second = *file_set_id; 355 } 356 return iter->second; 357 } 358 359 absl::Status DeserializeFileSet(int64_t id, const ProtoFileSet& file_set) { 360 absl::StatusOr<FileSetId> file_set_id = DeserializeFileSetId(id); 361 if (!file_set_id.ok()) return file_set_id.status(); 362 363 FileSet result_set; 364 for (uint64_t file_id : file_set.files()) { 365 absl::StatusOr<FileId> real_id = DeserializeFile(file_id); 366 if (!real_id.ok()) return real_id.status(); 367 368 result_set.files.push_back(*real_id); 369 } 370 for (int64_t child_id : file_set.file_sets()) { 371 if (!(state_.file_sets().contains(child_id) || 372 state_.pending().contains(child_id))) { 373 // Ensure internal consistency. 374 return absl::InvalidArgumentError(absl::StrCat( 375 "Child FileSetId is neither live nor pending: ", id)); 376 } 377 378 absl::StatusOr<FileSetId> real_id = DeserializeFileSetId(child_id); 379 if (!real_id.ok()) return real_id.status(); 380 381 result_set.file_sets.push_back(*real_id); 382 } 383 if (!result_.file_sets.InsertUnlessDisposed(*file_set_id, 384 std::move(result_set))) { 385 return absl::InvalidArgumentError( 386 absl::StrCat("FileSetId both disposed and live: ", id)); 387 } 388 return absl::OkStatus(); 389 } 390 391 absl::StatusOr<FileId> DeserializeFile(uint64_t id) { 392 if (id > state_.files_size()) { 393 return absl::InvalidArgumentError( 394 absl::StrCat("File index out of range: ", id)); 395 } 396 return result_.files.Insert(BazelArtifactFile{ 397 .local_path = state_.files(id).local_path(), 398 .uri = state_.files(id).uri(), 399 }); 400 } 401 402 absl::Status DeserializePending(int64_t id, absl::string_view target) { 403 absl::StatusOr<FileSetId> real_id = DeserializeFileSetId(id); 404 if (!real_id.ok()) return real_id.status(); 405 406 result_.pending.try_emplace(*real_id, target); 407 return absl::OkStatus(); 408 } 409 410 const kythe::proto::BazelAspectArtifactSelectorStateV2& state_; 411 State& result_; 412 413 absl::flat_hash_map<int64_t, FileSetId> set_id_map_; 414 }; 415 }; 416 417 bool AspectArtifactSelector::SerializeInto(google::protobuf::Any& state) const { 418 switch (options_.serialization_format) { 419 case AspectArtifactSelectorSerializationFormat::kV2: { 420 kythe::proto::BazelAspectArtifactSelectorStateV2 raw; 421 if (!AspectArtifactSelectorSerializationHelper::SerializeInto(state_, 422 raw)) { 423 return false; 424 } 425 state.PackFrom(raw); 426 return true; 427 } 428 case AspectArtifactSelectorSerializationFormat::kV1: { 429 kythe::proto::BazelAspectArtifactSelectorState raw; 430 for (FileSetId id : state_.file_sets.disposed()) { 431 raw.add_disposed(state_.file_sets.ToString(id)); 432 } 433 for (const auto& [id, target] : state_.pending) { 434 (*raw.mutable_pending())[state_.file_sets.ToString(id)] = target; 435 } 436 for (const auto& [id, file_set] : state_.file_sets.file_sets()) { 437 auto& entry = (*raw.mutable_filesets())[state_.file_sets.ToString(id)]; 438 for (FileSetId child_id : file_set.file_sets) { 439 entry.add_file_sets()->set_id(state_.file_sets.ToString(child_id)); 440 } 441 for (FileId file_id : file_set.files) { 442 const BazelArtifactFile* file = state_.files.Find(file_id); 443 if (file == nullptr) continue; 444 445 auto* file_entry = entry.add_files(); 446 file_entry->set_name(file->local_path); 447 file_entry->set_uri(file->uri); 448 } 449 } 450 state.PackFrom(raw); 451 return true; 452 } 453 } 454 return false; 455 } 456 457 absl::Status AspectArtifactSelector::DeserializeFrom( 458 const google::protobuf::Any& state) { 459 if (auto raw = kythe::proto::BazelAspectArtifactSelectorStateV2(); 460 state.UnpackTo(&raw)) { 461 state_ = {}; 462 return AspectArtifactSelectorSerializationHelper::DeserializeFrom(raw, 463 state_); 464 } else if (state.Is<kythe::proto::BazelAspectArtifactSelectorStateV2>()) { 465 return absl::InvalidArgumentError( 466 "Malformed kythe.proto.BazelAspectArtifactSelectorStateV2"); 467 } 468 if (auto raw = kythe::proto::BazelAspectArtifactSelectorState(); 469 state.UnpackTo(&raw)) { 470 state_ = {}; 471 for (const auto& id : raw.disposed()) { 472 if (std::optional<FileSetId> file_set_id = 473 state_.file_sets.InternUnlessDisposed(id)) { 474 state_.file_sets.Dispose(*file_set_id); 475 } 476 } 477 for (const auto& [id, target] : raw.pending()) { 478 if (std::optional<FileSetId> file_set_id = 479 state_.file_sets.InternUnlessDisposed(id)) { 480 state_.pending.try_emplace(*file_set_id, target); 481 } 482 } 483 for (const auto& [id, file_set] : raw.filesets()) { 484 if (std::optional<FileSetId> file_set_id = 485 state_.file_sets.InternUnlessDisposed(id)) { 486 InsertFileSet(*file_set_id, file_set); 487 } 488 } 489 return absl::OkStatus(); 490 } else if (state.Is<kythe::proto::BazelAspectArtifactSelectorState>()) { 491 return absl::InvalidArgumentError( 492 "Malformed kythe.proto.BazelAspectArtifactSelectorState"); 493 } 494 return absl::FailedPreconditionError( 495 "State not of type kythe.proto.BazelAspectArtifactSelectorState"); 496 } 497 498 AspectArtifactSelector::FileTable::FileTable(const FileTable& other) 499 : next_id_(other.next_id_), 500 file_map_(other.file_map_), 501 id_map_(file_map_.size()) { 502 for (const auto& [file, entry] : file_map_) { 503 id_map_.insert_or_assign(entry.id, &file); 504 } 505 } 506 507 AspectArtifactSelector::FileTable& AspectArtifactSelector::FileTable::operator=( 508 const FileTable& other) { 509 next_id_ = other.next_id_; 510 file_map_ = other.file_map_; 511 id_map_.clear(); 512 for (const auto& [file, entry] : file_map_) { 513 id_map_.insert_or_assign(entry.id, &file); 514 } 515 return *this; 516 } 517 518 AspectArtifactSelector::FileId AspectArtifactSelector::FileTable::Insert( 519 BazelArtifactFile file) { 520 auto [iter, inserted] = file_map_.emplace( 521 std::move(file), Entry{.id = FileId(next_id_), .count = 1}); 522 if (inserted) { 523 next_id_++; 524 id_map_[iter->second.id] = &iter->first; 525 } else { 526 iter->second.count++; 527 } 528 return iter->second.id; 529 } 530 531 BazelArtifactFile AspectArtifactSelector::FileTable::ExtractIterators( 532 IdMap::iterator id_iter, FileMap::iterator file_iter) { 533 CHECK(id_iter != id_map_.end()); 534 CHECK(file_iter != file_map_.end()); 535 if (--file_iter->second.count == 0) { 536 // Only remove the file once it's been extracted for each FileSet which 537 // references it. 538 id_map_.erase(id_iter); 539 return std::move(file_map_.extract(file_iter).key()); 540 } 541 return file_iter->first; 542 } 543 544 std::optional<BazelArtifactFile> AspectArtifactSelector::FileTable::Extract( 545 FileId id) { 546 auto id_iter = id_map_.find(id); 547 if (id_iter == id_map_.end()) { 548 return std::nullopt; 549 } 550 // file_map_ owns the memory underlying the pointer we dereferenced here. 551 // If it's missing from the map, we're well into UB trouble. 552 return ExtractIterators(id_iter, file_map_.find(*id_iter->second)); 553 } 554 555 BazelArtifactFile AspectArtifactSelector::FileTable::ExtractFile( 556 BazelArtifactFile file) { 557 auto file_iter = file_map_.find(file); 558 if (file_iter == file_map_.end()) { 559 return file; 560 } 561 // If the file id is missing from id_map_, something has gone horribly wrong 562 // with our invariants. 563 return ExtractIterators(id_map_.find(file_iter->second.id), file_iter); 564 } 565 566 const BazelArtifactFile* AspectArtifactSelector::FileTable::Find( 567 FileId id) const { 568 auto iter = id_map_.find(id); 569 if (iter == id_map_.end()) { 570 return nullptr; 571 } 572 return iter->second; 573 } 574 575 std::optional<AspectArtifactSelector::FileSetId> 576 AspectArtifactSelector::FileSetTable::InternUnlessDisposed( 577 absl::string_view id) { 578 auto [result, inserted] = InternOrCreate(id); 579 if (!inserted && disposed_.contains(result)) { 580 return std::nullopt; 581 } 582 return result; 583 } 584 585 std::pair<AspectArtifactSelector::FileSetId, bool> 586 AspectArtifactSelector::FileSetTable::InternOrCreate(absl::string_view id) { 587 int64_t token; 588 if (StrictAtoI(id, &token)) { 589 return {{token}, false}; 590 } 591 auto [iter, inserted] = id_map_.try_emplace(id, std::make_tuple(next_id_)); 592 if (inserted) { 593 next_id_--; // Non-integral ids are mapped to negative values. 594 inverse_id_map_.try_emplace(iter->second, iter->first); 595 } 596 return {{iter->second}, inserted}; 597 } 598 599 bool AspectArtifactSelector::FileSetTable::InsertUnlessDisposed( 600 FileSetId id, FileSet file_set) { 601 if (disposed_.contains(id)) { 602 return false; 603 } 604 file_sets_.insert_or_assign(id, std::move(file_set)); 605 return true; // A false return indicates the set has already been disposed. 606 } 607 608 std::optional<AspectArtifactSelector::FileSet> 609 AspectArtifactSelector::FileSetTable::ExtractAndDispose(FileSetId id) { 610 if (auto node = file_sets_.extract(id); !node.empty()) { 611 disposed_.insert(id); 612 return std::move(node.mapped()); 613 } 614 return std::nullopt; 615 } 616 617 void AspectArtifactSelector::FileSetTable::Dispose(FileSetId id) { 618 disposed_.insert(id); 619 file_sets_.erase(id); 620 } 621 622 bool AspectArtifactSelector::FileSetTable::Disposed(FileSetId id) { 623 return disposed_.contains(id); 624 } 625 626 std::string AspectArtifactSelector::FileSetTable::ToString(FileSetId id) const { 627 if (const auto [unpacked] = id; unpacked >= 0) { 628 return absl::StrCat(unpacked); 629 } 630 return inverse_id_map_.at(id); 631 } 632 633 std::optional<BazelArtifact> AspectArtifactSelector::SelectFileSet( 634 absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset) { 635 std::optional<FileSetId> file_set_id = InternUnlessDisposed(id); 636 if (!file_set_id.has_value()) { 637 // Already disposed, skip. 638 return std::nullopt; 639 } 640 // This was a pending file set, select it directly. 641 if (auto node = state_.pending.extract(*file_set_id); !node.empty()) { 642 state_.file_sets.Dispose(*file_set_id); 643 BazelArtifact result = {.label = node.mapped()}; 644 for (const auto& file : fileset.files()) { 645 if (std::optional<BazelArtifactFile> artifact_file = 646 ToBazelArtifactFile(file, options_.file_name_allowlist)) { 647 result.files.push_back( 648 state_.files.ExtractFile(*std::move(artifact_file))); 649 } 650 } 651 for (const auto& child : fileset.file_sets()) { 652 if (std::optional<FileSetId> child_id = 653 InternUnlessDisposed(child.id())) { 654 ExtractFilesInto(*child_id, result.label, &result.files); 655 } 656 } 657 return result; 658 } 659 InsertFileSet(*file_set_id, fileset); 660 return std::nullopt; 661 } 662 663 std::optional<BazelArtifact> AspectArtifactSelector::SelectTargetCompleted( 664 const build_event_stream::BuildEventId::TargetCompletedId& id, 665 const build_event_stream::TargetComplete& payload) { 666 BazelArtifact result = { 667 .label = id.label(), 668 }; 669 const auto& [selected, unselected] = PartitionFileSets(id, payload); 670 for (FileSetId file_set_id : selected) { 671 ExtractFilesInto(file_set_id, result.label, &result.files); 672 } 673 if (options_.dispose_unselected_output_groups) { 674 for (FileSetId file_set_id : unselected) { 675 ExtractFilesInto(file_set_id, result.label, nullptr); 676 } 677 } 678 if (!result.files.empty()) { 679 return result; 680 } 681 return std::nullopt; 682 } 683 684 AspectArtifactSelector::PartitionFileSetsResult 685 AspectArtifactSelector::PartitionFileSets( 686 const build_event_stream::BuildEventId::TargetCompletedId& id, 687 const build_event_stream::TargetComplete& payload) { 688 PartitionFileSetsResult result; 689 bool id_match = options_.target_aspect_allowlist.Match(id.aspect()); 690 for (const auto& output_group : payload.output_group()) { 691 auto& output = 692 (id_match && options_.output_group_allowlist.Match(output_group.name())) 693 ? result.selected 694 : result.unselected; 695 for (const auto& fileset : output_group.file_sets()) { 696 if (std::optional<FileSetId> file_set_id = 697 InternUnlessDisposed(fileset.id())) { 698 output.push_back(*file_set_id); 699 } 700 } 701 } 702 return result; 703 } 704 705 void AspectArtifactSelector::ExtractFilesInto( 706 FileSetId id, absl::string_view target, 707 std::vector<BazelArtifactFile>* files) { 708 if (state_.file_sets.Disposed(id)) { 709 return; 710 } 711 712 std::optional<FileSet> file_set = state_.file_sets.ExtractAndDispose(id); 713 if (!file_set.has_value()) { 714 // Files where requested, but we haven't disposed that filesets id yet. 715 // Record this for future processing. 716 LOG(INFO) << "NamedSetOfFiles " << state_.file_sets.ToString(id) 717 << " requested by " << target << " but not yet disposed."; 718 if (files != nullptr) { 719 // Only retain pending file sets if they would've been saved. 720 state_.pending.emplace(id, target); 721 } else if (state_.pending.find(id) == state_.pending.end()) { 722 // But still prefer to retain pending file sets. 723 state_.file_sets.Dispose(id); 724 } 725 return; 726 } 727 728 for (FileId file_id : file_set->files) { 729 if (std::optional<BazelArtifactFile> file = state_.files.Extract(file_id); 730 file.has_value() && files != nullptr) { 731 files->push_back(*std::move(file)); 732 } 733 } 734 for (FileSetId child_id : file_set->file_sets) { 735 ExtractFilesInto(child_id, target, files); 736 } 737 } 738 739 void AspectArtifactSelector::InsertFileSet( 740 FileSetId id, const build_event_stream::NamedSetOfFiles& fileset) { 741 std::optional<FileSet> file_set; 742 for (const auto& file : fileset.files()) { 743 if (std::optional<BazelArtifactFile> artifact_file = 744 ToBazelArtifactFile(file, options_.file_name_allowlist)) { 745 FileId file_id = state_.files.Insert(*std::move(artifact_file)); 746 GetOrConstruct(file_set).files.push_back(file_id); 747 } 748 } 749 for (const auto& child : fileset.file_sets()) { 750 if (std::optional<FileSetId> child_id = InternUnlessDisposed(child.id())) { 751 GetOrConstruct(file_set).file_sets.push_back(*child_id); 752 } 753 } 754 if (file_set.has_value()) { 755 state_.file_sets.InsertUnlessDisposed(id, *std::move(file_set)); 756 } else { 757 // Nothing to do with this fileset, mark it disposed. 758 state_.file_sets.Dispose(id); 759 } 760 } 761 762 ExtraActionSelector::ExtraActionSelector( 763 absl::flat_hash_set<std::string> action_types) 764 : action_matches_([action_types = std::move(action_types)]( 765 absl::string_view action_type) { 766 return action_types.empty() || action_types.contains(action_type); 767 }) {} 768 769 ExtraActionSelector::ExtraActionSelector(const RE2* action_pattern) 770 : action_matches_([action_pattern](absl::string_view action_type) { 771 if (action_pattern == nullptr || action_pattern->pattern().empty()) { 772 return false; 773 } 774 return RE2::FullMatch(action_type, *action_pattern); 775 }) { 776 CHECK(action_pattern == nullptr || action_pattern->ok()) 777 << "ExtraActionSelector requires a valid pattern: " 778 << action_pattern->error(); 779 } 780 781 std::optional<BazelArtifact> ExtraActionSelector::Select( 782 const build_event_stream::BuildEvent& event) { 783 if (event.id().has_action_completed() && event.action().success() && 784 action_matches_(event.action().type())) { 785 if (std::optional<std::string> uri = 786 ToUri(event.action().primary_output())) { 787 return BazelArtifact{ 788 .label = event.id().action_completed().label(), 789 .files = {{ 790 .local_path = event.id().action_completed().primary_output(), 791 .uri = *std::move(uri), 792 }}, 793 }; 794 } 795 } 796 return std::nullopt; 797 } 798 799 } // namespace kythe