kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/bazel_artifact_selector.h (about) 1 /* 2 * Copyright 2020 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_ 17 #define KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_ 18 19 #include <cstdint> 20 #include <functional> 21 #include <memory> 22 #include <optional> 23 #include <tuple> 24 #include <type_traits> 25 26 #include "absl/container/flat_hash_map.h" 27 #include "absl/container/flat_hash_set.h" 28 #include "absl/container/inlined_vector.h" 29 #include "absl/container/node_hash_map.h" 30 #include "absl/meta/type_traits.h" 31 #include "absl/status/status.h" 32 #include "absl/types/span.h" 33 #include "google/protobuf/any.pb.h" 34 #include "kythe/cxx/common/regex.h" 35 #include "kythe/cxx/extractor/bazel_artifact.h" 36 #include "re2/re2.h" 37 #include "third_party/bazel/src/main/java/com/google/devtools/build/lib/buildeventstream/proto/build_event_stream.pb.h" 38 39 namespace kythe { 40 41 /// \brief BazelArtifactSelector is an interface which can be used for finding 42 /// extractor artifacts in a Bazel sequence of build_event_stream.BuildEvent 43 /// messages. 44 class BazelArtifactSelector { 45 public: 46 virtual ~BazelArtifactSelector() = default; 47 48 /// \brief Selects matching BazelArtifacts from the provided event. 49 /// Select() will be called for each message in the stream to allow 50 /// implementations to update internal state. 51 virtual std::optional<BazelArtifact> Select( 52 const build_event_stream::BuildEvent& event) = 0; 53 54 /// \brief Encodes per-stream selector state into the Any protobuf. 55 /// Stateful selectors should serialize any per-stream state into a 56 /// suitable protocol buffer, encoded as an Any. If no state has been 57 /// accumulated, they should return an empty protocol buffer of the 58 /// appropriate type and return true. 59 /// Stateless selectors should return false. 60 virtual bool SerializeInto(google::protobuf::Any& state) const { 61 return false; 62 } 63 64 /// \brief Updates any per-stream state from the provided proto. 65 /// Stateless selectors should unconditionally return a kUnimplemented status. 66 /// Stateful selectors should return OK if the provided state contains a 67 /// suitable proto, InvalidArgument if the proto is of the right type but 68 /// cannot be decoded or FailedPrecondition if the proto is of the wrong type. 69 virtual absl::Status DeserializeFrom(const google::protobuf::Any& state) { 70 return absl::UnimplementedError("stateless selector"); 71 } 72 73 /// \brief Finds and updates any per-stream state from the provided list. 74 /// Returns OK if the selector is stateless or if the requisite state was 75 /// found in the list. 76 /// Returns NotFound for a stateful selector whose state was not present 77 /// or InvalidArgument if the state was present but couldn't be decoded. 78 absl::Status Deserialize(absl::Span<const google::protobuf::Any> state); 79 absl::Status Deserialize( 80 absl::Span<const google::protobuf::Any* const> state); 81 82 protected: 83 // Not publicly copyable or movable to avoid slicing, but subclasses may be. 84 BazelArtifactSelector() = default; 85 BazelArtifactSelector(const BazelArtifactSelector&) = default; 86 BazelArtifactSelector& operator=(const BazelArtifactSelector&) = default; 87 }; 88 89 /// \brief A type-erased value-type implementation of the BazelArtifactSelector 90 /// interface. 91 class AnyArtifactSelector final : public BazelArtifactSelector { 92 public: 93 /// \brief Constructs an AnyArtifactSelector which delegates to the provided 94 /// argument, which must derive from BazelArtifactSelector. 95 template < 96 typename S, 97 typename = absl::enable_if_t<!std::is_same_v<S, AnyArtifactSelector>>, 98 typename = 99 absl::enable_if_t<std::is_convertible_v<S&, BazelArtifactSelector&>>> 100 AnyArtifactSelector(S s) 101 : AnyArtifactSelector([s = std::move(s)]() mutable -> S& { return s; }) {} 102 103 // Copyable. 104 AnyArtifactSelector(const AnyArtifactSelector&) = default; 105 AnyArtifactSelector& operator=(const AnyArtifactSelector&) = default; 106 107 /// \brief AnyArtifactSelector is movable, but will be empty after a move. 108 /// The only valid operations on an empty AnyArtifactSelector is assigning a 109 /// new value or destruction. 110 AnyArtifactSelector(AnyArtifactSelector&&) = default; 111 AnyArtifactSelector& operator=(AnyArtifactSelector&&) = default; 112 113 /// \brief Forwards selection to the contained BazelArtifactSelector. 114 std::optional<BazelArtifact> Select( 115 const build_event_stream::BuildEvent& event) { 116 return get_().Select(event); 117 } 118 119 /// \brief Forwards serialization to the contained BazelArtifactSelector. 120 bool SerializeInto(google::protobuf::Any& state) const final { 121 return get_().SerializeInto(state); 122 } 123 124 /// \brief Forwards deserialization to the contained BazelArtifactSelector. 125 absl::Status DeserializeFrom(const google::protobuf::Any& state) final { 126 return get_().DeserializeFrom(state); 127 } 128 129 private: 130 explicit AnyArtifactSelector(std::function<BazelArtifactSelector&()> get) 131 : get_(std::move(get)) {} 132 133 std::function<BazelArtifactSelector&()> get_; 134 }; 135 136 /// \brief Known serialization format versions. 137 enum class AspectArtifactSelectorSerializationFormat { 138 kV1, // The initial, bulky-but-simple format. 139 kV2, // The newer, flatter, smaller format. 140 }; 141 142 /// \brief Options class used for constructing an AspectArtifactSelector. 143 struct AspectArtifactSelectorOptions { 144 // A set of patterns used to filter file names from NamedSetOfFiles events. 145 // Matches nothing by default. 146 RegexSet file_name_allowlist; 147 // A set of patterns used to filter output_group names from TargetComplete 148 // events. Matches nothing by default. 149 RegexSet output_group_allowlist; 150 // A set of patterns used to filter aspect names from TargetComplete events. 151 RegexSet target_aspect_allowlist = RegexSet::Build({".*"}).value(); 152 // Which serialization format version to use. 153 AspectArtifactSelectorSerializationFormat serialization_format = 154 AspectArtifactSelectorSerializationFormat::kV2; 155 // Whether to eagerly drop files and filesets from unselected output groups. 156 // As this can cause data loss when a file set would have been selected 157 // by a subsequent target's output group, it defaults to false. 158 bool dispose_unselected_output_groups = false; 159 }; 160 161 /// \brief A BazelArtifactSelector implementation which tracks state from 162 /// NamedSetOfFiles and TargetComplete events to select artifacts produced by 163 /// extractor aspects. 164 class AspectArtifactSelector final : public BazelArtifactSelector { 165 public: 166 using Options = AspectArtifactSelectorOptions; 167 168 /// \brief Constructs an instance of AspectArtifactSelector from the provided 169 /// options. 170 explicit AspectArtifactSelector(Options options) 171 : options_(std::move(options)) {} 172 173 AspectArtifactSelector(const AspectArtifactSelector&) = default; 174 AspectArtifactSelector& operator=(const AspectArtifactSelector&) = default; 175 AspectArtifactSelector(AspectArtifactSelector&&) = default; 176 AspectArtifactSelector& operator=(AspectArtifactSelector&&) = default; 177 178 /// \brief Selects an artifact if the event matches an expected 179 /// aspect-produced compilation unit. 180 std::optional<BazelArtifact> Select( 181 const build_event_stream::BuildEvent& event) final; 182 183 /// \brief Serializes the accumulated state into the return value, which will 184 /// always be non-empty and of type 185 /// `kythe.proto.BazelAspectArtifactSelectorState`. 186 bool SerializeInto(google::protobuf::Any& state) const final; 187 188 /// \brief Deserializes accumulated stream state from an Any of type 189 /// `kythe.proto.BazelAspectArtifactSelectorState`. 190 absl::Status DeserializeFrom(const google::protobuf::Any& state) final; 191 192 private: 193 friend class AspectArtifactSelectorSerializationHelper; 194 195 using FileId = std::tuple<uint64_t>; 196 using FileSetId = std::tuple<int64_t>; 197 198 class FileTable { 199 public: 200 FileTable() = default; 201 FileTable(const FileTable& other); 202 FileTable& operator=(const FileTable& other); 203 FileTable(FileTable&&) = default; 204 FileTable& operator=(FileTable&&) = default; 205 206 FileId Insert(BazelArtifactFile file); 207 std::optional<BazelArtifactFile> Extract(FileId id); 208 // Extract the equivalent file, if present, returning the argument. 209 BazelArtifactFile ExtractFile(BazelArtifactFile file); 210 211 const BazelArtifactFile* Find(FileId) const; 212 213 auto begin() const { return id_map_.begin(); } 214 auto end() const { return id_map_.end(); } 215 216 private: 217 struct Entry { 218 FileId id; 219 int count = 0; 220 }; 221 using FileMap = absl::node_hash_map<BazelArtifactFile, Entry>; 222 using IdMap = absl::flat_hash_map<FileId, const BazelArtifactFile*>; 223 224 BazelArtifactFile ExtractIterators(IdMap::iterator id_iter, 225 FileMap::iterator file_iter); 226 227 uint64_t next_id_ = 0; 228 FileMap file_map_; 229 IdMap id_map_; 230 }; 231 232 struct FileSet { 233 absl::InlinedVector<FileId, 1> files; 234 absl::InlinedVector<FileSetId, 1> file_sets; 235 }; 236 237 class FileSetTable { 238 public: 239 std::optional<FileSetId> InternUnlessDisposed(absl::string_view id); 240 bool InsertUnlessDisposed(FileSetId id, FileSet file_set); 241 // Extracts the FileSet and, if previously present, marks it disposed. 242 std::optional<FileSet> ExtractAndDispose(FileSetId id); 243 // Unconditionally marks a FileSet as disposed. 244 // Erases it if present in the map. 245 void Dispose(FileSetId id); 246 [[nodiscard]] bool Disposed(FileSetId id); 247 248 std::string ToString(FileSetId id) const; 249 250 const absl::flat_hash_map<FileSetId, FileSet>& file_sets() const { 251 return file_sets_; 252 } 253 const absl::flat_hash_set<FileSetId>& disposed() const { return disposed_; } 254 255 private: 256 std::pair<FileSetId, bool> InternOrCreate(absl::string_view id); 257 258 // A record of all pending FileSets. 259 absl::flat_hash_map<FileSetId, FileSet> file_sets_; 260 // A record of all of the NamedSetOfFiles events which have been processed. 261 absl::flat_hash_set<FileSetId> disposed_; 262 263 // The next integral id to use. 264 // Non-integral file set ids are mapped to negative values. 265 int64_t next_id_ = -1; 266 // For non-integral file set ids coming from Bazel. 267 absl::flat_hash_map<std::string, FileSetId> id_map_; 268 absl::flat_hash_map<FileSetId, std::string> inverse_id_map_; 269 }; 270 271 struct State { 272 // A record of all of the potentially-selectable files encountered. 273 FileTable files; 274 // A record of all of the potentially-selectable NamedSetOfFiles. 275 FileSetTable file_sets; 276 // Mapping from fileset id to target name which required that 277 // file set when it had not yet been seen. 278 absl::flat_hash_map<FileSetId, std::string> pending; 279 }; 280 std::optional<BazelArtifact> SelectFileSet( 281 absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset); 282 283 std::optional<BazelArtifact> SelectTargetCompleted( 284 const build_event_stream::BuildEventId::TargetCompletedId& id, 285 const build_event_stream::TargetComplete& payload); 286 287 struct PartitionFileSetsResult { 288 std::vector<FileSetId> selected; 289 std::vector<FileSetId> unselected; 290 }; 291 PartitionFileSetsResult PartitionFileSets( 292 const build_event_stream::BuildEventId::TargetCompletedId& id, 293 const build_event_stream::TargetComplete& payload); 294 295 // Extracts the selected files into the (optional) `files` output. 296 // If `files` is nullptr, extracted files will be dropped. 297 void ExtractFilesInto(FileSetId id, absl::string_view target, 298 std::vector<BazelArtifactFile>* files); 299 void InsertFileSet(FileSetId id, 300 const build_event_stream::NamedSetOfFiles& fileset); 301 302 std::optional<FileSetId> InternUnlessDisposed(absl::string_view id) { 303 return state_.file_sets.InternUnlessDisposed(id); 304 } 305 306 Options options_; 307 State state_; 308 }; 309 310 /// \brief An ArtifactSelector which selects artifacts emitted by extra 311 /// actions. 312 /// 313 /// This will select any successful ActionCompleted build event, but the 314 /// selection can be restricted to an allowlist of action_types. 315 class ExtraActionSelector final : public BazelArtifactSelector { 316 public: 317 /// \brief Constructs an ExtraActionSelector from an allowlist against which 318 /// to match ActionCompleted events. An empty set will select any successful 319 /// action. 320 explicit ExtraActionSelector( 321 absl::flat_hash_set<std::string> action_types = {}); 322 323 /// \brief Constructs an ExtraActionSelector from an allowlist pattern. 324 /// Both a null and an empty pattern will match nothing. 325 explicit ExtraActionSelector(const RE2* action_pattern); 326 327 /// \brief Selects artifacts from ExtraAction-based extractors. 328 std::optional<BazelArtifact> Select( 329 const build_event_stream::BuildEvent& event) final; 330 331 private: 332 std::function<bool(absl::string_view)> action_matches_; 333 }; 334 335 } // namespace kythe 336 337 #endif // KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_