kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/bazel_artifact_selector.h (about)

     1  /*
     2   * Copyright 2020 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  #ifndef KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_
    17  #define KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_
    18  
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/inlined_vector.h"
#include "absl/container/node_hash_map.h"
#include "absl/meta/type_traits.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "google/protobuf/any.pb.h"
#include "kythe/cxx/common/regex.h"
#include "kythe/cxx/extractor/bazel_artifact.h"
#include "re2/re2.h"
#include "third_party/bazel/src/main/java/com/google/devtools/build/lib/buildeventstream/proto/build_event_stream.pb.h"
    38  
    39  namespace kythe {
    40  
/// \brief BazelArtifactSelector is an interface which can be used for finding
/// extractor artifacts in a Bazel sequence of build_event_stream.BuildEvent
/// messages.
///
/// Implementations may be stateless (only Select() is overridden) or stateful
/// (SerializeInto() and DeserializeFrom() are overridden as well so that
/// per-stream state can be saved and restored).
class BazelArtifactSelector {
 public:
  virtual ~BazelArtifactSelector() = default;

  /// \brief Selects matching BazelArtifacts from the provided event.
  /// Select() will be called for each message in the stream to allow
  /// implementations to update internal state.
  /// \param event The build event to inspect.
  /// \return The selected artifact, if any.
  virtual std::optional<BazelArtifact> Select(
      const build_event_stream::BuildEvent& event) = 0;

  /// \brief Encodes per-stream selector state into the Any protobuf.
  /// Stateful selectors should serialize any per-stream state into a
  /// suitable protocol buffer, encoded as an Any, and return true. If no
  /// state has been accumulated, they should still store an empty protocol
  /// buffer of the appropriate type and return true.
  /// Stateless selectors should return false.
  /// \param state Output message which receives the encoded state.
  /// \return Whether the selector is stateful. The default implementation
  /// leaves `state` untouched and returns false.
  virtual bool SerializeInto(google::protobuf::Any& state) const {
    return false;
  }

  /// \brief Updates any per-stream state from the provided proto.
  /// Stateless selectors should unconditionally return a kUnimplemented status.
  /// Stateful selectors should return OK if the provided state contains a
  /// suitable proto, InvalidArgument if the proto is of the right type but
  /// cannot be decoded or FailedPrecondition if the proto is of the wrong type.
  /// \param state A previously serialized state message.
  /// \return The default implementation always reports kUnimplemented.
  virtual absl::Status DeserializeFrom(const google::protobuf::Any& state) {
    return absl::UnimplementedError("stateless selector");
  }

  /// \brief Finds and updates any per-stream state from the provided list.
  /// Returns OK if the selector is stateless or if the requisite state was
  /// found in the list.
  /// Returns NotFound for a stateful selector whose state was not present
  /// or InvalidArgument if the state was present but couldn't be decoded.
  /// \param state Candidate state messages, supplied either by value or as
  /// pointers to messages owned elsewhere.
  absl::Status Deserialize(absl::Span<const google::protobuf::Any> state);
  absl::Status Deserialize(
      absl::Span<const google::protobuf::Any* const> state);

 protected:
  // Not publicly copyable or movable to avoid slicing, but subclasses may be.
  // Only the copy operations are declared, so no move operations are
  // implicitly generated for the base class itself.
  BazelArtifactSelector() = default;
  BazelArtifactSelector(const BazelArtifactSelector&) = default;
  BazelArtifactSelector& operator=(const BazelArtifactSelector&) = default;
};
    88  
    89  /// \brief A type-erased value-type implementation of the BazelArtifactSelector
    90  /// interface.
    91  class AnyArtifactSelector final : public BazelArtifactSelector {
    92   public:
    93    /// \brief Constructs an AnyArtifactSelector which delegates to the provided
    94    /// argument, which must derive from BazelArtifactSelector.
    95    template <
    96        typename S,
    97        typename = absl::enable_if_t<!std::is_same_v<S, AnyArtifactSelector>>,
    98        typename =
    99            absl::enable_if_t<std::is_convertible_v<S&, BazelArtifactSelector&>>>
   100    AnyArtifactSelector(S s)
   101        : AnyArtifactSelector([s = std::move(s)]() mutable -> S& { return s; }) {}
   102  
   103    // Copyable.
   104    AnyArtifactSelector(const AnyArtifactSelector&) = default;
   105    AnyArtifactSelector& operator=(const AnyArtifactSelector&) = default;
   106  
   107    /// \brief AnyArtifactSelector is movable, but will be empty after a move.
   108    /// The only valid operations on an empty AnyArtifactSelector is assigning a
   109    /// new value or destruction.
   110    AnyArtifactSelector(AnyArtifactSelector&&) = default;
   111    AnyArtifactSelector& operator=(AnyArtifactSelector&&) = default;
   112  
   113    /// \brief Forwards selection to the contained BazelArtifactSelector.
   114    std::optional<BazelArtifact> Select(
   115        const build_event_stream::BuildEvent& event) {
   116      return get_().Select(event);
   117    }
   118  
   119    /// \brief Forwards serialization to the contained BazelArtifactSelector.
   120    bool SerializeInto(google::protobuf::Any& state) const final {
   121      return get_().SerializeInto(state);
   122    }
   123  
   124    /// \brief Forwards deserialization to the contained BazelArtifactSelector.
   125    absl::Status DeserializeFrom(const google::protobuf::Any& state) final {
   126      return get_().DeserializeFrom(state);
   127    }
   128  
   129   private:
   130    explicit AnyArtifactSelector(std::function<BazelArtifactSelector&()> get)
   131        : get_(std::move(get)) {}
   132  
   133    std::function<BazelArtifactSelector&()> get_;
   134  };
   135  
/// \brief Known serialization format versions.
/// Selected through AspectArtifactSelectorOptions::serialization_format.
enum class AspectArtifactSelectorSerializationFormat {
  kV1,  // The initial, bulky-but-simple format.
  kV2,  // The newer, flatter, smaller format.
};
   141  
/// \brief Options class used for constructing an AspectArtifactSelector.
struct AspectArtifactSelectorOptions {
  // A set of patterns used to filter file names from NamedSetOfFiles events.
  // Matches nothing by default.
  RegexSet file_name_allowlist;
  // A set of patterns used to filter output_group names from TargetComplete
  // events. Matches nothing by default.
  RegexSet output_group_allowlist;
  // A set of patterns used to filter aspect names from TargetComplete events.
  // Defaults to a set built from the single pattern ".*".
  RegexSet target_aspect_allowlist = RegexSet::Build({".*"}).value();
  // Which serialization format version to use. Defaults to kV2.
  AspectArtifactSelectorSerializationFormat serialization_format =
      AspectArtifactSelectorSerializationFormat::kV2;
  // Whether to eagerly drop files and filesets from unselected output groups.
  // As this can cause data loss when a file set would have been selected
  // by a subsequent target's output group, it defaults to false.
  bool dispose_unselected_output_groups = false;
};
   160  
/// \brief A BazelArtifactSelector implementation which tracks state from
/// NamedSetOfFiles and TargetComplete events to select artifacts produced by
/// extractor aspects.
class AspectArtifactSelector final : public BazelArtifactSelector {
 public:
  using Options = AspectArtifactSelectorOptions;

  /// \brief Constructs an instance of AspectArtifactSelector from the provided
  /// options.
  /// \param options Filtering and serialization settings; moved into the
  /// selector.
  explicit AspectArtifactSelector(Options options)
      : options_(std::move(options)) {}

  // Copyable and movable.
  AspectArtifactSelector(const AspectArtifactSelector&) = default;
  AspectArtifactSelector& operator=(const AspectArtifactSelector&) = default;
  AspectArtifactSelector(AspectArtifactSelector&&) = default;
  AspectArtifactSelector& operator=(AspectArtifactSelector&&) = default;

  /// \brief Selects an artifact if the event matches an expected
  /// aspect-produced compilation unit.
  std::optional<BazelArtifact> Select(
      const build_event_stream::BuildEvent& event) final;

  /// \brief Serializes the accumulated state into `state`, which will
  /// always be non-empty and of type
  /// `kythe.proto.BazelAspectArtifactSelectorState`.
  bool SerializeInto(google::protobuf::Any& state) const final;

  /// \brief Deserializes accumulated stream state from an Any of type
  /// `kythe.proto.BazelAspectArtifactSelectorState`.
  absl::Status DeserializeFrom(const google::protobuf::Any& state) final;

 private:
  // Granted access to the private tables declared below.
  // NOTE(review): presumably implements the kV1/kV2 formats -- confirm in
  // the implementation file.
  friend class AspectArtifactSelectorSerializationHelper;

  // Single-element tuples are used so that file ids and file set ids are
  // distinct, non-interchangeable types.
  using FileId = std::tuple<uint64_t>;
  using FileSetId = std::tuple<int64_t>;

  // A table of files addressable both by value and by FileId.
  class FileTable {
   public:
    FileTable() = default;
    // Copying is user-defined while moving is defaulted.
    // NOTE(review): presumably because id_map_ stores pointers to the keys of
    // file_map_, which a memberwise copy would leave pointing at the source
    // table -- confirm in the implementation file.
    FileTable(const FileTable& other);
    FileTable& operator=(const FileTable& other);
    FileTable(FileTable&&) = default;
    FileTable& operator=(FileTable&&) = default;

    // Adds `file` to the table and returns the id assigned to it.
    FileId Insert(BazelArtifactFile file);
    // Removes and returns the file with the given id, if present.
    std::optional<BazelArtifactFile> Extract(FileId id);
    // Extract the equivalent file, if present, returning the argument.
    BazelArtifactFile ExtractFile(BazelArtifactFile file);

    // Looks up a file by id without removing it.
    // NOTE(review): presumably returns nullptr when absent -- confirm.
    const BazelArtifactFile* Find(FileId) const;

    // Iteration is over the (FileId, const BazelArtifactFile*) entries of
    // id_map_.
    auto begin() const { return id_map_.begin(); }
    auto end() const { return id_map_.end(); }

   private:
    struct Entry {
      FileId id;
      // NOTE(review): looks like a reference count of outstanding inserts --
      // confirm in the implementation file.
      int count = 0;
    };
    using FileMap = absl::node_hash_map<BazelArtifactFile, Entry>;
    using IdMap = absl::flat_hash_map<FileId, const BazelArtifactFile*>;

    // Shared removal helper operating on matching entries of both maps.
    BazelArtifactFile ExtractIterators(IdMap::iterator id_iter,
                                       FileMap::iterator file_iter);

    uint64_t next_id_ = 0;
    FileMap file_map_;
    IdMap id_map_;
  };

  // The contents of a single file set: directly contained files plus
  // references to other file sets.
  struct FileSet {
    absl::InlinedVector<FileId, 1> files;
    absl::InlinedVector<FileSetId, 1> file_sets;
  };

  // A table of pending and disposed file sets, keyed by FileSetId.
  class FileSetTable {
   public:
    // Maps a Bazel-provided string id to a FileSetId.
    // NOTE(review): presumably returns nullopt when the file set has already
    // been disposed -- confirm in the implementation file.
    std::optional<FileSetId> InternUnlessDisposed(absl::string_view id);
    // NOTE(review): presumably returns whether the insertion happened, i.e.
    // false when `id` was already disposed -- confirm.
    bool InsertUnlessDisposed(FileSetId id, FileSet file_set);
    // Extracts the FileSet and, if previously present, marks it disposed.
    std::optional<FileSet> ExtractAndDispose(FileSetId id);
    // Unconditionally marks a FileSet as disposed.
    // Erases it if present in the map.
    void Dispose(FileSetId id);
    // Reports whether `id` has been marked disposed.
    [[nodiscard]] bool Disposed(FileSetId id);

    // Renders `id` as a string.
    // NOTE(review): presumably the inverse of interning, yielding the
    // original Bazel id -- confirm.
    std::string ToString(FileSetId id) const;

    // Read-only views of the underlying containers.
    const absl::flat_hash_map<FileSetId, FileSet>& file_sets() const {
      return file_sets_;
    }
    const absl::flat_hash_set<FileSetId>& disposed() const { return disposed_; }

   private:
    // NOTE(review): presumably the bool reports whether a new id was created
    // -- confirm in the implementation file.
    std::pair<FileSetId, bool> InternOrCreate(absl::string_view id);

    // A record of all pending FileSets.
    absl::flat_hash_map<FileSetId, FileSet> file_sets_;
    // A record of all of the NamedSetOfFiles events which have been processed.
    absl::flat_hash_set<FileSetId> disposed_;

    // The next integral id to use.
    // Non-integral file set ids are mapped to negative values.
    int64_t next_id_ = -1;
    // For non-integral file set ids coming from Bazel.
    absl::flat_hash_map<std::string, FileSetId> id_map_;
    absl::flat_hash_map<FileSetId, std::string> inverse_id_map_;
  };

  // All per-stream state accumulated by this selector.
  struct State {
    // A record of all of the potentially-selectable files encountered.
    FileTable files;
    // A record of all of the potentially-selectable NamedSetOfFiles.
    FileSetTable file_sets;
    // Mapping from fileset id to target name which required that
    // file set when it had not yet been seen.
    absl::flat_hash_map<FileSetId, std::string> pending;
  };
  // Handles a NamedSetOfFiles event identified by `id`.
  std::optional<BazelArtifact> SelectFileSet(
      absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset);

  // Handles a TargetComplete event.
  std::optional<BazelArtifact> SelectTargetCompleted(
      const build_event_stream::BuildEventId::TargetCompletedId& id,
      const build_event_stream::TargetComplete& payload);

  // File set ids referenced by a completed target, split by whether they
  // were selected.
  struct PartitionFileSetsResult {
    std::vector<FileSetId> selected;
    std::vector<FileSetId> unselected;
  };
  // NOTE(review): presumably partitions according to the allowlists in
  // options_ -- confirm in the implementation file.
  PartitionFileSetsResult PartitionFileSets(
      const build_event_stream::BuildEventId::TargetCompletedId& id,
      const build_event_stream::TargetComplete& payload);

  // Extracts the selected files into the (optional) `files` output.
  // If `files` is nullptr, extracted files will be dropped.
  void ExtractFilesInto(FileSetId id, absl::string_view target,
                        std::vector<BazelArtifactFile>* files);
  // Records the contents of `fileset` under `id`.
  void InsertFileSet(FileSetId id,
                     const build_event_stream::NamedSetOfFiles& fileset);

  // Convenience forwarder to FileSetTable::InternUnlessDisposed on the
  // selector's own state.
  std::optional<FileSetId> InternUnlessDisposed(absl::string_view id) {
    return state_.file_sets.InternUnlessDisposed(id);
  }

  Options options_;
  State state_;
};
   309  
/// \brief An ArtifactSelector which selects artifacts emitted by extra
/// actions.
///
/// This will select any successful ActionCompleted build event, but the
/// selection can be restricted to an allowlist of action_types.
/// This selector does not override SerializeInto()/DeserializeFrom() and so
/// uses the stateless defaults inherited from BazelArtifactSelector.
class ExtraActionSelector final : public BazelArtifactSelector {
 public:
  /// \brief Constructs an ExtraActionSelector from an allowlist against which
  /// to match ActionCompleted events. An empty set will select any successful
  /// action.
  /// \param action_types The action types to select; defaults to empty.
  explicit ExtraActionSelector(
      absl::flat_hash_set<std::string> action_types = {});

  /// \brief Constructs an ExtraActionSelector from an allowlist pattern.
  /// Both a null and an empty pattern will match nothing.
  /// \param action_pattern Pattern to match against action types; may be
  /// null.
  /// NOTE(review): whether the pointee must outlive the selector is not
  /// visible from this header -- confirm in the implementation file.
  explicit ExtraActionSelector(const RE2* action_pattern);

  /// \brief Selects artifacts from ExtraAction-based extractors.
  std::optional<BazelArtifact> Select(
      const build_event_stream::BuildEvent& event) final;

 private:
  // Predicate over a string; presumably applied to an event's action type
  // and set up by whichever constructor was used -- confirm in the
  // implementation file.
  std::function<bool(absl::string_view)> action_matches_;
};
   334  
   335  }  // namespace kythe
   336  
   337  #endif  // KYTHE_CXX_EXTRACTOR_BAZEL_ARTIFACT_SELECTOR_H_