kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/regex.h (about)

     1  /*
     2   * Copyright 2020 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  #ifndef KYTHE_CXX_COMMON_REGEX_H_
    17  #define KYTHE_CXX_COMMON_REGEX_H_
    18  
    19  #include <memory>
    20  
    21  #include "absl/status/statusor.h"
    22  #include "absl/strings/string_view.h"
    23  #include "absl/types/span.h"
    24  #include "re2/re2.h"
    25  #include "re2/set.h"
    26  
    27  namespace kythe {
    28  
    29  /// \brief Regex is a Regular value type implemented on top of RE2.
    30  class Regex {
    31   public:
    32    /// \brief Compiles the pattern into a Regex with the provided options.
    33    static absl::StatusOr<Regex> Compile(
    34        absl::string_view pattern,
    35        const RE2::Options& options = RE2::DefaultOptions);
    36  
    37    /// \brief Constructs a Regex from an already-compiled RE2 object.
    38    /// Requires: re.ok()
    39    explicit Regex(const RE2& re);
    40  
    41    /// \brief Constructs an empty Regex.
    42    Regex();
    43  
    44    /// \brief Regex is copyable.
    45    Regex(const Regex&) = default;
    46    Regex& operator=(const Regex&) = default;
    47  
    48    /// \brief Regex is movable.
    49    /// Moves leave the moved-from object in a default constructed state.
    50    Regex(Regex&&) noexcept;
    51    Regex& operator=(Regex&&) noexcept;
    52  
    53    /// \brief Retrieves the underlying RE2 object to be compatible with the RE2
    54    /// non-member functions.
    55    operator const RE2&() const { return *re_; }  // NOLINT
    56  
    57    /// \brief Returns the string specification of the regular expression.
    58    absl::string_view pattern() const { return re_->pattern(); }
    59  
    60   private:
    61    explicit Regex(std::shared_ptr<const RE2> re) : re_(std::move(re)) {}
    62  
    63    std::shared_ptr<const RE2> re_;  // non-null.
    64  };
    65  
    66  /// \brief RegexSet is a regular value-type wrapper around RE2::Set.
    67  class RegexSet {
    68   public:
    69    /// \brief Builds a RegexSet from the list of patterns and options.
    70    template <typename Range = absl::Span<const absl::string_view>>
    71    static absl::StatusOr<RegexSet> Build(
    72        const Range& patterns, const RE2::Options& = RE2::DefaultOptions,
    73        RE2::Anchor = RE2::UNANCHORED);
    74  
    75    /// \brief Constructs a RegexSet from the extant RE2::Set.
    76    /// Requires: set has been compiled
    77    explicit RegexSet(RE2::Set set);
    78  
    79    /// \brief Default constructs an empty RegexSet. Matches nothing.
    80    RegexSet();
    81  
    82    /// \brief Regex set is copyable.
    83    RegexSet(const RegexSet& other) = default;
    84    RegexSet& operator=(const RegexSet& other) = default;
    85  
    86    /// \brief Regex set is movable.
    87    /// Moves leave the moved-from object in a default constructed state.
    88    RegexSet(RegexSet&& other) noexcept;
    89    RegexSet& operator=(RegexSet&& other) noexcept;
    90  
    91    /// \brief Returns true if the provided value matches one of the contained
    92    /// regular expressions.
    93    bool Match(absl::string_view value) const {
    94      return set_->Match(value, nullptr);
    95    }
    96  
    97    /// \brief Matches the input against the contained regular expressions,
    98    /// returning the indices at which the value matched or an empty vector if it
    99    /// did not.
   100    absl::StatusOr<std::vector<int>> ExplainMatch(absl::string_view value) const;
   101  
   102   private:
   103    std::shared_ptr<const RE2::Set> set_;  // non-null.
   104  };
   105  
   106  template <typename Range>
   107  absl::StatusOr<RegexSet> RegexSet::Build(const Range& patterns,
   108                                           const RE2::Options& options,
   109                                           RE2::Anchor anchor) {
   110    RE2::Set set(options, anchor);
   111    for (const auto& value : patterns) {
   112      std::string error;
   113      if (set.Add(value, &error) == -1) {
   114        return absl::InvalidArgumentError(error);
   115      }
   116    }
   117    if (!set.Compile()) {
   118      return absl::ResourceExhaustedError(
   119          "Out of memory attempting to compile RegexSet");
   120    }
   121    return RegexSet(std::move(set));
   122  }
   123  
   124  }  // namespace kythe
   125  
   126  #endif  // KYTHE_CXX_COMMON_REGEX_H_