kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kythe_uri.cc (about)

     1  /*
     2   * Copyright 2015 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/kythe_uri.h"
    18  
    19  #include <cstddef>
    20  #include <string>
    21  #include <utility>
    22  
    23  #include "absl/strings/match.h"
    24  #include "absl/strings/str_split.h"
    25  #include "absl/strings/string_view.h"
    26  #include "kythe/cxx/common/path_utils.h"
    27  #include "kythe/proto/storage.pb.h"
    28  
    29  namespace kythe {
    30  namespace {
    31  
/// Uppercase hexadecimal digits, indexed by nibble value when UriEscape emits
/// a percent-escape.
constexpr char kHexDigits[] = "0123456789ABCDEF";
/// The URI scheme label for Kythe.
constexpr char kUriScheme[] = "kythe";
/// The scheme label followed by ':'; URI::ToString starts every result with
/// this text.
constexpr char kUriPrefix[] = "kythe:";
    36  
    37  /// \brief Returns whether a byte should be escaped.
    38  /// \param mode The escaping mode to use.
    39  /// \param c The byte to examine.
    40  bool should_escape(UriEscapeMode mode, char c) {
    41    return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
    42             (c >= '0' && c <= '9') || c == '-' || c == '.' || c == '_' ||
    43             c == '~' || (mode == UriEscapeMode::kEscapePaths && c == '/'));
    44  }
    45  
    46  /// \brief Returns the value of a hex digit.
    47  /// \param digit The hex digit.
    48  /// \return The value of the digit, or -1 if it was not a hex digit.
    49  int value_for_hex_digit(char c) {
    50    if (c >= '0' && c <= '9') {
    51      return c - '0';
    52    } else if (c >= 'a' && c <= 'f') {
    53      return 10 + (c - 'a');
    54    } else if (c >= 'A' && c <= 'F') {
    55      return 10 + (c - 'A');
    56    }
    57    return -1;
    58  }
    59  
    60  std::pair<absl::string_view, absl::string_view> Split(absl::string_view input,
    61                                                        char ch) {
    62    return absl::StrSplit(input, absl::MaxSplits(ch, 1));
    63  }
    64  
    65  }  // namespace
    66  
    67  std::string UriEscape(UriEscapeMode mode, absl::string_view uri) {
    68    size_t num_escapes = 0;
    69    for (char c : uri) {
    70      if (should_escape(mode, c)) {
    71        ++num_escapes;
    72      }
    73    }
    74    std::string result;
    75    result.reserve(num_escapes * 2 + uri.size());
    76    for (char c : uri) {
    77      if (should_escape(mode, c)) {
    78        result.push_back('%');
    79        result.push_back(kHexDigits[(c >> 4) & 0xF]);
    80        result.push_back(kHexDigits[c & 0xF]);
    81      } else {
    82        result.push_back(c);
    83      }
    84    }
    85    return result;
    86  }
    87  
    88  /// \brief URI-unescapes a string.
    89  /// \param string The string to unescape.
    90  /// \return A pair of (success, error-or-unescaped-string).
    91  std::pair<bool, std::string> UriUnescape(absl::string_view string) {
    92    size_t num_escapes = 0;
    93    for (size_t i = 0, s = string.size(); i < s; ++i) {
    94      if (string[i] == '%') {
    95        ++num_escapes;
    96        if (i + 3 > string.size()) {
    97          return std::make_pair(false, "bad escape");
    98        }
    99        i += 2;
   100      }
   101    }
   102    std::string result;
   103    result.reserve(string.size() - num_escapes * 2);
   104    for (size_t i = 0, s = string.size(); i < s; ++i) {
   105      char c = string[i];
   106      if (c != '%') {
   107        result.push_back(c);
   108        continue;
   109      }
   110      int high = value_for_hex_digit(string[++i]);
   111      int low = value_for_hex_digit(string[++i]);
   112      if (high < 0 || low < 0) {
   113        return std::make_pair(false, "bad hex digit");
   114      }
   115      result.push_back((high << 4) | low);
   116    }
   117    return std::make_pair(true, result);
   118  }
   119  
   120  std::string URI::ToString() const {
   121    std::string result = kUriPrefix;
   122    if (vname_.signature().empty() && vname_.path().empty() &&
   123        vname_.root().empty() && vname_.corpus().empty() &&
   124        vname_.language().empty()) {
   125      return result;
   126    }
   127    absl::string_view signature = vname_.signature();
   128    absl::string_view path = vname_.path();
   129    absl::string_view corpus = vname_.corpus();
   130    absl::string_view language = vname_.language();
   131    absl::string_view root = vname_.root();
   132    if (!corpus.empty()) {
   133      result.append("//");
   134      result.append(UriEscape(UriEscapeMode::kEscapePaths, corpus));
   135    }
   136    if (!language.empty()) {
   137      result.append("?lang=");
   138      result.append(UriEscape(UriEscapeMode::kEscapeAll, language));
   139    }
   140    if (!path.empty()) {
   141      result.append("?path=");
   142      result.append(UriEscape(UriEscapeMode::kEscapePaths, CleanPath(path)));
   143    }
   144    if (!root.empty()) {
   145      result.append("?root=");
   146      result.append(UriEscape(UriEscapeMode::kEscapePaths, root));
   147    }
   148    if (!signature.empty()) {
   149      result.push_back('#');
   150      result.append(UriEscape(UriEscapeMode::kEscapeAll, signature));
   151    }
   152    return result;
   153  }
   154  
   155  /// \brief Separate out the scheme component of `uri` if one exists.
   156  /// \return (scheme, tail) if there was a scheme; ("", uri) otherwise.
   157  static std::pair<absl::string_view, absl::string_view> SplitScheme(
   158      absl::string_view uri) {
   159    for (size_t i = 0, s = uri.size(); i != s; ++i) {
   160      char c = uri[i];
   161      if (c == ':') {
   162        return std::make_pair(uri.substr(0, i), uri.substr(i));
   163      } else if ((i == 0 &&
   164                  ((c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.')) ||
   165                 !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
   166        break;
   167      }
   168    }
   169    return std::make_pair(absl::string_view(), uri);
   170  }
   171  
/// \brief Constructs a URI holding a copy of `from_vname`.
/// \param from_vname The VName whose fields this URI stores.
URI::URI(const kythe::proto::VName& from_vname) : vname_(from_vname) {}
   173  
   174  bool URI::ParseString(absl::string_view uri) {
   175    auto head_fragment = Split(uri, '#');
   176    auto head = head_fragment.first, fragment = head_fragment.second;
   177    auto scheme_head = SplitScheme(head);
   178    auto scheme = scheme_head.first;
   179    head = scheme_head.second;
   180    if (scheme.empty()) {
   181      if (absl::StartsWith(head, ":")) {
   182        return false;
   183      }
   184    } else if (scheme != kUriScheme) {
   185      return false;
   186    } else if (!head.empty()) {
   187      head.remove_prefix(1);
   188    }
   189    auto head_attrs = Split(head, '?');
   190    head = head_attrs.first;
   191    auto attrs = head_attrs.second;
   192    std::string corpus;
   193    if (!head.empty()) {
   194      if (!absl::StartsWith(head, "//")) {
   195        return false;
   196      }
   197      auto maybe_corpus = UriUnescape(head.substr(2));
   198      if (!maybe_corpus.first) {
   199        return false;
   200      }
   201      corpus = maybe_corpus.second;
   202    }
   203    auto maybe_sig = UriUnescape(fragment);
   204    if (!maybe_sig.first) {
   205      return false;
   206    }
   207    auto signature = maybe_sig.second;
   208    while (!attrs.empty()) {
   209      auto attr_rest = Split(attrs, '?');
   210      auto attr = attr_rest.first;
   211      attrs = attr_rest.second;
   212      auto name_value = Split(attr, '=');
   213      auto maybe_value = UriUnescape(name_value.second);
   214      if (!maybe_value.first || maybe_value.second.empty()) {
   215        return false;
   216      }
   217      if (name_value.first == "lang") {
   218        vname_.set_language(maybe_value.second);
   219      } else if (name_value.first == "root") {
   220        vname_.set_root(maybe_value.second);
   221      } else if (name_value.first == "path") {
   222        vname_.set_path(CleanPath(maybe_value.second));
   223      } else {
   224        return false;
   225      }
   226    }
   227    vname_.set_signature(signature);
   228    vname_.set_corpus(corpus);
   229    return true;
   230  }
   231  
   232  }  // namespace kythe