kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/path_utils.cc (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/common/path_utils.h" 18 19 #include <stdlib.h> 20 #include <unistd.h> 21 22 #include <cerrno> 23 #include <cstdlib> 24 #include <cstring> 25 #include <memory> 26 #include <optional> 27 #include <string> 28 #include <utility> 29 #include <variant> 30 #include <vector> 31 32 #include "absl/algorithm/container.h" 33 #include "absl/log/log.h" 34 #include "absl/status/statusor.h" 35 #include "absl/strings/match.h" 36 #include "absl/strings/str_cat.h" 37 #include "absl/strings/str_join.h" 38 #include "absl/strings/str_split.h" 39 #include "absl/strings/string_view.h" 40 #include "absl/strings/strip.h" 41 #include "absl/synchronization/mutex.h" 42 #include "absl/types/span.h" 43 #include "kythe/cxx/common/regex.h" 44 #include "kythe/cxx/common/status.h" 45 46 namespace kythe { 47 namespace { 48 49 struct FreeDeleter { 50 void operator()(void* pointer) const { free(pointer); } 51 }; 52 53 // Predicate used in CleanPath for skipping empty components 54 // and components consistening of a single '.'. 55 struct SkipEmptyDot { 56 bool operator()(absl::string_view sp) { return !(sp.empty() || sp == "."); } 57 }; 58 59 // Deal with relative paths as well as '/' and '//'. 60 absl::string_view PathPrefix(absl::string_view path) { 61 int slash_count = 0; 62 for (char ch : path) { 63 if (ch == '/' && ++slash_count <= 2) continue; 64 break; 65 } 66 switch (slash_count) { 67 case 0: 68 return ""; 69 case 2: 70 return "//"; 71 default: 72 return "/"; 73 } 74 } 75 76 absl::string_view TrimPathPrefix(const absl::string_view path, 77 absl::string_view prefix) { 78 absl::string_view result = path; 79 if (absl::ConsumePrefix(&result, prefix) && 80 (result.empty() || prefix == "/" || absl::ConsumePrefix(&result, "/"))) { 81 return result; 82 } 83 return path; 84 } 85 86 absl::StatusOr<std::optional<PathRealizer>> MaybeMakeRealizer( 87 PathCanonicalizer::Policy policy, absl::string_view root) { 88 switch (policy) { 89 case PathCanonicalizer::Policy::kCleanOnly: 90 return {std::nullopt}; 91 case PathCanonicalizer::Policy::kPreferRelative: 92 case PathCanonicalizer::Policy::kPreferReal: 93 if (auto realizer = PathRealizer::Create(root); realizer.ok()) { 94 return {*std::move(realizer)}; 95 } else { 96 return realizer.status(); 97 } 98 } 99 return {std::nullopt}; 100 } 101 102 std::optional<std::string> MaybeRealPath( 103 const std::optional<PathRealizer>& realizer, absl::string_view root) { 104 if (realizer.has_value()) { 105 if (auto result = realizer->Relativize(root); result.ok()) { 106 return *std::move(result); 107 } else { 108 LOG(ERROR) << "Unable to resolve " << root << ": " << result.status(); 109 } 110 } 111 return std::nullopt; 112 } 113 114 struct PathParts { 115 absl::string_view dir, base; 116 }; 117 118 PathParts SplitPath(absl::string_view path) { 119 std::string::difference_type pos = path.find_last_of('/'); 120 121 // Handle the case with no '/' in 'path'. 122 if (pos == absl::string_view::npos) return {path.substr(0, 0), path}; 123 124 // Handle the case with a single leading '/' in 'path'. 125 if (pos == 0) return {path.substr(0, 1), absl::ClippedSubstr(path, 1)}; 126 127 return {path.substr(0, pos), absl::ClippedSubstr(path, pos + 1)}; 128 } 129 130 constexpr struct VisitPattern { 131 absl::string_view operator()(absl::string_view pattern) const { 132 return pattern; 133 } 134 absl::string_view operator()(const Regex& pattern) const { 135 return pattern.pattern(); 136 } 137 } kVisitPattern; 138 139 } // namespace 140 141 absl::StatusOr<PathCleaner> PathCleaner::Create(absl::string_view root) { 142 if (absl::StatusOr<std::string> resolved = MakeCleanAbsolutePath(root); 143 resolved.ok()) { 144 return PathCleaner(*std::move(resolved)); 145 } else { 146 return resolved.status(); 147 } 148 } 149 150 absl::StatusOr<std::string> PathCleaner::Relativize( 151 absl::string_view path) const { 152 if (absl::StatusOr<std::string> resolved = MakeCleanAbsolutePath(path); 153 resolved.ok()) { 154 return std::string(TrimPathPrefix(*std::move(resolved), root_)); 155 } else { 156 return resolved.status(); 157 } 158 } 159 160 absl::StatusOr<PathRealizer> PathRealizer::Create(absl::string_view root) { 161 if (absl::StatusOr<std::string> resolved = RealPath(root); resolved.ok()) { 162 return PathRealizer(*std::move(resolved)); 163 } else { 164 return resolved.status(); 165 } 166 } 167 168 // We do not copy the cache on assignment or construction to retain thread 169 // safety. 170 PathRealizer::PathRealizer(const PathRealizer& other) : root_(other.root_) {} 171 PathRealizer& PathRealizer::operator=(const PathRealizer& other) { 172 root_ = other.root_; 173 return *this; 174 } 175 176 template <typename K, typename Fn> 177 absl::StatusOr<std::string> PathRealizer::PathCache::FindOrInsert(K&& key, 178 Fn&& make) { 179 absl::MutexLock lock(&mu_); 180 auto [iter, inserted] = cache_.try_emplace(std::forward<K>(key), ""); 181 if (inserted) { 182 iter->second = std::forward<Fn>(make)(); 183 } 184 return iter->second; 185 } 186 187 absl::StatusOr<std::string> PathRealizer::Relativize( 188 absl::string_view path) const { 189 return cache_->FindOrInsert( 190 CleanPath(path), [this, path]() -> absl::StatusOr<std::string> { 191 if (absl::StatusOr<std::string> resolved = RealPath(path); 192 resolved.ok()) { 193 return std::string(TrimPathPrefix(*std::move(resolved), root_)); 194 } else { 195 return resolved.status(); 196 } 197 }); 198 } 199 200 absl::StatusOr<PathCanonicalizer> PathCanonicalizer::Create( 201 absl::string_view root, Policy policy, 202 absl::Span<const PathEntry> path_map) { 203 absl::StatusOr<PathCleaner> cleaner = PathCleaner::Create(root); 204 if (!cleaner.ok()) { 205 return cleaner.status(); 206 } 207 absl::StatusOr<std::optional<PathRealizer>> realizer = 208 MaybeMakeRealizer(policy, root); 209 if (!realizer.ok()) { 210 return realizer.status(); 211 } 212 213 std::vector<Policy> override_policies; 214 std::vector<absl::string_view> override_paths; 215 for (const auto& [path, policy] : path_map) { 216 if (!realizer->has_value()) { 217 realizer = MaybeMakeRealizer(policy, root); 218 if (!realizer.ok()) { 219 return realizer.status(); 220 } 221 } 222 override_policies.push_back(policy); 223 override_paths.push_back(std::visit(kVisitPattern, path)); 224 } 225 absl::StatusOr<RegexSet> override_set = RegexSet::Build(override_paths); 226 if (!override_set.ok()) { 227 return override_set.status(); 228 } 229 return PathCanonicalizer(policy, *std::move(cleaner), *std::move(realizer), 230 *std::move(override_set), 231 std::move(override_policies)); 232 } 233 234 absl::StatusOr<std::string> PathCanonicalizer::Relativize( 235 absl::string_view path) const { 236 absl::StatusOr<Policy> policy = PolicyFor(path); 237 if (!policy.ok()) return policy.status(); 238 239 switch (*policy) { 240 case Policy::kPreferRelative: 241 if (auto resolved = MaybeRealPath(realizer_, path)) { 242 if (!IsAbsolutePath(*resolved)) { 243 return *std::move(resolved); 244 } 245 } 246 return cleaner_.Relativize(path); 247 case Policy::kPreferReal: 248 if (auto resolved = MaybeRealPath(realizer_, path)) { 249 return *std::move(resolved); 250 } 251 return cleaner_.Relativize(path); 252 case Policy::kCleanOnly: 253 return cleaner_.Relativize(path); 254 } 255 LOG(FATAL) << "Unknown policy: " << static_cast<int>(*policy); 256 return std::string(path); 257 } 258 259 absl::StatusOr<PathCanonicalizer::Policy> PathCanonicalizer::PolicyFor( 260 absl::string_view path) const { 261 absl::StatusOr<std::vector<int>> match = override_set_.ExplainMatch(path); 262 if (!match.ok()) { 263 return match.status(); 264 } 265 if (match->empty()) { 266 return policy_; 267 } 268 return override_policy_[*absl::c_min_element(*match)]; 269 } 270 271 std::optional<PathCanonicalizer::Policy> ParseCanonicalizationPolicy( 272 absl::string_view policy) { 273 using Policy = PathCanonicalizer::Policy; 274 if (policy == "0" || policy == "clean-only") { 275 return Policy::kCleanOnly; 276 } 277 if (policy == "1" || policy == "prefer-relative") { 278 return Policy::kPreferRelative; 279 } 280 if (policy == "2" || policy == "prefer-real") { 281 return Policy::kPreferReal; 282 } 283 return std::nullopt; 284 } 285 286 bool AbslParseFlag(absl::string_view text, PathCanonicalizer::Policy* policy, 287 std::string* error) { 288 if (auto parsed = ParseCanonicalizationPolicy(text)) { 289 *policy = *parsed; 290 return true; 291 } 292 *error = "policy not one of: clean-only, prefer-relative, prefer-real"; 293 return false; 294 } 295 296 std::string AbslUnparseFlag(PathCanonicalizer::Policy policy) { 297 using Policy = PathCanonicalizer::Policy; 298 switch (policy) { 299 case Policy::kCleanOnly: 300 return "clean-only"; 301 case Policy::kPreferRelative: 302 return "prefer-relative"; 303 case Policy::kPreferReal: 304 return "prefer-real"; 305 } 306 LOG(FATAL) << "Invalid path policy provided: " << static_cast<int>(policy); 307 return "(unknown)"; 308 } 309 310 std::string JoinPath(absl::string_view a, absl::string_view b) { 311 return absl::StrCat(absl::StripSuffix(a, "/"), "/", 312 absl::StripPrefix(b, "/")); 313 } 314 315 bool AbslParseFlag(absl::string_view text, PathCanonicalizer::PathEntry* entry, 316 std::string* error) { 317 size_t pos = text.find('@'); 318 if (pos == text.npos) { 319 *error = "missing @ delimiter between path and policy"; 320 return false; 321 } 322 absl::StatusOr<Regex> path = Regex::Compile(text.substr(0, pos)); 323 if (!path.ok()) { 324 *error = path.status().message(); 325 return false; 326 } 327 entry->path = *std::move(path); 328 return AbslParseFlag(text.substr(pos + 1), &entry->policy, error); 329 } 330 331 std::string AbslUnparseFlag(const PathCanonicalizer::PathEntry& entry) { 332 return absl::StrCat(std::visit(kVisitPattern, entry.path), "@", 333 AbslUnparseFlag(entry.policy)); 334 } 335 336 bool AbslParseFlag(absl::string_view text, 337 std::vector<PathCanonicalizer::PathEntry>* entries, 338 std::string* error) { 339 for (const auto& entry : absl::StrSplit(text, ' ', absl::SkipEmpty())) { 340 if (!AbslParseFlag(entry, &entries->emplace_back(), error)) { 341 entries->pop_back(); 342 return false; 343 } 344 } 345 return true; 346 } 347 348 std::string AbslUnparseFlag( 349 const std::vector<PathCanonicalizer::PathEntry>& entries) { 350 return absl::StrJoin(entries, " ", [](std::string* out, const auto& entry) { 351 absl::StrAppend(out, AbslUnparseFlag(entry)); 352 }); 353 } 354 355 std::string CleanPath(absl::string_view input) { 356 const bool is_absolute_path = absl::StartsWith(input, "/"); 357 std::vector<absl::string_view> parts; 358 for (absl::string_view comp : absl::StrSplit(input, '/', SkipEmptyDot{})) { 359 if (comp == "..") { 360 if (!parts.empty() && parts.back() != "..") { 361 parts.pop_back(); 362 continue; 363 } 364 if (is_absolute_path) continue; 365 } 366 parts.push_back(comp); 367 } 368 // Deal with leading '//' as well as '/'. 369 return absl::StrCat(PathPrefix(input), absl::StrJoin(parts, "/")); 370 } 371 372 bool IsAbsolutePath(absl::string_view path) { 373 return absl::StartsWith(path, "/"); 374 } 375 376 absl::StatusOr<std::string> GetCurrentDirectory() { 377 std::string result(128, '\0'); 378 while (::getcwd(&result.front(), result.size() + 1) == nullptr) { 379 if (errno != ERANGE) { 380 return ErrnoToStatus(errno); 381 } 382 result.resize(result.size() * 2); 383 } 384 result.resize(::strlen(result.data())); 385 return result; 386 } 387 388 absl::StatusOr<std::string> MakeCleanAbsolutePath(absl::string_view path) { 389 if (IsAbsolutePath(path)) { 390 return CleanPath(path); 391 } 392 if (absl::StatusOr<std::string> dir = GetCurrentDirectory(); dir.ok()) { 393 return CleanPath(JoinPath(*std::move(dir), path)); 394 } else { 395 return dir.status(); 396 } 397 } 398 399 absl::string_view Dirname(absl::string_view path) { 400 return SplitPath(path).dir; 401 } 402 403 absl::string_view Basename(absl::string_view path) { 404 return SplitPath(path).base; 405 } 406 407 std::string RelativizePath(absl::string_view to_relativize, 408 absl::string_view relativize_against) { 409 absl::StatusOr<PathCleaner> cleaner = PathCleaner::Create(relativize_against); 410 if (!cleaner.ok()) { 411 return ""; 412 } 413 return cleaner->Relativize(to_relativize).value_or(""); 414 } 415 416 absl::StatusOr<std::string> RealPath(absl::string_view path) { 417 // realpath requires a null-terminated cstring, but string_view may not be. 418 // checking whether or not it is null-terminated is potentially UB. 419 std::string zpath(path); 420 421 std::unique_ptr<char, FreeDeleter> resolved( 422 ::realpath(zpath.c_str(), nullptr)); 423 if (resolved == nullptr) { 424 return ErrnoToStatus(errno); 425 } 426 return std::string(resolved.get()); 427 } 428 429 } // namespace kythe