kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kzip_reader.cc (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/common/kzip_reader.h" 18 19 #include <zip.h> 20 #include <zipconf.h> 21 22 #include <algorithm> 23 #include <cstdint> 24 #include <cstdio> 25 #include <iterator> 26 #include <memory> 27 #include <optional> 28 #include <set> 29 #include <string> 30 #include <utility> 31 #include <vector> 32 33 #include "absl/log/check.h" 34 #include "absl/log/log.h" 35 #include "absl/memory/memory.h" 36 #include "absl/status/status.h" 37 #include "absl/status/statusor.h" 38 #include "absl/strings/str_cat.h" 39 #include "absl/strings/string_view.h" 40 #include "absl/strings/strip.h" 41 #include "google/protobuf/io/zero_copy_stream.h" 42 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" 43 #include "kythe/cxx/common/index_reader.h" 44 #include "kythe/cxx/common/json_proto.h" 45 #include "kythe/cxx/common/kzip_encoding.h" 46 #include "kythe/cxx/common/libzip/error.h" 47 #include "kythe/proto/analysis.pb.h" 48 49 namespace kythe { 50 namespace { 51 52 constexpr absl::string_view kJsonUnitsDir = "/units/"; 53 constexpr absl::string_view kProtoUnitsDir = "/pbunits/"; 54 55 struct ZipFileClose { 56 void operator()(zip_file_t* file) { 57 if (file != nullptr) { 58 CHECK_EQ(zip_fclose(file), 0); 59 } 60 } 61 }; 62 using ZipFile = std::unique_ptr<zip_file_t, ZipFileClose>; 63 64 class ZipFileInputStream : public google::protobuf::io::ZeroCopyInputStream { 65 public: 66 explicit ZipFileInputStream(zip_file_t* file) : input_(file) {} 67 68 bool Next(const void** data, int* size) override { 69 return impl_.Next(data, size); 70 } 71 72 void BackUp(int count) override { impl_.BackUp(count); } 73 bool Skip(int count) override { return impl_.Skip(count); } 74 int64_t ByteCount() const override { return impl_.ByteCount(); } 75 76 private: 77 class CopyingZipInputStream 78 : public google::protobuf::io::CopyingInputStream { 79 public: 80 explicit CopyingZipInputStream(zip_file_t* file) : file_(file) {} 81 82 int Read(void* buffer, int size) override { 83 return zip_fread(file_, buffer, size); 84 } 85 86 int Skip(int count) override { 87 zip_int64_t start = zip_ftell(file_); 88 if (start < 0) { 89 return 0; 90 } 91 if (zip_fseek(file_, count, SEEK_CUR) < 0) { 92 return 0; 93 } 94 zip_int64_t end = zip_ftell(file_); 95 if (end < 0) { 96 return 0; 97 } 98 return end - start; 99 } 100 101 private: 102 zip_file_t* file_; 103 }; 104 105 CopyingZipInputStream input_; 106 google::protobuf::io::CopyingInputStreamAdaptor impl_{&input_}; 107 }; 108 109 struct KzipOptions { 110 absl::string_view root; 111 KzipEncoding encoding; 112 }; 113 114 absl::StatusOr<KzipOptions> Validate(zip_t* archive) { 115 if (!zip_get_num_entries(archive, 0)) { 116 return absl::InvalidArgumentError("Empty kzip archive"); 117 } 118 119 // Pull the root directory from an arbitrary entry. 120 absl::string_view root = zip_get_name(archive, 0, 0); 121 auto slashpos = root.find('/'); 122 if (slashpos == 0 || slashpos == absl::string_view::npos) { 123 return absl::InvalidArgumentError( 124 absl::StrCat("Malformed kzip: invalid root: ", root)); 125 } 126 root.remove_suffix(root.size() - slashpos); 127 VLOG(1) << "Using archive root: " << root; 128 std::set<absl::string_view> proto_units; 129 std::set<absl::string_view> json_units; 130 for (int i = 0; i < zip_get_num_entries(archive, 0); ++i) { 131 absl::string_view name = zip_get_name(archive, i, 0); 132 if (!absl::ConsumePrefix(&name, root)) { 133 return absl::InvalidArgumentError( 134 absl::StrCat("Malformed kzip: invalid entry: ", name)); 135 } 136 if (absl::ConsumePrefix(&name, kJsonUnitsDir)) { 137 json_units.insert(name); 138 } else if (absl::ConsumePrefix(&name, kProtoUnitsDir)) { 139 proto_units.insert(name); 140 } 141 } 142 KzipEncoding encoding = KzipEncoding::kJson; 143 if (json_units.empty()) { 144 encoding = KzipEncoding::kProto; 145 } else if (!proto_units.empty()) { 146 std::vector<absl::string_view> diff; 147 std::set_symmetric_difference(json_units.begin(), json_units.end(), 148 proto_units.begin(), proto_units.end(), 149 std::inserter(diff, diff.end())); 150 if (!diff.empty()) { 151 return absl::InvalidArgumentError(absl::StrCat( 152 "Malformed kzip: multiple unit encodings but different entries")); 153 } 154 } 155 return KzipOptions{root, encoding}; 156 } 157 158 std::optional<zip_uint64_t> FileSize(zip_t* archive, zip_uint64_t index) { 159 zip_stat_t sb; 160 zip_stat_init(&sb); 161 162 if (zip_stat_index(archive, index, ZIP_STAT_SIZE, &sb) < 0) { 163 return std::nullopt; 164 } 165 return sb.size; 166 } 167 168 absl::StatusOr<std::string> ReadTextFile(zip_t* archive, 169 const std::string& path) { 170 zip_int64_t index = zip_name_locate(archive, path.c_str(), 0); 171 if (index >= 0) { 172 if (auto file = ZipFile(zip_fopen_index(archive, index, 0))) { 173 if (auto size = FileSize(archive, index)) { 174 std::string result(*size, '\0'); 175 if (*size == 0 || 176 zip_fread(file.get(), result.data(), *size) == *size) { 177 return result; 178 } else { 179 return libzip::ToStatus(zip_file_get_error(file.get())); 180 } 181 } 182 } 183 } 184 absl::Status status = libzip::ToStatus(zip_get_error(archive)); 185 if (!status.ok()) { 186 return status; 187 } 188 return absl::UnknownError(absl::StrCat("Unable to read: ", path)); 189 } 190 191 absl::string_view DirNameForEncoding(KzipEncoding encoding) { 192 switch (encoding) { 193 case KzipEncoding::kJson: 194 return kJsonUnitsDir; 195 case KzipEncoding::kProto: 196 return kProtoUnitsDir; 197 default: 198 LOG(FATAL) << "Unsupported encoding: " << static_cast<int>(encoding); 199 } 200 return ""; 201 } 202 203 } // namespace 204 205 std::optional<absl::string_view> KzipReader::UnitDigest( 206 absl::string_view path) { 207 if (!absl::ConsumePrefix(&path, unit_prefix_) || path.empty()) { 208 return std::nullopt; 209 } 210 return path; 211 } 212 213 /* static */ 214 absl::StatusOr<IndexReader> KzipReader::Open(absl::string_view path) { 215 int error; 216 if (auto archive = 217 ZipHandle(zip_open(std::string(path).c_str(), ZIP_RDONLY, &error))) { 218 if (auto options = Validate(archive.get()); options.ok()) { 219 return IndexReader(absl::WrapUnique(new KzipReader( 220 std::move(archive), options->root, options->encoding))); 221 } else { 222 return options.status(); 223 } 224 } 225 return libzip::Error(error).ToStatus(); 226 } 227 228 /* static */ 229 absl::StatusOr<IndexReader> KzipReader::FromSource(zip_source_t* source) { 230 libzip::Error error; 231 if (auto archive = 232 ZipHandle(zip_open_from_source(source, ZIP_RDONLY, error.get()))) { 233 if (auto options = Validate(archive.get()); options.ok()) { 234 return IndexReader(absl::WrapUnique(new KzipReader( 235 std::move(archive), options->root, options->encoding))); 236 } else { 237 // Ensure source is retained when `archive` is deleted. 238 // It is the callers responsitility to free it on error. 239 zip_source_keep(source); 240 return options.status(); 241 } 242 } 243 return error.ToStatus(); 244 } 245 246 KzipReader::KzipReader(ZipHandle archive, absl::string_view root, 247 KzipEncoding encoding) 248 : archive_(std::move(archive)), 249 encoding_(encoding), 250 files_prefix_(absl::StrCat(root, "/files/")), 251 unit_prefix_(absl::StrCat(root, DirNameForEncoding(encoding))) {} 252 253 absl::StatusOr<proto::IndexedCompilation> KzipReader::ReadUnit( 254 absl::string_view digest) { 255 std::string path = absl::StrCat(unit_prefix_, digest); 256 257 if (auto file = ZipFile(zip_fopen(archive(), path.c_str(), 0))) { 258 proto::IndexedCompilation unit; 259 ZipFileInputStream input(file.get()); 260 absl::Status status; 261 if (encoding_ == KzipEncoding::kJson) { 262 status = ParseFromJsonStream(&input, &unit); 263 } else { 264 if (!unit.ParseFromZeroCopyStream(&input)) { 265 status = absl::InvalidArgumentError("Failure parsing proto unit"); 266 } 267 } 268 if (!status.ok()) { 269 absl::Status zip_status = 270 libzip::ToStatus(zip_file_get_error(file.get())); 271 if (!zip_status.ok()) { 272 // Prefer the underlying zip error, if present. 273 return zip_status; 274 } 275 return status; 276 } 277 return unit; 278 } 279 absl::Status status = libzip::ToStatus(zip_get_error(archive())); 280 if (!status.ok()) { 281 return status; 282 } 283 return absl::UnknownError(absl::StrCat("Unable to open unit ", digest)); 284 } 285 286 absl::StatusOr<std::string> KzipReader::ReadFile(absl::string_view digest) { 287 return ReadTextFile(archive(), absl::StrCat(files_prefix_, digest)); 288 } 289 290 absl::Status KzipReader::Scan(const ScanCallback& callback) { 291 for (int i = 0; i < zip_get_num_entries(archive(), 0); ++i) { 292 if (auto digest = UnitDigest(zip_get_name(archive(), i, 0))) { 293 if (!callback(*digest)) { 294 break; 295 } 296 } 297 } 298 return absl::OkStatus(); 299 } 300 301 } // namespace kythe