kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kzip_writer.cc (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/common/kzip_writer.h" 18 19 #include <openssl/sha.h> 20 #include <zip.h> 21 22 #include <array> 23 #include <cstdlib> 24 #include <ctime> 25 #include <string> 26 #include <type_traits> 27 #include <vector> 28 29 #include "absl/log/check.h" 30 #include "absl/log/log.h" 31 #include "absl/memory/memory.h" 32 #include "absl/status/status.h" 33 #include "absl/status/statusor.h" 34 #include "absl/strings/ascii.h" 35 #include "absl/strings/escaping.h" 36 #include "absl/strings/str_cat.h" 37 #include "absl/strings/string_view.h" 38 #include "kythe/cxx/common/index_writer.h" 39 #include "kythe/cxx/common/json_proto.h" 40 #include "kythe/cxx/common/kzip_encoding.h" 41 #include "kythe/cxx/common/libzip/error.h" 42 #include "kythe/proto/analysis.pb.h" 43 44 namespace kythe { 45 namespace { 46 47 constexpr absl::string_view kRoot = "root/"; 48 constexpr absl::string_view kJsonUnitRoot = "root/units/"; 49 constexpr absl::string_view kProtoUnitRoot = "root/pbunits/"; 50 constexpr absl::string_view kFileRoot = "root/files/"; 51 // Set all file modified times to 0 so zip file diffs only show content diffs, 52 // not zip creation time diffs. 53 constexpr time_t kModTime = 0; 54 55 std::string SHA256Digest(absl::string_view content) { 56 std::array<unsigned char, SHA256_DIGEST_LENGTH> buf; 57 ::SHA256(reinterpret_cast<const unsigned char*>(content.data()), 58 content.size(), buf.data()); 59 return absl::BytesToHexString( 60 absl::string_view(reinterpret_cast<const char*>(buf.data()), buf.size())); 61 } 62 63 absl::Status WriteTextFile(zip_t* archive, const std::string& path, 64 absl::string_view content) { 65 if (auto source = 66 zip_source_buffer(archive, content.data(), content.size(), 0)) { 67 auto idx = zip_file_add(archive, path.c_str(), source, ZIP_FL_ENC_UTF_8); 68 if (idx >= 0) { 69 // If a file was added, set the last modified time. 70 if (zip_file_set_mtime(archive, idx, kModTime, 0) == 0) { 71 return absl::OkStatus(); 72 } 73 } 74 zip_source_free(source); 75 } 76 return libzip::ToStatus(zip_get_error(archive)); 77 } 78 79 absl::string_view Basename(absl::string_view path) { 80 auto pos = path.find_last_of('/'); 81 if (pos == absl::string_view::npos) { 82 return path; 83 } 84 return absl::ClippedSubstr(path, pos + 1); 85 } 86 87 bool HasEncoding(KzipEncoding lhs, KzipEncoding rhs) { 88 return static_cast<typename std::underlying_type<KzipEncoding>::type>(lhs) & 89 static_cast<typename std::underlying_type<KzipEncoding>::type>(rhs); 90 } 91 92 } // namespace 93 94 /* static */ 95 absl::StatusOr<IndexWriter> KzipWriter::Create(absl::string_view path, 96 KzipEncoding encoding) { 97 int error; 98 if (auto archive = 99 zip_open(std::string(path).c_str(), ZIP_CREATE | ZIP_EXCL, &error)) { 100 return IndexWriter(absl::WrapUnique(new KzipWriter(archive, encoding))); 101 } 102 return libzip::Error(error).ToStatus(); 103 } 104 105 /* static */ 106 absl::StatusOr<IndexWriter> KzipWriter::FromSource(zip_source_t* source, 107 KzipEncoding encoding, 108 const int flags) { 109 libzip::Error error; 110 if (auto archive = zip_open_from_source(source, flags, error.get())) { 111 return IndexWriter(absl::WrapUnique(new KzipWriter(archive, encoding))); 112 } 113 return error.ToStatus(); 114 } 115 116 KzipWriter::KzipWriter(zip_t* archive, KzipEncoding encoding) 117 : archive_(archive), encoding_(encoding) {} 118 119 KzipWriter::~KzipWriter() { 120 DCHECK(archive_ == nullptr) << "Disposing of open KzipWriter!"; 121 } 122 123 // Creates entries for the three directories if not already present. 124 absl::Status KzipWriter::InitializeArchive(zip_t* archive) { 125 std::vector<absl::string_view> dirs = {kRoot, kFileRoot}; 126 if (HasEncoding(encoding_, KzipEncoding::kJson)) { 127 dirs.push_back(kJsonUnitRoot); 128 } 129 if (HasEncoding(encoding_, KzipEncoding::kProto)) { 130 dirs.push_back(kProtoUnitRoot); 131 } 132 for (const auto& name : dirs) { 133 auto idx = zip_dir_add(archive, name.data(), ZIP_FL_ENC_UTF_8); 134 if (idx < 0) { 135 absl::Status status = libzip::ToStatus(zip_get_error(archive)); 136 zip_error_clear(archive); 137 return status; 138 } 139 if (zip_file_set_mtime(archive, idx, kModTime, 0) < 0) { 140 absl::Status status = libzip::ToStatus(zip_get_error(archive)); 141 zip_error_clear(archive); 142 return status; 143 } 144 } 145 return absl::OkStatus(); 146 } 147 148 absl::StatusOr<std::string> KzipWriter::WriteUnit( 149 const kythe::proto::IndexedCompilation& unit) { 150 if (!initialized_) { 151 auto status = InitializeArchive(archive_); 152 if (!status.ok()) { 153 return status; 154 } 155 initialized_ = true; 156 } 157 auto json = WriteMessageAsJsonToString(unit); 158 if (!json.ok()) { 159 return json.status(); 160 } 161 auto digest = SHA256Digest(*json); 162 absl::StatusOr<std::string> result = 163 absl::InternalError("unsupported encoding"); 164 if (HasEncoding(encoding_, KzipEncoding::kJson)) { 165 result = InsertFile(absl::StrCat(kJsonUnitRoot, digest), *json); 166 if (!result.ok()) { 167 return result; 168 } 169 } 170 if (HasEncoding(encoding_, KzipEncoding::kProto)) { 171 std::string contents; 172 if (!unit.SerializeToString(&contents)) { 173 return absl::InternalError("Failure serializing compilation unit"); 174 } 175 result = InsertFile(absl::StrCat(kProtoUnitRoot, digest), contents); 176 } 177 return result; 178 } 179 180 absl::StatusOr<std::string> KzipWriter::WriteFile(absl::string_view content) { 181 if (!initialized_) { 182 auto status = InitializeArchive(archive_); 183 if (!status.ok()) { 184 return status; 185 } 186 initialized_ = true; 187 } 188 return InsertFile(absl::StrCat(kFileRoot, SHA256Digest(content)), content); 189 } 190 191 absl::Status KzipWriter::Close() { 192 DCHECK(archive_ != nullptr); 193 194 absl::Status result = absl::OkStatus(); 195 if (zip_close(archive_) != 0) { 196 result = libzip::ToStatus(zip_get_error(archive_)); 197 zip_discard(archive_); 198 } 199 200 archive_ = nullptr; 201 contents_.clear(); 202 return result; 203 } 204 205 absl::StatusOr<std::string> KzipWriter::InsertFile(absl::string_view path, 206 absl::string_view content) { 207 // Initially insert an empty string for the file content. 208 auto insertion = contents_.emplace(std::string(path), ""); 209 if (insertion.second) { 210 // Only copy in the real content if it was actually inserted into the map. 211 auto& entry = insertion.first; 212 entry->second = std::string(content); 213 auto status = WriteTextFile(archive_, entry->first, entry->second); 214 if (!status.ok()) { 215 contents_.erase(entry->first); 216 return status; 217 } 218 } 219 return std::string(Basename(path)); 220 } 221 222 /* static */ 223 KzipEncoding KzipWriter::DefaultEncoding() { 224 if (const char* env_enc = getenv("KYTHE_KZIP_ENCODING")) { 225 std::string enc = absl::AsciiStrToUpper(env_enc); 226 if (enc == "JSON") { 227 return KzipEncoding::kJson; 228 } 229 if (enc == "PROTO") { 230 return KzipEncoding::kProto; 231 } 232 if (enc == "ALL") { 233 return KzipEncoding::kAll; 234 } 235 LOG(ERROR) << "Unknown encoding '" << enc << "', using PROTO"; 236 } 237 return KzipEncoding::kProto; 238 } 239 240 } // namespace kythe