github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/merkletree/merkletree.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package merkletree implements Merkle tree generating and verification. 16 package merkletree 17 18 import ( 19 "bytes" 20 "crypto/sha256" 21 "crypto/sha512" 22 "encoding/gob" 23 "fmt" 24 "io" 25 26 "github.com/SagerNet/gvisor/pkg/abi/linux" 27 28 "github.com/SagerNet/gvisor/pkg/hostarch" 29 ) 30 31 const ( 32 // sha256DigestSize specifies the digest size of a SHA256 hash. 33 sha256DigestSize = 32 34 // sha512DigestSize specifies the digest size of a SHA512 hash. 35 sha512DigestSize = 64 36 ) 37 38 // DigestSize returns the size (in bytes) of a digest. 39 func DigestSize(hashAlgorithm int) int { 40 switch hashAlgorithm { 41 case linux.FS_VERITY_HASH_ALG_SHA256: 42 return sha256DigestSize 43 case linux.FS_VERITY_HASH_ALG_SHA512: 44 return sha512DigestSize 45 default: 46 return -1 47 } 48 } 49 50 // Layout defines the scale of a Merkle tree. 51 type Layout struct { 52 // blockSize is the size of a data block to be hashed. 53 blockSize int64 54 // digestSize is the size of a generated hash. 55 digestSize int64 56 // levelOffset contains the offset of the beginning of each level in 57 // bytes. The number of levels in the tree is the length of the slice. 58 // The leaf nodes (level 0) contain hashes of blocks of the input data. 59 // Each level N contains hashes of the blocks in level N-1. The highest 60 // level is the root hash. 61 levelOffset []int64 62 } 63 64 // InitLayout initializes and returns a new Layout object describing the structure 65 // of a tree. dataSize specifies the size of input data in bytes. 66 func InitLayout(dataSize int64, hashAlgorithms int, dataAndTreeInSameFile bool) (Layout, error) { 67 layout := Layout{ 68 blockSize: hostarch.PageSize, 69 } 70 71 switch hashAlgorithms { 72 case linux.FS_VERITY_HASH_ALG_SHA256: 73 layout.digestSize = sha256DigestSize 74 case linux.FS_VERITY_HASH_ALG_SHA512: 75 layout.digestSize = sha512DigestSize 76 default: 77 return Layout{}, fmt.Errorf("unexpected hash algorithms") 78 } 79 80 // treeStart is the offset (in bytes) of the first level of the tree in 81 // the file. If data and tree are in different files, treeStart should 82 // be zero. If data is in the same file as the tree, treeStart points 83 // to the block after the last data block (which may be zero-padded). 84 var treeStart int64 85 if dataAndTreeInSameFile { 86 treeStart = dataSize 87 if dataSize%layout.blockSize != 0 { 88 treeStart += layout.blockSize - dataSize%layout.blockSize 89 } 90 } 91 92 numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize 93 level := 0 94 offset := int64(0) 95 96 // Calculate the number of levels in the Merkle tree and the beginning 97 // offset of each level. Level 0 consists of the leaf nodes that 98 // contain the hashes of the data blocks, while level numLevels - 1 is 99 // the root. 100 for numBlocks > 1 { 101 layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) 102 // Round numBlocks up to fill up a block. 103 numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() 104 offset += numBlocks / layout.hashesPerBlock() 105 numBlocks = numBlocks / layout.hashesPerBlock() 106 level++ 107 } 108 layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) 109 110 return layout, nil 111 } 112 113 // hashesPerBlock() returns the number of digests in each block. For example, 114 // if blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128 115 // hashesPerBlock. Therefore 128 hashes in one level will be combined in one 116 // hash in the level above. 117 func (layout Layout) hashesPerBlock() int64 { 118 return layout.blockSize / layout.digestSize 119 } 120 121 // numLevels returns the total number of levels in the Merkle tree. 122 func (layout Layout) numLevels() int { 123 return len(layout.levelOffset) 124 } 125 126 // rootLevel returns the level of the root hash. 127 func (layout Layout) rootLevel() int { 128 return layout.numLevels() - 1 129 } 130 131 // digestOffset finds the offset of a digest from the beginning of the tree. 132 // The target digest is at level of the tree, with index from the beginning of 133 // the current level. 134 func (layout Layout) digestOffset(level int, index int64) int64 { 135 return layout.levelOffset[level] + index*layout.digestSize 136 } 137 138 // blockOffset finds the offset of a block from the beginning of the tree. The 139 // target block is at level of the tree, with index from the beginning of the 140 // current level. 141 func (layout Layout) blockOffset(level int, index int64) int64 { 142 return layout.levelOffset[level] + index*layout.blockSize 143 } 144 145 // VerityDescriptor is a struct that is serialized and hashed to get a file's 146 // root hash, which contains the root hash of the raw content and the file's 147 // meatadata. 148 type VerityDescriptor struct { 149 Name string 150 FileSize int64 151 Mode uint32 152 UID uint32 153 GID uint32 154 Children []string 155 SymlinkTarget string 156 RootHash []byte 157 } 158 159 func (d *VerityDescriptor) encode() []byte { 160 b := new(bytes.Buffer) 161 e := gob.NewEncoder(b) 162 e.Encode(d) 163 return b.Bytes() 164 } 165 166 // verify generates a hash from d, and compares it with expected. 167 func (d *VerityDescriptor) verify(expected []byte, hashAlgorithms int) error { 168 h, err := hashData(d.encode(), hashAlgorithms) 169 if err != nil { 170 return err 171 } 172 if !bytes.Equal(h[:], expected) { 173 return fmt.Errorf("unexpected root hash") 174 } 175 return nil 176 177 } 178 179 // hashData hashes data and returns the result hash based on the hash 180 // algorithms. 181 func hashData(data []byte, hashAlgorithms int) ([]byte, error) { 182 var digest []byte 183 switch hashAlgorithms { 184 case linux.FS_VERITY_HASH_ALG_SHA256: 185 digestArray := sha256.Sum256(data) 186 digest = digestArray[:] 187 case linux.FS_VERITY_HASH_ALG_SHA512: 188 digestArray := sha512.Sum512(data) 189 digest = digestArray[:] 190 default: 191 return nil, fmt.Errorf("unexpected hash algorithms") 192 } 193 return digest, nil 194 } 195 196 // GenerateParams contains the parameters used to generate a Merkle tree for a 197 // given file. 198 type GenerateParams struct { 199 // File is a reader of the file to be hashed. 200 File io.ReaderAt 201 // Size is the size of the file. 202 Size int64 203 // Name is the name of the target file. 204 Name string 205 // Mode is the mode of the target file. 206 Mode uint32 207 // UID is the user ID of the target file. 208 UID uint32 209 // GID is the group ID of the target file. 210 GID uint32 211 // Children is a map of children names for a directory. It should be 212 // empty for a regular file. 213 Children []string 214 // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. 215 SymlinkTarget string 216 // HashAlgorithms is the algorithms used to hash data. 217 HashAlgorithms int 218 // TreeReader is a reader for the Merkle tree. 219 TreeReader io.ReaderAt 220 // TreeWriter is a writer for the Merkle tree. 221 TreeWriter io.Writer 222 // DataAndTreeInSameFile is true if data and Merkle tree are in the same 223 // file, or false if Merkle tree is a separate file from data. 224 DataAndTreeInSameFile bool 225 } 226 227 // Generate constructs a Merkle tree for the contents of params.File. The 228 // output is written to params.TreeWriter. 229 // 230 // Generate returns a hash of a VerityDescriptor, which contains the file 231 // metadata and the hash from file content. 232 func Generate(params *GenerateParams) ([]byte, error) { 233 descriptor := VerityDescriptor{ 234 FileSize: params.Size, 235 Name: params.Name, 236 Mode: params.Mode, 237 UID: params.UID, 238 GID: params.GID, 239 Children: params.Children, 240 SymlinkTarget: params.SymlinkTarget, 241 } 242 243 // If file is a symlink do not generate root hash for file content. 244 if params.SymlinkTarget != "" { 245 return hashData(descriptor.encode(), params.HashAlgorithms) 246 } 247 248 layout, err := InitLayout(params.Size, params.HashAlgorithms, params.DataAndTreeInSameFile) 249 if err != nil { 250 return nil, err 251 } 252 253 numBlocks := (params.Size + layout.blockSize - 1) / layout.blockSize 254 255 // If the data is in the same file as the tree, zero pad the last data 256 // block. 257 bytesInLastBlock := params.Size % layout.blockSize 258 if params.DataAndTreeInSameFile && bytesInLastBlock != 0 { 259 zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock) 260 if _, err := params.TreeWriter.Write(zeroBuf); err != nil { 261 return nil, err 262 } 263 } 264 265 var root []byte 266 for level := 0; level < layout.numLevels(); level++ { 267 for i := int64(0); i < numBlocks; i++ { 268 buf := make([]byte, layout.blockSize) 269 var ( 270 n int 271 err error 272 ) 273 if level == 0 { 274 // Read data block from the target file since level 0 includes hashes 275 // of blocks in the input data. 276 n, err = params.File.ReadAt(buf, i*layout.blockSize) 277 } else { 278 // Read data block from the tree file since levels higher than 0 are 279 // hashing the lower level hashes. 280 n, err = params.TreeReader.ReadAt(buf, layout.blockOffset(level-1, i)) 281 } 282 283 // err is populated as long as the bytes read is smaller than the buffer 284 // size. This could be the case if we are reading the last block, and 285 // break in that case. If this is the last block, the end of the block 286 // will be zero-padded. 287 if n == 0 && err == io.EOF { 288 break 289 } else if err != nil && err != io.EOF { 290 return nil, err 291 } 292 // Hash the bytes in buf. 293 digest, err := hashData(buf, params.HashAlgorithms) 294 if err != nil { 295 return nil, err 296 } 297 298 if level == layout.rootLevel() { 299 root = digest 300 } 301 302 // Write the generated hash to the end of the tree file. 303 if _, err = params.TreeWriter.Write(digest[:]); err != nil { 304 return nil, err 305 } 306 } 307 // If the generated digests do not round up to a block, zero-padding the 308 // remaining of the last block. But no need to do so for root. 309 if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 { 310 zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize) 311 if _, err := params.TreeWriter.Write(zeroBuf[:]); err != nil { 312 return nil, err 313 } 314 } 315 numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock() 316 } 317 descriptor.RootHash = root 318 return hashData(descriptor.encode(), params.HashAlgorithms) 319 } 320 321 // VerifyParams contains the params used to verify a portion of a file against 322 // a Merkle tree. 323 type VerifyParams struct { 324 // Out will be filled with verified data. 325 Out io.Writer 326 // File is a handler on the file to be verified. 327 File io.ReaderAt 328 // tree is a handler on the Merkle tree used to verify file. 329 Tree io.ReaderAt 330 // Size is the size of the file. 331 Size int64 332 // Name is the name of the target file. 333 Name string 334 // Mode is the mode of the target file. 335 Mode uint32 336 // UID is the user ID of the target file. 337 UID uint32 338 // GID is the group ID of the target file. 339 GID uint32 340 // Children is a map of children names for a directory. It should be 341 // empty for a regular file. 342 Children []string 343 // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. 344 SymlinkTarget string 345 // HashAlgorithms is the algorithms used to hash data. 346 HashAlgorithms int 347 // ReadOffset is the offset of the data range to be verified. 348 ReadOffset int64 349 // ReadSize is the size of the data range to be verified. 350 ReadSize int64 351 // Expected is a trusted hash for the file. It is compared with the 352 // calculated root hash to verify the content. 353 Expected []byte 354 // DataAndTreeInSameFile is true if data and Merkle tree are in the same 355 // file, or false if Merkle tree is a separate file from data. 356 DataAndTreeInSameFile bool 357 } 358 359 // verifyMetadata verifies the metadata by hashing a descriptor that contains 360 // the metadata and compare the generated hash with expected. 361 // 362 // For verifyMetadata, params.data is not needed. It only accesses params.tree 363 // for the raw root hash. 364 func verifyMetadata(params *VerifyParams, layout *Layout) error { 365 var root []byte 366 // Only read the root hash if we expect that the file is not a symlink and its 367 // Merkle tree file is non-empty. 368 if params.Size != 0 && params.SymlinkTarget == "" { 369 root = make([]byte, layout.digestSize) 370 if _, err := params.Tree.ReadAt(root, layout.blockOffset(layout.rootLevel(), 0 /* index */)); err != nil { 371 return fmt.Errorf("failed to read root hash: %w", err) 372 } 373 } 374 descriptor := VerityDescriptor{ 375 Name: params.Name, 376 FileSize: params.Size, 377 Mode: params.Mode, 378 UID: params.UID, 379 GID: params.GID, 380 Children: params.Children, 381 SymlinkTarget: params.SymlinkTarget, 382 RootHash: root, 383 } 384 return descriptor.verify(params.Expected, params.HashAlgorithms) 385 } 386 387 // Verify verifies the content read from data with offset. The content is 388 // verified against tree. If content spans across multiple blocks, each block is 389 // verified. Verification fails if the hash of the data does not match the tree 390 // at any level, or if the final root hash does not match expected. 391 // Once the data is verified, it will be written using params.Out. 392 // 393 // Verify checks for both target file content and metadata. If readSize is 0, 394 // only metadata is checked. 395 func Verify(params *VerifyParams) (int64, error) { 396 if params.ReadSize < 0 { 397 return 0, fmt.Errorf("unexpected read size: %d", params.ReadSize) 398 } 399 layout, err := InitLayout(int64(params.Size), params.HashAlgorithms, params.DataAndTreeInSameFile) 400 if err != nil { 401 return 0, err 402 } 403 if params.ReadSize == 0 { 404 return 0, verifyMetadata(params, &layout) 405 } 406 407 // Calculate the index of blocks that includes the target range in input 408 // data. 409 firstDataBlock := params.ReadOffset / layout.blockSize 410 lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize 411 412 buf := make([]byte, layout.blockSize) 413 var readErr error 414 total := int64(0) 415 for i := firstDataBlock; i <= lastDataBlock; i++ { 416 // Read a block that includes all or part of target range in 417 // input data. 418 bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize) 419 readErr = err 420 // If at the end of input data and all previous blocks are 421 // verified, return the verified input data and EOF. 422 if readErr == io.EOF && bytesRead == 0 { 423 break 424 } 425 if readErr != nil && readErr != io.EOF { 426 return 0, fmt.Errorf("read from data failed: %w", err) 427 } 428 // If this is the end of file, zero the remaining bytes in buf, 429 // otherwise they are still from the previous block. 430 if bytesRead < len(buf) { 431 for j := bytesRead; j < len(buf); j++ { 432 buf[j] = 0 433 } 434 } 435 descriptor := VerityDescriptor{ 436 Name: params.Name, 437 FileSize: params.Size, 438 Mode: params.Mode, 439 UID: params.UID, 440 GID: params.GID, 441 SymlinkTarget: params.SymlinkTarget, 442 Children: params.Children, 443 } 444 if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.HashAlgorithms, params.Expected); err != nil { 445 return 0, err 446 } 447 448 // startOff is the beginning of the read range within the 449 // current data block. Note that for all blocks other than the 450 // first, startOff should be 0. 451 startOff := int64(0) 452 if i == firstDataBlock { 453 startOff = params.ReadOffset % layout.blockSize 454 } 455 // endOff is the end of the read range within the current data 456 // block. Note that for all blocks other than the last, endOff 457 // should be the block size. 458 endOff := layout.blockSize 459 if i == lastDataBlock { 460 endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1 461 } 462 // If the provided size exceeds the end of input data, we should 463 // only copy the parts in buf that's part of input data. 464 if startOff > int64(bytesRead) { 465 startOff = int64(bytesRead) 466 } 467 if endOff > int64(bytesRead) { 468 endOff = int64(bytesRead) 469 } 470 n, err := params.Out.Write(buf[startOff:endOff]) 471 if err != nil { 472 return total, err 473 } 474 total += int64(n) 475 476 } 477 return total, readErr 478 } 479 480 // verifyBlock verifies a block against tree. index is the number of block in 481 // original data. The block is verified through each level of the tree. It 482 // fails if the calculated hash from block is different from any level of 483 // hashes stored in tree. And the final root hash is compared with 484 // expected. 485 func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, hashAlgorithms int, expected []byte) error { 486 if len(dataBlock) != int(layout.blockSize) { 487 return fmt.Errorf("incorrect block size") 488 } 489 490 expectedDigest := make([]byte, layout.digestSize) 491 treeBlock := make([]byte, layout.blockSize) 492 var digest []byte 493 for level := 0; level < layout.numLevels(); level++ { 494 // Calculate hash. 495 if level == 0 { 496 h, err := hashData(dataBlock, hashAlgorithms) 497 if err != nil { 498 return err 499 } 500 digest = h 501 } else { 502 // Read a block in previous level that contains the 503 // hash we just generated, and generate a next level 504 // hash from it. 505 if _, err := tree.ReadAt(treeBlock, layout.blockOffset(level-1, blockIndex)); err != nil { 506 return err 507 } 508 h, err := hashData(treeBlock, hashAlgorithms) 509 if err != nil { 510 return err 511 } 512 digest = h 513 } 514 515 // Read the digest for the current block and store in 516 // expectedDigest. 517 if _, err := tree.ReadAt(expectedDigest, layout.digestOffset(level, blockIndex)); err != nil { 518 return err 519 } 520 521 if !bytes.Equal(digest, expectedDigest) { 522 return fmt.Errorf("verification failed") 523 } 524 blockIndex = blockIndex / layout.hashesPerBlock() 525 } 526 527 // Verification for the tree succeeded. Now hash the descriptor with 528 // the root hash and compare it with expected. 529 descriptor.RootHash = digest 530 return descriptor.verify(expected, hashAlgorithms) 531 }