github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/merkletree/merkletree.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package merkletree implements Merkle tree generating and verification.
    16  package merkletree
    17  
    18  import (
    19  	"bytes"
    20  	"crypto/sha256"
    21  	"crypto/sha512"
    22  	"encoding/gob"
    23  	"fmt"
    24  	"io"
    25  
    26  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    27  
    28  	"github.com/SagerNet/gvisor/pkg/hostarch"
    29  )
    30  
    31  const (
    32  	// sha256DigestSize specifies the digest size of a SHA256 hash.
    33  	sha256DigestSize = 32
    34  	// sha512DigestSize specifies the digest size of a SHA512 hash.
    35  	sha512DigestSize = 64
    36  )
    37  
    38  // DigestSize returns the size (in bytes) of a digest.
    39  func DigestSize(hashAlgorithm int) int {
    40  	switch hashAlgorithm {
    41  	case linux.FS_VERITY_HASH_ALG_SHA256:
    42  		return sha256DigestSize
    43  	case linux.FS_VERITY_HASH_ALG_SHA512:
    44  		return sha512DigestSize
    45  	default:
    46  		return -1
    47  	}
    48  }
    49  
// Layout defines the scale of a Merkle tree: the block and digest sizes and
// where each level of the tree begins.
type Layout struct {
	// blockSize is the size (in bytes) of a data block to be hashed.
	blockSize int64
	// digestSize is the size (in bytes) of a single generated hash.
	digestSize int64
	// levelOffset contains the offset of the beginning of each level in
	// bytes. The number of levels in the tree is the length of the slice.
	// The leaf nodes (level 0) contain hashes of blocks of the input data.
	// Each level N contains hashes of the blocks in level N-1. The highest
	// level is the root hash.
	levelOffset []int64
}
    63  
    64  // InitLayout initializes and returns a new Layout object describing the structure
    65  // of a tree. dataSize specifies the size of input data in bytes.
    66  func InitLayout(dataSize int64, hashAlgorithms int, dataAndTreeInSameFile bool) (Layout, error) {
    67  	layout := Layout{
    68  		blockSize: hostarch.PageSize,
    69  	}
    70  
    71  	switch hashAlgorithms {
    72  	case linux.FS_VERITY_HASH_ALG_SHA256:
    73  		layout.digestSize = sha256DigestSize
    74  	case linux.FS_VERITY_HASH_ALG_SHA512:
    75  		layout.digestSize = sha512DigestSize
    76  	default:
    77  		return Layout{}, fmt.Errorf("unexpected hash algorithms")
    78  	}
    79  
    80  	// treeStart is the offset (in bytes) of the first level of the tree in
    81  	// the file. If data and tree are in different files, treeStart should
    82  	// be zero. If data is in the same file as the tree, treeStart points
    83  	// to the block after the last data block (which may be zero-padded).
    84  	var treeStart int64
    85  	if dataAndTreeInSameFile {
    86  		treeStart = dataSize
    87  		if dataSize%layout.blockSize != 0 {
    88  			treeStart += layout.blockSize - dataSize%layout.blockSize
    89  		}
    90  	}
    91  
    92  	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
    93  	level := 0
    94  	offset := int64(0)
    95  
    96  	// Calculate the number of levels in the Merkle tree and the beginning
    97  	// offset of each level. Level 0 consists of the leaf nodes that
    98  	// contain the hashes of the data blocks, while level numLevels - 1 is
    99  	// the root.
   100  	for numBlocks > 1 {
   101  		layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
   102  		// Round numBlocks up to fill up a block.
   103  		numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock()
   104  		offset += numBlocks / layout.hashesPerBlock()
   105  		numBlocks = numBlocks / layout.hashesPerBlock()
   106  		level++
   107  	}
   108  	layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
   109  
   110  	return layout, nil
   111  }
   112  
// hashesPerBlock returns the number of digests that fit in one tree block.
// For example, if blockSize is 4096 bytes, and digestSize is 32 bytes, there
// will be 128 hashesPerBlock. Therefore 128 hashes in one level will be
// combined in one hash in the level above.
func (layout Layout) hashesPerBlock() int64 {
	return layout.blockSize / layout.digestSize
}
   120  
// numLevels returns the total number of levels in the Merkle tree, i.e. the
// number of entries in levelOffset.
func (layout Layout) numLevels() int {
	return len(layout.levelOffset)
}
   125  
// rootLevel returns the level of the root hash (the highest level of the
// tree).
func (layout Layout) rootLevel() int {
	return layout.numLevels() - 1
}
   130  
// digestOffset finds the byte offset of a digest from the beginning of the
// tree file. The target digest is at the given level of the tree, at position
// index from the beginning of that level.
func (layout Layout) digestOffset(level int, index int64) int64 {
	return layout.levelOffset[level] + index*layout.digestSize
}
   137  
// blockOffset finds the byte offset of a block from the beginning of the tree
// file. The target block is at the given level of the tree, at position index
// from the beginning of that level.
func (layout Layout) blockOffset(level int, index int64) int64 {
	return layout.levelOffset[level] + index*layout.blockSize
}
   144  
// VerityDescriptor is a struct that is serialized and hashed to get a file's
// root hash, which covers the root hash of the raw content and the file's
// metadata.
//
// NOTE(review): the descriptor is serialized with encoding/gob (see encode),
// so the set of fields here determines every generated hash; changing them
// invalidates previously computed hashes.
type VerityDescriptor struct {
	// Name is the name of the target file.
	Name string
	// FileSize is the size of the file content in bytes.
	FileSize int64
	// Mode is the mode of the target file.
	Mode uint32
	// UID is the user ID of the target file.
	UID uint32
	// GID is the group ID of the target file.
	GID uint32
	// Children is the list of children names for a directory; empty for a
	// regular file.
	Children []string
	// SymlinkTarget is the target path of a symlink, or "" if the file is
	// not a symlink.
	SymlinkTarget string
	// RootHash is the root hash of the file's raw content.
	RootHash []byte
}
   158  
   159  func (d *VerityDescriptor) encode() []byte {
   160  	b := new(bytes.Buffer)
   161  	e := gob.NewEncoder(b)
   162  	e.Encode(d)
   163  	return b.Bytes()
   164  }
   165  
   166  // verify generates a hash from d, and compares it with expected.
   167  func (d *VerityDescriptor) verify(expected []byte, hashAlgorithms int) error {
   168  	h, err := hashData(d.encode(), hashAlgorithms)
   169  	if err != nil {
   170  		return err
   171  	}
   172  	if !bytes.Equal(h[:], expected) {
   173  		return fmt.Errorf("unexpected root hash")
   174  	}
   175  	return nil
   176  
   177  }
   178  
   179  // hashData hashes data and returns the result hash based on the hash
   180  // algorithms.
   181  func hashData(data []byte, hashAlgorithms int) ([]byte, error) {
   182  	var digest []byte
   183  	switch hashAlgorithms {
   184  	case linux.FS_VERITY_HASH_ALG_SHA256:
   185  		digestArray := sha256.Sum256(data)
   186  		digest = digestArray[:]
   187  	case linux.FS_VERITY_HASH_ALG_SHA512:
   188  		digestArray := sha512.Sum512(data)
   189  		digest = digestArray[:]
   190  	default:
   191  		return nil, fmt.Errorf("unexpected hash algorithms")
   192  	}
   193  	return digest, nil
   194  }
   195  
// GenerateParams contains the parameters used to generate a Merkle tree for a
// given file.
type GenerateParams struct {
	// File is a reader of the file to be hashed.
	File io.ReaderAt
	// Size is the size of the file in bytes.
	Size int64
	// Name is the name of the target file.
	Name string
	// Mode is the mode of the target file.
	Mode uint32
	// UID is the user ID of the target file.
	UID uint32
	// GID is the group ID of the target file.
	GID uint32
	// Children is the list of children names for a directory. It should be
	// empty for a regular file.
	Children []string
	// SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink.
	SymlinkTarget string
	// HashAlgorithms is the algorithm used to hash data (one of the
	// linux.FS_VERITY_HASH_ALG_* constants).
	HashAlgorithms int
	// TreeReader is a reader for the Merkle tree.
	TreeReader io.ReaderAt
	// TreeWriter is a writer for the Merkle tree.
	TreeWriter io.Writer
	// DataAndTreeInSameFile is true if data and Merkle tree are in the same
	// file, or false if Merkle tree is a separate file from data.
	DataAndTreeInSameFile bool
}
   226  
   227  // Generate constructs a Merkle tree for the contents of params.File. The
   228  // output is written to params.TreeWriter.
   229  //
   230  // Generate returns a hash of a VerityDescriptor, which contains the file
   231  // metadata and the hash from file content.
   232  func Generate(params *GenerateParams) ([]byte, error) {
   233  	descriptor := VerityDescriptor{
   234  		FileSize:      params.Size,
   235  		Name:          params.Name,
   236  		Mode:          params.Mode,
   237  		UID:           params.UID,
   238  		GID:           params.GID,
   239  		Children:      params.Children,
   240  		SymlinkTarget: params.SymlinkTarget,
   241  	}
   242  
   243  	// If file is a symlink do not generate root hash for file content.
   244  	if params.SymlinkTarget != "" {
   245  		return hashData(descriptor.encode(), params.HashAlgorithms)
   246  	}
   247  
   248  	layout, err := InitLayout(params.Size, params.HashAlgorithms, params.DataAndTreeInSameFile)
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  
   253  	numBlocks := (params.Size + layout.blockSize - 1) / layout.blockSize
   254  
   255  	// If the data is in the same file as the tree, zero pad the last data
   256  	// block.
   257  	bytesInLastBlock := params.Size % layout.blockSize
   258  	if params.DataAndTreeInSameFile && bytesInLastBlock != 0 {
   259  		zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock)
   260  		if _, err := params.TreeWriter.Write(zeroBuf); err != nil {
   261  			return nil, err
   262  		}
   263  	}
   264  
   265  	var root []byte
   266  	for level := 0; level < layout.numLevels(); level++ {
   267  		for i := int64(0); i < numBlocks; i++ {
   268  			buf := make([]byte, layout.blockSize)
   269  			var (
   270  				n   int
   271  				err error
   272  			)
   273  			if level == 0 {
   274  				// Read data block from the target file since level 0 includes hashes
   275  				// of blocks in the input data.
   276  				n, err = params.File.ReadAt(buf, i*layout.blockSize)
   277  			} else {
   278  				// Read data block from the tree file since levels higher than 0 are
   279  				// hashing the lower level hashes.
   280  				n, err = params.TreeReader.ReadAt(buf, layout.blockOffset(level-1, i))
   281  			}
   282  
   283  			// err is populated as long as the bytes read is smaller than the buffer
   284  			// size. This could be the case if we are reading the last block, and
   285  			// break in that case. If this is the last block, the end of the block
   286  			// will be zero-padded.
   287  			if n == 0 && err == io.EOF {
   288  				break
   289  			} else if err != nil && err != io.EOF {
   290  				return nil, err
   291  			}
   292  			// Hash the bytes in buf.
   293  			digest, err := hashData(buf, params.HashAlgorithms)
   294  			if err != nil {
   295  				return nil, err
   296  			}
   297  
   298  			if level == layout.rootLevel() {
   299  				root = digest
   300  			}
   301  
   302  			// Write the generated hash to the end of the tree file.
   303  			if _, err = params.TreeWriter.Write(digest[:]); err != nil {
   304  				return nil, err
   305  			}
   306  		}
   307  		// If the generated digests do not round up to a block, zero-padding the
   308  		// remaining of the last block. But no need to do so for root.
   309  		if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 {
   310  			zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize)
   311  			if _, err := params.TreeWriter.Write(zeroBuf[:]); err != nil {
   312  				return nil, err
   313  			}
   314  		}
   315  		numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock()
   316  	}
   317  	descriptor.RootHash = root
   318  	return hashData(descriptor.encode(), params.HashAlgorithms)
   319  }
   320  
// VerifyParams contains the params used to verify a portion of a file against
// a Merkle tree.
type VerifyParams struct {
	// Out will be filled with verified data.
	Out io.Writer
	// File is a handle on the file to be verified.
	File io.ReaderAt
	// Tree is a handle on the Merkle tree used to verify file.
	Tree io.ReaderAt
	// Size is the size of the file in bytes.
	Size int64
	// Name is the name of the target file.
	Name string
	// Mode is the mode of the target file.
	Mode uint32
	// UID is the user ID of the target file.
	UID uint32
	// GID is the group ID of the target file.
	GID uint32
	// Children is the list of children names for a directory. It should be
	// empty for a regular file.
	Children []string
	// SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink.
	SymlinkTarget string
	// HashAlgorithms is the algorithm used to hash data (one of the
	// linux.FS_VERITY_HASH_ALG_* constants).
	HashAlgorithms int
	// ReadOffset is the offset of the data range to be verified.
	ReadOffset int64
	// ReadSize is the size of the data range to be verified.
	ReadSize int64
	// Expected is a trusted hash for the file. It is compared with the
	// calculated root hash to verify the content.
	Expected []byte
	// DataAndTreeInSameFile is true if data and Merkle tree are in the same
	// file, or false if Merkle tree is a separate file from data.
	DataAndTreeInSameFile bool
}
   358  
   359  // verifyMetadata verifies the metadata by hashing a descriptor that contains
   360  // the metadata and compare the generated hash with expected.
   361  //
   362  // For verifyMetadata, params.data is not needed. It only accesses params.tree
   363  // for the raw root hash.
   364  func verifyMetadata(params *VerifyParams, layout *Layout) error {
   365  	var root []byte
   366  	// Only read the root hash if we expect that the file is not a symlink and its
   367  	// Merkle tree file is non-empty.
   368  	if params.Size != 0 && params.SymlinkTarget == "" {
   369  		root = make([]byte, layout.digestSize)
   370  		if _, err := params.Tree.ReadAt(root, layout.blockOffset(layout.rootLevel(), 0 /* index */)); err != nil {
   371  			return fmt.Errorf("failed to read root hash: %w", err)
   372  		}
   373  	}
   374  	descriptor := VerityDescriptor{
   375  		Name:          params.Name,
   376  		FileSize:      params.Size,
   377  		Mode:          params.Mode,
   378  		UID:           params.UID,
   379  		GID:           params.GID,
   380  		Children:      params.Children,
   381  		SymlinkTarget: params.SymlinkTarget,
   382  		RootHash:      root,
   383  	}
   384  	return descriptor.verify(params.Expected, params.HashAlgorithms)
   385  }
   386  
   387  // Verify verifies the content read from data with offset. The content is
   388  // verified against tree. If content spans across multiple blocks, each block is
   389  // verified. Verification fails if the hash of the data does not match the tree
   390  // at any level, or if the final root hash does not match expected.
   391  // Once the data is verified, it will be written using params.Out.
   392  //
   393  // Verify checks for both target file content and metadata. If readSize is 0,
   394  // only metadata is checked.
   395  func Verify(params *VerifyParams) (int64, error) {
   396  	if params.ReadSize < 0 {
   397  		return 0, fmt.Errorf("unexpected read size: %d", params.ReadSize)
   398  	}
   399  	layout, err := InitLayout(int64(params.Size), params.HashAlgorithms, params.DataAndTreeInSameFile)
   400  	if err != nil {
   401  		return 0, err
   402  	}
   403  	if params.ReadSize == 0 {
   404  		return 0, verifyMetadata(params, &layout)
   405  	}
   406  
   407  	// Calculate the index of blocks that includes the target range in input
   408  	// data.
   409  	firstDataBlock := params.ReadOffset / layout.blockSize
   410  	lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize
   411  
   412  	buf := make([]byte, layout.blockSize)
   413  	var readErr error
   414  	total := int64(0)
   415  	for i := firstDataBlock; i <= lastDataBlock; i++ {
   416  		// Read a block that includes all or part of target range in
   417  		// input data.
   418  		bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize)
   419  		readErr = err
   420  		// If at the end of input data and all previous blocks are
   421  		// verified, return the verified input data and EOF.
   422  		if readErr == io.EOF && bytesRead == 0 {
   423  			break
   424  		}
   425  		if readErr != nil && readErr != io.EOF {
   426  			return 0, fmt.Errorf("read from data failed: %w", err)
   427  		}
   428  		// If this is the end of file, zero the remaining bytes in buf,
   429  		// otherwise they are still from the previous block.
   430  		if bytesRead < len(buf) {
   431  			for j := bytesRead; j < len(buf); j++ {
   432  				buf[j] = 0
   433  			}
   434  		}
   435  		descriptor := VerityDescriptor{
   436  			Name:          params.Name,
   437  			FileSize:      params.Size,
   438  			Mode:          params.Mode,
   439  			UID:           params.UID,
   440  			GID:           params.GID,
   441  			SymlinkTarget: params.SymlinkTarget,
   442  			Children:      params.Children,
   443  		}
   444  		if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.HashAlgorithms, params.Expected); err != nil {
   445  			return 0, err
   446  		}
   447  
   448  		// startOff is the beginning of the read range within the
   449  		// current data block. Note that for all blocks other than the
   450  		// first, startOff should be 0.
   451  		startOff := int64(0)
   452  		if i == firstDataBlock {
   453  			startOff = params.ReadOffset % layout.blockSize
   454  		}
   455  		// endOff is the end of the read range within the current data
   456  		// block. Note that for all blocks other than the last,  endOff
   457  		// should be the block size.
   458  		endOff := layout.blockSize
   459  		if i == lastDataBlock {
   460  			endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1
   461  		}
   462  		// If the provided size exceeds the end of input data, we should
   463  		// only copy the parts in buf that's part of input data.
   464  		if startOff > int64(bytesRead) {
   465  			startOff = int64(bytesRead)
   466  		}
   467  		if endOff > int64(bytesRead) {
   468  			endOff = int64(bytesRead)
   469  		}
   470  		n, err := params.Out.Write(buf[startOff:endOff])
   471  		if err != nil {
   472  			return total, err
   473  		}
   474  		total += int64(n)
   475  
   476  	}
   477  	return total, readErr
   478  }
   479  
   480  // verifyBlock verifies a block against tree. index is the number of block in
   481  // original data. The block is verified through each level of the tree. It
   482  // fails if the calculated hash from block is different from any level of
   483  // hashes stored in tree. And the final root hash is compared with
   484  // expected.
   485  func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, hashAlgorithms int, expected []byte) error {
   486  	if len(dataBlock) != int(layout.blockSize) {
   487  		return fmt.Errorf("incorrect block size")
   488  	}
   489  
   490  	expectedDigest := make([]byte, layout.digestSize)
   491  	treeBlock := make([]byte, layout.blockSize)
   492  	var digest []byte
   493  	for level := 0; level < layout.numLevels(); level++ {
   494  		// Calculate hash.
   495  		if level == 0 {
   496  			h, err := hashData(dataBlock, hashAlgorithms)
   497  			if err != nil {
   498  				return err
   499  			}
   500  			digest = h
   501  		} else {
   502  			// Read a block in previous level that contains the
   503  			// hash we just generated, and generate a next level
   504  			// hash from it.
   505  			if _, err := tree.ReadAt(treeBlock, layout.blockOffset(level-1, blockIndex)); err != nil {
   506  				return err
   507  			}
   508  			h, err := hashData(treeBlock, hashAlgorithms)
   509  			if err != nil {
   510  				return err
   511  			}
   512  			digest = h
   513  		}
   514  
   515  		// Read the digest for the current block and store in
   516  		// expectedDigest.
   517  		if _, err := tree.ReadAt(expectedDigest, layout.digestOffset(level, blockIndex)); err != nil {
   518  			return err
   519  		}
   520  
   521  		if !bytes.Equal(digest, expectedDigest) {
   522  			return fmt.Errorf("verification failed")
   523  		}
   524  		blockIndex = blockIndex / layout.hashesPerBlock()
   525  	}
   526  
   527  	// Verification for the tree succeeded. Now hash the descriptor with
   528  	// the root hash and compare it with expected.
   529  	descriptor.RootHash = digest
   530  	return descriptor.verify(expected, hashAlgorithms)
   531  }