kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/riegeli/util.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package riegeli
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/minio/highwayhash"
    23  )
    24  
    25  func hashBytes(b []byte) uint64 {
    26  	h, _ := highwayhash.New64(hashKey[:])
    27  	h.Write(b)
    28  	return h.Sum64()
    29  }
    30  
    31  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#conventions
    32  // Binary-encoding of ('Riegeli/', 'records\n', 'Riegeli/', 'records\n')
    33  var hashKey = []byte{
    34  	0x52, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x69, 0x2f,
    35  	0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x0a,
    36  	0x52, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x69, 0x2f,
    37  	0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x0a,
    38  }
    39  
    40  // A blockHeader is located every 64KiB in a Riegeli file.
    41  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#block-header
    42  type blockHeader struct { // 24 bytes
    43  	// HeaderHash    [8]byte
    44  	PreviousChunk uint64 // 8 bytes
    45  	NextChunk     uint64 // 8 bytes
    46  }
    47  
    48  // A chunk is the unit of dat within a Riegeli block.
    49  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#chunk
    50  type chunk struct { // 40 bytes + len(Data) + padding
    51  	Header chunkHeader
    52  	Data   []byte
    53  }
    54  type chunkHeader struct { // 40 bytes
    55  	// HeaderHash      [8]byte
    56  	DataSize        uint64 // 8 bytes
    57  	DataHash        [8]byte
    58  	ChunkType       chunkType
    59  	NumRecords      uint64 // 7 bytes
    60  	DecodedDataSize uint64 // 8 bytes
    61  }
    62  
    63  type chunkType byte
    64  
    65  const (
    66  	fileSignatureChunkType chunkType = 0x73
    67  	fileMetadataChunkType  chunkType = 0x6d
    68  	paddingChunkType       chunkType = 0x70
    69  	recordChunkType        chunkType = 0x72
    70  	transposedChunkType    chunkType = 0x74
    71  )
    72  
    73  // compressionType is the compression format for a chunk
    74  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#chunk-data
    75  type compressionType byte
    76  
    77  const (
    78  	noCompression     compressionType = 0
    79  	brotliCompression compressionType = 0x62
    80  	zstdCompression   compressionType = 0x7a
    81  	snappyCompression compressionType = 0x73
    82  )
    83  
    84  // A recordChunk is the standard chunk type for user records in a Riegeli file.
    85  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#simple-chunk-with-records
    86  type recordChunk struct { // 1 + len(varint(CompressedSizesSize)) + len(CompressedSizes) + len(CompressedValues)
    87  	CompressionType compressionType
    88  	// CompressedSizesSize uint64 == len(CompressedSizes)
    89  	CompressedSizes  []byte // len([]varint64) == NumRecords
    90  	CompressedValues []byte
    91  }
    92  
    93  // https://github.com/google/riegeli/blob/master/doc/riegeli_records_file_format.md#implementation-notes
    94  const (
    95  	blockSize       = 1 << 16
    96  	blockHeaderSize = 24
    97  	usableBlockSize = blockSize - blockHeaderSize
    98  	chunkHeaderSize = 40
    99  )
   100  
   101  func interveningBlockHeaders(pos, size int) int {
   102  	if pos%blockSize == blockHeaderSize {
   103  		panic(fmt.Errorf("invalid chunk boundary: %d", pos))
   104  	}
   105  	return (size + (pos+usableBlockSize-1)%blockSize) / usableBlockSize
   106  }
   107  
   108  func paddingSize(pos int, h *chunkHeader) int {
   109  	size := chunkHeaderSize + int(h.DataSize)
   110  	if int(h.NumRecords) <= size {
   111  		return 0
   112  	}
   113  	return int(h.NumRecords) - size
   114  }