github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/region/compressor.go (about) 1 // Copyright (C) 2020 The GoHBase Authors. All rights reserved. 2 // This file is part of GoHBase. 3 // Use of this source code is governed by the Apache License 2.0 4 // that can be found in the COPYING file. 5 6 package region 7 8 import ( 9 "encoding/binary" 10 "fmt" 11 "io" 12 "net" 13 "slices" 14 15 "github.com/tsuna/gohbase/compression" 16 ) 17 18 type compressor struct { 19 compression.Codec 20 } 21 22 func min(x, y uint32) int { 23 if x < y { 24 return int(x) 25 } 26 return int(y) 27 } 28 29 func (c *compressor) compressCellblocks(cbs net.Buffers, uncompressedLen uint32) []byte { 30 b := newBuffer(4) 31 32 // put uncompressed length 33 binary.BigEndian.PutUint32(b, uncompressedLen) 34 35 uncompressedBuffer := newBuffer(min(uncompressedLen, c.ChunkLen())) 36 defer freeBuffer(uncompressedBuffer) 37 38 var chunkLen uint32 39 var lenOffset int 40 for { 41 n, err := cbs.Read(uncompressedBuffer) 42 if n == 0 { 43 break 44 } 45 46 // grow for chunk length 47 lenOffset = len(b) 48 b = append(b, make([]byte, 4)...) 49 50 b, chunkLen = c.Encode(uncompressedBuffer[:n], b) 51 52 // write the chunk length 53 binary.BigEndian.PutUint32(b[lenOffset:], chunkLen) 54 55 if err == io.EOF { 56 break 57 } else if err != nil { 58 panic(err) // unexpected error 59 } 60 } 61 return b 62 } 63 64 func readN(b []byte, n int) ([]byte, []byte, error) { 65 if len(b) < n { 66 return nil, nil, fmt.Errorf( 67 "short read: want %d bytes, got %d", n, len(b)) 68 } 69 return b[:n], b[n:], nil 70 } 71 72 func readUint32(b []byte) (uint32, []byte, error) { 73 head, tail, err := readN(b, 4) 74 if err != nil { 75 return 0, nil, err 76 } 77 return binary.BigEndian.Uint32(head), tail, nil 78 } 79 80 // decompressCellblocks decodes block stream format of hadoop. 81 // The wire format is as follows: 82 // 83 // <length of uncompressed block> 84 // <length of compressed chunk><compressed chunk> 85 // <length of compressed chunk><compressed chunk> 86 // ... 87 // <length of compressed chunk><compressed chunk> 88 // <length of uncompressed block> 89 // <length of compressed chunk><compressed chunk> 90 // ... 91 // ... 92 func (c *compressor) decompressCellblocks(b []byte) ([]byte, error) { 93 var ( 94 err error 95 out []byte 96 compressedChunk []byte 97 compressedChunkLen uint32 98 uncompressedBlockLen uint32 99 uncompressedChunkLen uint32 100 ) 101 for len(b) > 0 { 102 // read uncompressed block length 103 uncompressedBlockLen, b, err = readUint32(b) 104 if err != nil { 105 return nil, fmt.Errorf("failed to read uncompressed block length: %w", err) 106 } 107 108 out = slices.Grow(out, int(uncompressedBlockLen)) 109 110 // read and decompress encoded chunks until whole block is read 111 var uncompressedSoFar uint32 112 for uncompressedSoFar < uncompressedBlockLen { 113 compressedChunkLen, b, err = readUint32(b) 114 if err != nil { 115 return nil, fmt.Errorf( 116 "failed to read compressed chunk block length: %w", err) 117 } 118 119 compressedChunk, b, err = readN(b, int(compressedChunkLen)) 120 if err != nil { 121 return nil, fmt.Errorf("failed to read compressed chunk: %w", err) 122 } 123 out, uncompressedChunkLen, err = c.Decode(compressedChunk, out) 124 if err != nil { 125 return nil, fmt.Errorf("failed to decode compressed chunk: %w", err) 126 } 127 uncompressedSoFar += uncompressedChunkLen 128 } 129 130 // check that uncompressed lengths add up 131 if uncompressedSoFar > uncompressedBlockLen { 132 return nil, fmt.Errorf( 133 "uncompressed more than expected: expected %d, got %d so far", 134 uncompressedBlockLen, uncompressedSoFar) 135 } 136 } 137 return out, nil 138 }