github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/region/compressor.go (about)

     1  // Copyright (C) 2020  The GoHBase Authors.  All rights reserved.
     2  // This file is part of GoHBase.
     3  // Use of this source code is governed by the Apache License 2.0
     4  // that can be found in the COPYING file.
     5  
     6  package region
     7  
     8  import (
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"net"
    13  	"slices"
    14  
    15  	"github.com/tsuna/gohbase/compression"
    16  )
    17  
// compressor wraps a compression.Codec and implements hadoop's
// block-stream framing on top of it: compressCellblocks produces the
// framed stream, decompressCellblocks consumes it.
type compressor struct {
	compression.Codec
}
    21  
    22  func min(x, y uint32) int {
    23  	if x < y {
    24  		return int(x)
    25  	}
    26  	return int(y)
    27  }
    28  
// compressCellblocks compresses the cellblock buffers in cbs into
// hadoop's block stream format (see decompressCellblocks for the wire
// layout): a 4-byte big-endian total uncompressed length, followed by
// one or more <4-byte chunk length><compressed chunk> pairs.
//
// uncompressedLen must be the total number of bytes readable from cbs.
// The result is built on a buffer obtained from newBuffer.
// NOTE(review): assumes Codec.Encode appends the encoded chunk to its
// second argument and returns the grown slice together with the
// encoded chunk's length — confirm against compression.Codec.
func (c *compressor) compressCellblocks(cbs net.Buffers, uncompressedLen uint32) []byte {
	b := newBuffer(4)

	// put uncompressed length
	binary.BigEndian.PutUint32(b, uncompressedLen)

	// Scratch buffer sized to one codec chunk (or less if the whole
	// payload is smaller); each Read fills at most one chunk.
	uncompressedBuffer := newBuffer(min(uncompressedLen, c.ChunkLen()))
	defer freeBuffer(uncompressedBuffer)

	var chunkLen uint32
	var lenOffset int
	for {
		n, err := cbs.Read(uncompressedBuffer)
		// n is checked before err: a final short read (n > 0 with
		// io.EOF) must still be encoded below before the loop exits.
		if n == 0 {
			break
		}

		// grow for chunk length; the real value is patched in at
		// lenOffset after encoding, once it is known
		lenOffset = len(b)
		b = append(b, make([]byte, 4)...)

		b, chunkLen = c.Encode(uncompressedBuffer[:n], b)

		// write the chunk length
		binary.BigEndian.PutUint32(b[lenOffset:], chunkLen)

		if err == io.EOF {
			break
		} else if err != nil {
			panic(err) // unexpected error
		}
	}
	return b
}
    63  
    64  func readN(b []byte, n int) ([]byte, []byte, error) {
    65  	if len(b) < n {
    66  		return nil, nil, fmt.Errorf(
    67  			"short read: want %d bytes, got %d", n, len(b))
    68  	}
    69  	return b[:n], b[n:], nil
    70  }
    71  
    72  func readUint32(b []byte) (uint32, []byte, error) {
    73  	head, tail, err := readN(b, 4)
    74  	if err != nil {
    75  		return 0, nil, err
    76  	}
    77  	return binary.BigEndian.Uint32(head), tail, nil
    78  }
    79  
    80  // decompressCellblocks decodes block stream format of hadoop.
    81  // The wire format is as follows:
    82  //
    83  //	<length of uncompressed block>
    84  //	  <length of compressed chunk><compressed chunk>
    85  //	  <length of compressed chunk><compressed chunk>
    86  //	  ...
    87  //	  <length of compressed chunk><compressed chunk>
    88  //	<length of uncompressed block>
    89  //	  <length of compressed chunk><compressed chunk>
    90  //	  ...
    91  //	...
    92  func (c *compressor) decompressCellblocks(b []byte) ([]byte, error) {
    93  	var (
    94  		err                  error
    95  		out                  []byte
    96  		compressedChunk      []byte
    97  		compressedChunkLen   uint32
    98  		uncompressedBlockLen uint32
    99  		uncompressedChunkLen uint32
   100  	)
   101  	for len(b) > 0 {
   102  		// read uncompressed block length
   103  		uncompressedBlockLen, b, err = readUint32(b)
   104  		if err != nil {
   105  			return nil, fmt.Errorf("failed to read uncompressed block length: %w", err)
   106  		}
   107  
   108  		out = slices.Grow(out, int(uncompressedBlockLen))
   109  
   110  		// read and decompress encoded chunks until whole block is read
   111  		var uncompressedSoFar uint32
   112  		for uncompressedSoFar < uncompressedBlockLen {
   113  			compressedChunkLen, b, err = readUint32(b)
   114  			if err != nil {
   115  				return nil, fmt.Errorf(
   116  					"failed to read compressed chunk block length: %w", err)
   117  			}
   118  
   119  			compressedChunk, b, err = readN(b, int(compressedChunkLen))
   120  			if err != nil {
   121  				return nil, fmt.Errorf("failed to read compressed chunk: %w", err)
   122  			}
   123  			out, uncompressedChunkLen, err = c.Decode(compressedChunk, out)
   124  			if err != nil {
   125  				return nil, fmt.Errorf("failed to decode compressed chunk: %w", err)
   126  			}
   127  			uncompressedSoFar += uncompressedChunkLen
   128  		}
   129  
   130  		// check that uncompressed lengths add up
   131  		if uncompressedSoFar > uncompressedBlockLen {
   132  			return nil, fmt.Errorf(
   133  				"uncompressed more than expected: expected %d, got %d so far",
   134  				uncompressedBlockLen, uncompressedSoFar)
   135  		}
   136  	}
   137  	return out, nil
   138  }