go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/packedintset/set.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packedintset implements a way to store integer sets in compact form.
    16  //
    17  // Integers to be packed must already be sorted. They are converted into
    18  // delta-encoded varints and then zlib-compressed.
    19  //
    20  // This is used primarily in `cas` binary to store sets of file sizes it
    21  // downloads or uploads.
    22  package packedintset
    23  
    24  import (
    25  	"bytes"
    26  	"io"
    27  
    28  	"github.com/klauspost/compress/zlib"
    29  
    30  	"go.chromium.org/luci/common/errors"
    31  )
    32  
    33  // Pack returns a deflate'd buffer of delta encoded varints.
    34  //
    35  // Inputs must be sorted in ascending order already.
    36  func Pack(values []int64) ([]byte, error) {
    37  	if len(values) == 0 {
    38  		return nil, nil
    39  	}
    40  	if values[0] < 0 {
    41  		return nil, errors.Reason("values must be between 0 and 2**63").Err()
    42  	}
    43  	if values[len(values)-1] < 0 {
    44  		return nil, errors.Reason("values must be between 0 and 2**63").Err()
    45  	}
    46  
    47  	var b bytes.Buffer
    48  	w := zlib.NewWriter(&b)
    49  	var last int64
    50  	for _, value := range values {
    51  		v := value
    52  		value -= last
    53  		if value < 0 {
    54  			_ = w.Close()
    55  			return nil, errors.Reason("list must be sorted ascending").Err()
    56  		}
    57  		last = v
    58  		for value > 127 {
    59  			if _, err := w.Write([]byte{byte(1<<7 | value&0x7f)}); err != nil {
    60  				_ = w.Close()
    61  				return nil, errors.Annotate(err, "failed to write").Err()
    62  			}
    63  			value >>= 7
    64  		}
    65  		if _, err := w.Write([]byte{byte(value)}); err != nil {
    66  			_ = w.Close()
    67  			return nil, errors.Annotate(err, "failed to write").Err()
    68  		}
    69  	}
    70  
    71  	if err := w.Close(); err != nil {
    72  		return nil, errors.Annotate(err, "failed to close zlib writer").Err()
    73  	}
    74  
    75  	return b.Bytes(), nil
    76  }
    77  
    78  // Unpack decompresses a deflate'd delta encoded list of varints.
    79  func Unpack(data []byte) ([]int64, error) {
    80  	if len(data) == 0 {
    81  		return nil, nil
    82  	}
    83  
    84  	var ret []int64
    85  	var value int64
    86  	var base int64 = 1
    87  	var last int64
    88  
    89  	r, err := zlib.NewReader(bytes.NewReader(data))
    90  	if err != nil {
    91  		return nil, errors.Annotate(err, "failed to get zlib reader").Err()
    92  	}
    93  
    94  	data, err = io.ReadAll(r)
    95  	if err != nil {
    96  		_ = r.Close()
    97  		return nil, errors.Annotate(err, "failed to read all").Err()
    98  	}
    99  
   100  	for _, valByte := range data {
   101  		value += int64(valByte&0x7f) * base
   102  		if valByte&0x80 > 0 {
   103  			base <<= 7
   104  			continue
   105  		}
   106  		ret = append(ret, value+last)
   107  		last += value
   108  		value = 0
   109  		base = 1
   110  	}
   111  
   112  	if err := r.Close(); err != nil {
   113  		return nil, errors.Annotate(err, "failed to close zlib reader").Err()
   114  	}
   115  
   116  	return ret, nil
   117  }