go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/service/datastore/protos.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package datastore
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"reflect"
    21  
    22  	"google.golang.org/protobuf/proto"
    23  
    24  	"go.chromium.org/luci/gae/internal/zstd"
    25  )
    26  
// protoOption specifies how to handle a field implementing proto.Message.
//
// **Modern format**: the first byte is reserved for a marker denoting the
// format, which makes it incompatible with the legacy format. Options
// currently supported:
//
//	""            - (default) no compression for small messages; zstd
//	                compression once the serialized proto is larger than
//	                compressionThreshold bytes.
//	"nocompress"  - never compress.
//	                Able to read compressed items in modern format.
//	"zstd"        - always compress the serialized proto with zstd.
//	                Able to read non-compressed items in modern format.
//
// **Legacy formats**: not compatible with each other or with the modern
// format. Options supported:
//
//	"legacy"      - reads/writes the serialized proto message as is, with no
//	                format marker. Useful for migrating off the `proto-gae`
//	                tool.
type protoOption string
    43  
    44  const (
    45  	// non-legacy proto serialization first writes a varint with its kind.
    46  	// To avoid accidental overlap with legacy protobuf encoding and ensure
    47  	// that proto unmarshaling will error out on it, use
    48  	//
    49  	//     number := (N<<3) | 4
    50  	//
    51  	// Explanation:
    52  	// Proto serialization also first writes a varint on the wire representing so
    53  	// called "tag", which is comprised of field number and wire type (see [1]):
    54  	//     tag := (field_number<<3) | wire_type
    55  	//
    56  	// There are 2 long deprecated wire type which isn't even supported by most
    57  	// languages (see [2]), one of which is "group end" which has a value of 4.
    58  	// Group end specifically shouldn't be at the beginning of a message,
    59  	// notwithstanding smart-ass hackery like this one, of course.
    60  	// Therefore, for any field number N, value of `(N<<3) | 4`, incorrect proto
    61  	// decoding will error out pretty quickly.
    62  	//
    63  	// [1] https://developers.google.com/protocol-buffers/docs/encoding#structure
    64  	// [2] https://stackoverflow.com/a/33821387
    65  
    66  	// WARNING: changing these values is not backwards compatible.
    67  
    68  	protoBinOptNoCompress = (1 << 3) | 4
    69  	protoBinOptZSTD       = (2 << 3) | 4
    70  )
    71  
    72  // compressionThreshold is the number of bytes of serialized proto value after which
    73  // compression kicks in.
    74  const compressionThreshold = 16 * 1024
    75  
// errInvalidProtoPrefix is returned when a stored blob read with a modern
// (non-legacy) proto option does not start with a recognized one-byte format
// marker.
var errInvalidProtoPrefix = fmt.Errorf("invalid gae proto serialization or unrecognized compression scheme")
    77  
    78  func protoToProperty(pb proto.Message, opt protoOption) (prop Property, err error) {
    79  	// proto can't marshall to io.Writer, so might as well serialize it now,
    80  	// but leave first byte free for "nocompress" case.
    81  	blob := make([]byte, 1, 16)
    82  	if blob, err = (proto.MarshalOptions{}).MarshalAppend(blob, pb); err != nil {
    83  		return
    84  	}
    85  	pbblob := blob[1:]
    86  
    87  	if opt == "" /*default*/ {
    88  		opt = "nocompress"
    89  		if len(pbblob) > compressionThreshold {
    90  			opt = "zstd"
    91  		}
    92  	}
    93  
    94  	switch opt {
    95  	case "legacy":
    96  		prop = MkPropertyNI(pbblob)
    97  		return
    98  	case "nocompress":
    99  		write1ByteProtoOpt(blob, protoBinOptNoCompress)
   100  		prop = MkPropertyNI(blob)
   101  		return
   102  	case "zstd":
   103  		// allocate new buffer for compressed data, hoping for ~2x compression.
   104  		blob = make([]byte, 1, len(pbblob)/2)
   105  		write1ByteProtoOpt(blob, protoBinOptZSTD)
   106  		blob = zstd.EncodeAll(pbblob, blob)
   107  		prop = MkPropertyNI(blob)
   108  		return
   109  	default:
   110  		panic(fmt.Errorf("unrecognized proto option: %q", opt))
   111  	}
   112  }
   113  
   114  func protoFromProperty(field reflect.Value, prop Property, opt protoOption) error {
   115  	pm, _ := field.Interface().(proto.Message)
   116  	data, err := prop.Project(PTBytes)
   117  	if err != nil {
   118  		return err
   119  	}
   120  	blob := data.([]byte)
   121  	pm = pm.ProtoReflect().New().Interface()
   122  
   123  	switch opt {
   124  	case "legacy":
   125  		break // read entire blob.
   126  	case "zstd", "", "nocompress":
   127  		switch binOpt, readBytes := binary.Uvarint(blob); {
   128  		case readBytes != 1:
   129  			return errInvalidProtoPrefix
   130  		case protoBinOptNoCompress == binOpt:
   131  			blob = blob[1:]
   132  		case protoBinOptZSTD == binOpt:
   133  			if blob, err = zstd.DecodeAll(blob[1:], nil); err != nil {
   134  				return err
   135  			}
   136  		default:
   137  			return errInvalidProtoPrefix
   138  		}
   139  	default:
   140  		panic(fmt.Errorf("unrecognized proto option: %q", opt))
   141  	}
   142  
   143  	if err = proto.Unmarshal(blob, pm); err != nil {
   144  		return err
   145  	}
   146  	field.Set(reflect.ValueOf(pm))
   147  	return nil
   148  }
   149  
   150  func write1ByteProtoOpt(b []byte, opt uint64) {
   151  	if n := binary.PutUvarint(b, opt); n != 1 {
   152  		panic(fmt.Errorf("protoOption longer than 1 byte: %d", n))
   153  	}
   154  }