kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beamio/beamio.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package beamio provides Beam transformations for common IO patterns.
    18  package beamio // import "kythe.io/kythe/go/serving/pipeline/beamio"
    19  
    20  import (
    21  	"bytes"
    22  	"encoding/binary"
    23  	"fmt"
    24  	"reflect"
    25  
    26  	"github.com/apache/beam/sdks/go/pkg/beam"
    27  )
    28  
    29  func init() {
    30  	beam.RegisterType(reflect.TypeOf((*encodeKeyValue)(nil)).Elem())
    31  	beam.RegisterType(reflect.TypeOf((*KeyValue)(nil)).Elem())
    32  }
    33  
    34  // EncodeKeyValues encodes each PCollection of KVs into encoded KeyValues and
    35  // flattens all entries into a single PCollection.
    36  func EncodeKeyValues(s beam.Scope, tables ...beam.PCollection) beam.PCollection {
    37  	var encodings []beam.PCollection
    38  	for _, table := range tables {
    39  		t := table.Type()
    40  		encoded := beam.ParDo(s, &encodeKeyValue{
    41  			KeyType:   beam.EncodedType{t.Components()[0].Type()},
    42  			ValueType: beam.EncodedType{t.Components()[1].Type()},
    43  		}, table)
    44  		encodings = append(encodings, encoded)
    45  	}
    46  	return beam.Flatten(s, encodings...)
    47  }
    48  
    49  type encodeKeyValue struct{ KeyType, ValueType beam.EncodedType }
    50  
    51  func (e *encodeKeyValue) ProcessElement(key beam.T, val beam.U) (KeyValue, error) {
    52  	keyEnc := beam.NewElementEncoder(e.KeyType.T)
    53  	var keyBuf bytes.Buffer
    54  	if err := keyEnc.Encode(key, &keyBuf); err != nil {
    55  		return KeyValue{}, err
    56  	} else if _, err := binary.ReadUvarint(&keyBuf); err != nil {
    57  		return KeyValue{}, fmt.Errorf("error removing varint prefix from key encoding: %v", err)
    58  	}
    59  	valEnc := beam.NewElementEncoder(e.ValueType.T)
    60  	var valBuf bytes.Buffer
    61  	if err := valEnc.Encode(val, &valBuf); err != nil {
    62  		return KeyValue{}, err
    63  	} else if _, err := binary.ReadUvarint(&valBuf); err != nil {
    64  		return KeyValue{}, fmt.Errorf("error removing varint prefix from value encoding: %v", err)
    65  	}
    66  	return KeyValue{Key: keyBuf.Bytes(), Value: valBuf.Bytes()}, nil
    67  }
    68  
    69  // A KeyValue is a concrete form of a Beam KV.
    70  type KeyValue struct {
    71  	Key   []byte `json:"k"`
    72  	Value []byte `json:"v"`
    73  }