kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beamio/beamio.go (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package beamio provides Beam transformations for common IO patterns. 18 package beamio // import "kythe.io/kythe/go/serving/pipeline/beamio" 19 20 import ( 21 "bytes" 22 "encoding/binary" 23 "fmt" 24 "reflect" 25 26 "github.com/apache/beam/sdks/go/pkg/beam" 27 ) 28 29 func init() { 30 beam.RegisterType(reflect.TypeOf((*encodeKeyValue)(nil)).Elem()) 31 beam.RegisterType(reflect.TypeOf((*KeyValue)(nil)).Elem()) 32 } 33 34 // EncodeKeyValues encodes each PCollection of KVs into encoded KeyValues and 35 // flattens all entries into a single PCollection. 36 func EncodeKeyValues(s beam.Scope, tables ...beam.PCollection) beam.PCollection { 37 var encodings []beam.PCollection 38 for _, table := range tables { 39 t := table.Type() 40 encoded := beam.ParDo(s, &encodeKeyValue{ 41 KeyType: beam.EncodedType{t.Components()[0].Type()}, 42 ValueType: beam.EncodedType{t.Components()[1].Type()}, 43 }, table) 44 encodings = append(encodings, encoded) 45 } 46 return beam.Flatten(s, encodings...) 47 } 48 49 type encodeKeyValue struct{ KeyType, ValueType beam.EncodedType } 50 51 func (e *encodeKeyValue) ProcessElement(key beam.T, val beam.U) (KeyValue, error) { 52 keyEnc := beam.NewElementEncoder(e.KeyType.T) 53 var keyBuf bytes.Buffer 54 if err := keyEnc.Encode(key, &keyBuf); err != nil { 55 return KeyValue{}, err 56 } else if _, err := binary.ReadUvarint(&keyBuf); err != nil { 57 return KeyValue{}, fmt.Errorf("error removing varint prefix from key encoding: %v", err) 58 } 59 valEnc := beam.NewElementEncoder(e.ValueType.T) 60 var valBuf bytes.Buffer 61 if err := valEnc.Encode(val, &valBuf); err != nil { 62 return KeyValue{}, err 63 } else if _, err := binary.ReadUvarint(&valBuf); err != nil { 64 return KeyValue{}, fmt.Errorf("error removing varint prefix from value encoding: %v", err) 65 } 66 return KeyValue{Key: keyBuf.Bytes(), Value: valBuf.Bytes()}, nil 67 } 68 69 // A KeyValue is a concrete form of a Beam KV. 70 type KeyValue struct { 71 Key []byte `json:"k"` 72 Value []byte `json:"v"` 73 }