github.com/apache/beam/sdks/v2@v2.48.2/go/examples/readavro/readavro.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // readavro is a simple Avro read/write Example 17 // This example uses a 500 Byte sample avro file [twitter.avro] 18 // download here: https://s3-eu-west-1.amazonaws.com/daidokoro-dev/apache/twitter.avro 19 package main 20 21 import ( 22 "context" 23 "encoding/json" 24 "flag" 25 "log" 26 "reflect" 27 28 "github.com/apache/beam/sdks/v2/go/pkg/beam" 29 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/avroio" 30 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 31 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/debug" 32 ) 33 34 var ( 35 input = flag.String("input", "./twitter.avro", "input avro file") 36 output = flag.String("output", "./output.avro", "output avro file") 37 ) 38 39 // Doc type used to unmarshal avro json data 40 type Doc struct { 41 Stamp int64 `json:"timestamp"` 42 Tweet string `json:"tweet"` 43 User string `json:"username"` 44 } 45 46 // Note that the schema is only required for Writing avro. 47 // not Reading. 48 const schema = `{ 49 "type": "record", 50 "name": "tweet", 51 "namespace": "twitter", 52 "fields": [ 53 { "name": "timestamp", "type": "double" }, 54 { "name": "tweet", "type": "string" }, 55 { "name": "username", "type": "string" } 56 ] 57 }` 58 59 func main() { 60 flag.Parse() 61 beam.Init() 62 63 p := beam.NewPipeline() 64 s := p.Root() 65 66 // read rows and return JSON string PCollection - PCollection<string> 67 rows := avroio.Read(s, *input, reflect.TypeOf("")) 68 debug.Print(s, rows) 69 70 // read rows and return Doc Type PCollection - PCollection<Doc> 71 docs := avroio.Read(s, *input, reflect.TypeOf(Doc{})) 72 debug.Print(s, docs) 73 74 // update all values with a single user and tweet. 75 format := beam.ParDo(s, func(d Doc, emit func(string)) { 76 d.User = "daidokoro" 77 d.Tweet = "I was here......" 78 79 b, _ := json.Marshal(d) 80 emit(string(b)) 81 }, docs) 82 83 debug.Print(s, format) 84 85 // write output 86 avroio.Write(s, *output, schema, format) 87 88 if err := beamx.Run(context.Background(), p); err != nil { 89 log.Fatalf("Failed to execute job: %v", err) 90 } 91 }