github.com/apache/beam/sdks/v2@v2.48.2/go/examples/cookbook/max/max.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package main 17 18 // See: https://github.com/apache/beam/blob/master/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java 19 20 import ( 21 "context" 22 "flag" 23 "reflect" 24 25 "github.com/apache/beam/sdks/v2/go/pkg/beam" 26 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/bigqueryio" 27 "github.com/apache/beam/sdks/v2/go/pkg/beam/log" 28 "github.com/apache/beam/sdks/v2/go/pkg/beam/options/gcpopts" 29 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 30 "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats" 31 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 32 ) 33 34 var ( 35 input = flag.String("input", "clouddataflow-readonly:samples.weather_stations", "Weather data BQ table.") 36 output = flag.String("output", "", "Output BQ table.") 37 ) 38 39 func init() { 40 register.Function2x1(formatFn) 41 register.Function1x2(extractFn) 42 } 43 44 type WeatherDataRow struct { 45 Month int `bigquery:"month"` 46 MeanTemp float64 `bigquery:"mean_temp"` 47 } 48 49 type MaxMeanTempRow struct { 50 Month int `bigquery:"month"` 51 MaxMeanTemp float64 `bigquery:"max_mean_temp"` 52 } 53 54 // MaxMeanTemp finds the max mean_temp for each month. It takes a 55 // PCollection<WeatherDataRow> and returns a PCollection<MaxMeanTempRow>. 56 func MaxMeanTemp(s beam.Scope, rows beam.PCollection) beam.PCollection { 57 s = s.Scope("MaxMeanTemp") 58 59 keyed := beam.ParDo(s, extractFn, rows) 60 maxTemps := stats.MaxPerKey(s, keyed) 61 return beam.ParDo(s, formatFn, maxTemps) 62 } 63 64 func extractFn(row WeatherDataRow) (int, float64) { 65 return row.Month, row.MeanTemp 66 } 67 68 func formatFn(month int, temp float64) MaxMeanTempRow { 69 return MaxMeanTempRow{Month: month, MaxMeanTemp: temp} 70 } 71 72 func main() { 73 flag.Parse() 74 beam.Init() 75 76 ctx := context.Background() 77 78 if *output == "" { 79 log.Exit(ctx, "No output table specified. Use --output=<table>") 80 } 81 project := gcpopts.GetProject(ctx) 82 83 log.Info(ctx, "Running max") 84 85 p := beam.NewPipeline() 86 s := p.Root() 87 rows := bigqueryio.Read(s, project, *input, reflect.TypeOf(WeatherDataRow{})) 88 out := MaxMeanTemp(s, rows) 89 bigqueryio.Write(s, project, *output, out) 90 91 if err := beamx.Run(ctx, p); err != nil { 92 log.Exitf(ctx, "Failed to execute job: %v", err) 93 } 94 }