github.com/apache/beam/sdks/v2@v2.48.2/go/examples/cookbook/max/max.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package main
    17  
    18  // See: https://github.com/apache/beam/blob/master/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
    19  
    20  import (
    21  	"context"
    22  	"flag"
    23  	"reflect"
    24  
    25  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    26  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/bigqueryio"
    27  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    28  	"github.com/apache/beam/sdks/v2/go/pkg/beam/options/gcpopts"
    29  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    30  	"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
    31  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    32  )
    33  
// Command-line flags naming the BigQuery tables used by main.
var (
	// input is the table main reads weather rows from. It defaults to
	// clouddataflow-readonly:samples.weather_stations.
	input  = flag.String("input", "clouddataflow-readonly:samples.weather_stations", "Weather data BQ table.")
	// output is the table main writes the per-month maxima to. It has no
	// default; main exits with a usage message when it is left empty.
	output = flag.String("output", "", "Output BQ table.")
)
    38  
    39  func init() {
    40  	register.Function2x1(formatFn)
    41  	register.Function1x2(extractFn)
    42  }
    43  
// WeatherDataRow is the element type main passes to bigqueryio.Read for the
// input table. The bigquery struct tags name the column bound to each field.
type WeatherDataRow struct {
	// Month is the value of the "month" column; it is the key that
	// extractFn emits.
	Month    int     `bigquery:"month"`
	// MeanTemp is the value of the "mean_temp" column; it is the value that
	// extractFn emits.
	MeanTemp float64 `bigquery:"mean_temp"`
}
    48  
// MaxMeanTempRow is the element type produced by formatFn and written to the
// output table by main. The bigquery struct tags name the column bound to
// each field.
type MaxMeanTempRow struct {
	// Month is stored in the "month" column.
	Month       int     `bigquery:"month"`
	// MaxMeanTemp is stored in the "max_mean_temp" column.
	MaxMeanTemp float64 `bigquery:"max_mean_temp"`
}
    53  
    54  // MaxMeanTemp finds the max mean_temp for each month. It takes a
    55  // PCollection<WeatherDataRow> and returns a PCollection<MaxMeanTempRow>.
    56  func MaxMeanTemp(s beam.Scope, rows beam.PCollection) beam.PCollection {
    57  	s = s.Scope("MaxMeanTemp")
    58  
    59  	keyed := beam.ParDo(s, extractFn, rows)
    60  	maxTemps := stats.MaxPerKey(s, keyed)
    61  	return beam.ParDo(s, formatFn, maxTemps)
    62  }
    63  
    64  func extractFn(row WeatherDataRow) (int, float64) {
    65  	return row.Month, row.MeanTemp
    66  }
    67  
    68  func formatFn(month int, temp float64) MaxMeanTempRow {
    69  	return MaxMeanTempRow{Month: month, MaxMeanTemp: temp}
    70  }
    71  
    72  func main() {
    73  	flag.Parse()
    74  	beam.Init()
    75  
    76  	ctx := context.Background()
    77  
    78  	if *output == "" {
    79  		log.Exit(ctx, "No output table specified. Use --output=<table>")
    80  	}
    81  	project := gcpopts.GetProject(ctx)
    82  
    83  	log.Info(ctx, "Running max")
    84  
    85  	p := beam.NewPipeline()
    86  	s := p.Root()
    87  	rows := bigqueryio.Read(s, project, *input, reflect.TypeOf(WeatherDataRow{}))
    88  	out := MaxMeanTemp(s, rows)
    89  	bigqueryio.Write(s, project, *output, out)
    90  
    91  	if err := beamx.Run(ctx, p); err != nil {
    92  		log.Exitf(ctx, "Failed to execute job: %v", err)
    93  	}
    94  }