# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file


def groupbykey(test=None):
  """Group hard-coded (season, produce) pairs by season and print them.

  The pipeline creates a collection of 2-tuples keyed by season name,
  applies ``GroupByKey``, sorts the values collected for each key, and
  passes every resulting ``(key, sorted_values)`` element to ``print``.

  Args:
    test: Optional callable. When truthy, it is called with the final
      collection produced by the ``print`` step.
  """
  # [START groupbykey]
  import apache_beam as beam

  # (season, produce) pairs fed into the pipeline.
  seasonal_produce = [
      ('spring', '🍓'),
      ('spring', '🥕'),
      ('spring', '🍆'),
      ('spring', '🍅'),
      ('summer', '🥕'),
      ('summer', '🍅'),
      ('summer', '🌽'),
      ('fall', '🥕'),
      ('fall', '🍅'),
      ('winter', '🍆'),
  ]

  with beam.Pipeline() as pipeline:
    keyed = pipeline | 'Create produce counts' >> beam.Create(seasonal_produce)
    grouped = keyed | 'Group counts per produce' >> beam.GroupByKey()
    # Sort each key's grouped values and emit them as a (key, list) tuple.
    ordered = grouped | beam.MapTuple(
        lambda season, items: (season, sorted(items)))
    produce_counts = ordered | beam.Map(print)
    # [END groupbykey]
    # NOTE(review): the original indentation was not recoverable; the hook is
    # kept inside the `with` block on the assumption that it must attach to
    # the pipeline before the context manager exits -- confirm against the
    # upstream file.
    if test:
      test(produce_counts)