# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


def top_largest(test=None):
  """Run a pipeline that applies ``Top.Largest(2)`` to four integers.

  The final step maps ``print`` over the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_largest]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    numbers = pipeline | 'Create numbers' >> beam.Create([3, 4, 1, 2])
    top_two = numbers | 'Largest N values' >> beam.combiners.Top.Largest(2)
    largest_elements = top_two | beam.Map(print)
    # [END top_largest]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(largest_elements)


def top_largest_per_key(test=None):
  """Run a pipeline that applies ``Top.LargestPerKey(2)`` to keyed integers.

  The input is a list of ``(produce, count)`` tuples; the final step maps
  ``print`` over the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_largest_per_key]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    produce = pipeline | 'Create produce' >> beam.Create([
        ('🥕', 3),
        ('🥕', 2),
        ('🍆', 1),
        ('🍅', 4),
        ('🍅', 5),
        ('🍅', 3),
    ])
    top_two_per_key = (
        produce
        | 'Largest N values per key' >> beam.combiners.Top.LargestPerKey(2))
    largest_elements_per_key = top_two_per_key | beam.Map(print)
    # [END top_largest_per_key]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(largest_elements_per_key)


def top_smallest(test=None):
  """Run a pipeline that applies ``Top.Smallest(2)`` to four integers.

  The final step maps ``print`` over the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_smallest]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    numbers = pipeline | 'Create numbers' >> beam.Create([3, 4, 1, 2])
    bottom_two = numbers | 'Smallest N values' >> beam.combiners.Top.Smallest(2)
    smallest_elements = bottom_two | beam.Map(print)
    # [END top_smallest]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(smallest_elements)


def top_smallest_per_key(test=None):
  """Run a pipeline that applies ``Top.SmallestPerKey(2)`` to keyed integers.

  The input is a list of ``(produce, count)`` tuples; the final step maps
  ``print`` over the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_smallest_per_key]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    produce = pipeline | 'Create produce' >> beam.Create([
        ('🥕', 3),
        ('🥕', 2),
        ('🍆', 1),
        ('🍅', 4),
        ('🍅', 5),
        ('🍅', 3),
    ])
    bottom_two_per_key = (
        produce
        | 'Smallest N values per key' >> beam.combiners.Top.SmallestPerKey(2))
    smallest_elements_per_key = bottom_two_per_key | beam.Map(print)
    # [END top_smallest_per_key]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(smallest_elements_per_key)


def top_of(test=None):
  """Run a pipeline that applies ``Top.Of`` with a custom key and order.

  The input is a list of produce-name strings. ``Top.Of`` is called with
  ``2``, ``key=len`` and ``reverse=True``; the final step maps ``print`` over
  the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_of]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    produce_names = pipeline | 'Create produce names' >> beam.Create([
        '🍓 Strawberry',
        '🥕 Carrot',
        '🍏 Green apple',
        '🍆 Eggplant',
        '🌽 Corn',
    ])
    shortest_two = produce_names | 'Shortest names' >> beam.combiners.Top.Of(
        2,  # number of elements
        key=len,  # optional, defaults to the element itself
        reverse=True,  # optional, defaults to False (largest/descending)
    )
    shortest_elements = shortest_two | beam.Map(print)
    # [END top_of]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(shortest_elements)


def top_per_key(test=None):
  """Run a pipeline that applies ``Top.PerKey`` with a custom key and order.

  The input is a list of ``(season, produce_name)`` tuples. ``Top.PerKey`` is
  called with ``2``, ``key=len`` and ``reverse=True``; the final step maps
  ``print`` over the combiner's output.

  Args:
    test: Optional callable. When truthy it is called with the final
      PCollection from inside the ``with`` block, i.e. before the pipeline
      context manager exits.
  """
  # [START top_per_key]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    seasonal_produce = pipeline | 'Create produce names' >> beam.Create([
        ('spring', '🥕 Carrot'),
        ('spring', '🍓 Strawberry'),
        ('summer', '🥕 Carrot'),
        ('summer', '🌽 Corn'),
        ('summer', '🍏 Green apple'),
        ('fall', '🥕 Carrot'),
        ('fall', '🍏 Green apple'),
        ('winter', '🍆 Eggplant'),
    ])
    shortest_two_per_key = (
        seasonal_produce
        | 'Shortest names per key' >> beam.combiners.Top.PerKey(
            2,  # number of elements
            key=len,  # optional, defaults to the value itself
            reverse=True,  # optional, defaults to False (largest/descending)
        ))
    shortest_elements_per_key = shortest_two_per_key | beam.Map(print)
    # [END top_per_key]
    # Hook the caller's check in while the pipeline is still open.
    if test:
      test(shortest_elements_per_key)