github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/aggregation/top.py (about)

     1  # coding=utf-8
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  
    20  def top_largest(test=None):
    21    # [START top_largest]
    22    import apache_beam as beam
    23  
    24    with beam.Pipeline() as pipeline:
    25      largest_elements = (
    26          pipeline
    27          | 'Create numbers' >> beam.Create([3, 4, 1, 2])
    28          | 'Largest N values' >> beam.combiners.Top.Largest(2)
    29          | beam.Map(print))
    30      # [END top_largest]
    31      if test:
    32        test(largest_elements)
    33  
    34  
    35  def top_largest_per_key(test=None):
    36    # [START top_largest_per_key]
    37    import apache_beam as beam
    38  
    39    with beam.Pipeline() as pipeline:
    40      largest_elements_per_key = (
    41          pipeline
    42          | 'Create produce' >> beam.Create([
    43              ('🥕', 3),
    44              ('🥕', 2),
    45              ('🍆', 1),
    46              ('🍅', 4),
    47              ('🍅', 5),
    48              ('🍅', 3),
    49          ])
    50          | 'Largest N values per key' >> beam.combiners.Top.LargestPerKey(2)
    51          | beam.Map(print))
    52      # [END top_largest_per_key]
    53      if test:
    54        test(largest_elements_per_key)
    55  
    56  
    57  def top_smallest(test=None):
    58    # [START top_smallest]
    59    import apache_beam as beam
    60  
    61    with beam.Pipeline() as pipeline:
    62      smallest_elements = (
    63          pipeline
    64          | 'Create numbers' >> beam.Create([3, 4, 1, 2])
    65          | 'Smallest N values' >> beam.combiners.Top.Smallest(2)
    66          | beam.Map(print))
    67      # [END top_smallest]
    68      if test:
    69        test(smallest_elements)
    70  
    71  
    72  def top_smallest_per_key(test=None):
    73    # [START top_smallest_per_key]
    74    import apache_beam as beam
    75  
    76    with beam.Pipeline() as pipeline:
    77      smallest_elements_per_key = (
    78          pipeline
    79          | 'Create produce' >> beam.Create([
    80              ('🥕', 3),
    81              ('🥕', 2),
    82              ('🍆', 1),
    83              ('🍅', 4),
    84              ('🍅', 5),
    85              ('🍅', 3),
    86          ])
    87          | 'Smallest N values per key' >> beam.combiners.Top.SmallestPerKey(2)
    88          | beam.Map(print))
    89      # [END top_smallest_per_key]
    90      if test:
    91        test(smallest_elements_per_key)
    92  
    93  
    94  def top_of(test=None):
    95    # [START top_of]
    96    import apache_beam as beam
    97  
    98    with beam.Pipeline() as pipeline:
    99      shortest_elements = (
   100          pipeline
   101          | 'Create produce names' >> beam.Create([
   102              '🍓 Strawberry',
   103              '🥕 Carrot',
   104              '🍏 Green apple',
   105              '🍆 Eggplant',
   106              '🌽 Corn',
   107          ])
   108          | 'Shortest names' >> beam.combiners.Top.Of(
   109              2,             # number of elements
   110              key=len,       # optional, defaults to the element itself
   111              reverse=True,  # optional, defaults to False (largest/descending)
   112          )
   113          | beam.Map(print)
   114      )
   115      # [END top_of]
   116      if test:
   117        test(shortest_elements)
   118  
   119  
   120  def top_per_key(test=None):
   121    # [START top_per_key]
   122    import apache_beam as beam
   123  
   124    with beam.Pipeline() as pipeline:
   125      shortest_elements_per_key = (
   126          pipeline
   127          | 'Create produce names' >> beam.Create([
   128              ('spring', '🥕 Carrot'),
   129              ('spring', '🍓 Strawberry'),
   130              ('summer', '🥕 Carrot'),
   131              ('summer', '🌽 Corn'),
   132              ('summer', '🍏 Green apple'),
   133              ('fall', '🥕 Carrot'),
   134              ('fall', '🍏 Green apple'),
   135              ('winter', '🍆 Eggplant'),
   136          ])
   137          | 'Shortest names per key' >> beam.combiners.Top.PerKey(
   138              2,             # number of elements
   139              key=len,       # optional, defaults to the value itself
   140              reverse=True,  # optional, defaults to False (largest/descending)
   141          )
   142          | beam.Map(print)
   143      )
   144      # [END top_per_key]
   145      if test:
   146        test(shortest_elements_per_key)