github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/aggregation/latest.py (about)

     1  # coding=utf-8
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  # pytype: skip-file
    20  
    21  
    22  def latest_globally(test=None):
    23    # [START latest_globally]
    24    import apache_beam as beam
    25    import time
    26  
    27    def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
    28      return time.mktime(time.strptime(time_str, format))
    29  
    30    with beam.Pipeline() as pipeline:
    31      latest_element = (
    32          pipeline
    33          | 'Create crops' >> beam.Create([
    34              {
    35                  'item': '🥬', 'harvest': '2020-02-24 00:00:00'
    36              },
    37              {
    38                  'item': '🍓', 'harvest': '2020-06-16 00:00:00'
    39              },
    40              {
    41                  'item': '🥕', 'harvest': '2020-07-17 00:00:00'
    42              },
    43              {
    44                  'item': '🍆', 'harvest': '2020-10-26 00:00:00'
    45              },
    46              {
    47                  'item': '🍅', 'harvest': '2020-10-01 00:00:00'
    48              },
    49          ])
    50          | 'With timestamps' >> beam.Map(
    51              lambda crop: beam.window.TimestampedValue(
    52                  crop['item'], to_unix_time(crop['harvest'])))
    53          | 'Get latest element' >> beam.combiners.Latest.Globally()
    54          | beam.Map(print))
    55      # [END latest_globally]
    56      if test:
    57        test(latest_element)
    58  
    59  
    60  def latest_per_key(test=None):
    61    # [START latest_per_key]
    62    import apache_beam as beam
    63    import time
    64  
    65    def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
    66      return time.mktime(time.strptime(time_str, format))
    67  
    68    with beam.Pipeline() as pipeline:
    69      latest_elements_per_key = (
    70          pipeline
    71          | 'Create crops' >> beam.Create([
    72              ('spring', {
    73                  'item': '🥕', 'harvest': '2020-06-28 00:00:00'
    74              }),
    75              ('spring', {
    76                  'item': '🍓', 'harvest': '2020-06-16 00:00:00'
    77              }),
    78              ('summer', {
    79                  'item': '🥕', 'harvest': '2020-07-17 00:00:00'
    80              }),
    81              ('summer', {
    82                  'item': '🍓', 'harvest': '2020-08-26 00:00:00'
    83              }),
    84              ('summer', {
    85                  'item': '🍆', 'harvest': '2020-09-04 00:00:00'
    86              }),
    87              ('summer', {
    88                  'item': '🥬', 'harvest': '2020-09-18 00:00:00'
    89              }),
    90              ('summer', {
    91                  'item': '🍅', 'harvest': '2020-09-22 00:00:00'
    92              }),
    93              ('autumn', {
    94                  'item': '🍅', 'harvest': '2020-10-01 00:00:00'
    95              }),
    96              ('autumn', {
    97                  'item': '🥬', 'harvest': '2020-10-20 00:00:00'
    98              }),
    99              ('autumn', {
   100                  'item': '🍆', 'harvest': '2020-10-26 00:00:00'
   101              }),
   102              ('winter', {
   103                  'item': '🥬', 'harvest': '2020-02-24 00:00:00'
   104              }),
   105          ])
   106          | 'With timestamps' >> beam.Map(
   107              lambda pair: beam.window.TimestampedValue(
   108                  (pair[0], pair[1]['item']), to_unix_time(pair[1]['harvest'])))
   109          | 'Get latest elements per key' >> beam.combiners.Latest.PerKey()
   110          | beam.Map(print))
   111      # [END latest_per_key]
   112      if test:
   113        test(latest_elements_per_key)