# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file

# Origin: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/
#     examples/snippets/transforms/aggregation/latest.py


def latest_globally(test=None):
    """Run a pipeline that prints the crop with the latest harvest timestamp.

    Each crop is emitted as a ``TimestampedValue`` whose event timestamp is
    derived from its ``'harvest'`` string; ``Latest.Globally`` is then applied
    and the result is printed.

    Args:
        test: Optional callable. When truthy it is called with the resulting
            PCollection. The call happens inside the ``with`` block so that
            anything the callable attaches to the pipeline is in place before
            the pipeline runs on context exit.
    """
    # [START latest_globally]
    import apache_beam as beam
    import calendar
    import time

    def to_unix_time(time_str, time_format='%Y-%m-%d %H:%M:%S'):
        # The harvest strings carry no time zone. Reading them as UTC keeps
        # the resulting timestamps identical on every machine, whereas
        # time.mktime would apply the local zone of whichever host runs this.
        return calendar.timegm(time.strptime(time_str, time_format))

    with beam.Pipeline() as pipeline:
        latest_element = (
            pipeline
            | 'Create crops' >> beam.Create([
                {'item': '🥬', 'harvest': '2020-02-24 00:00:00'},
                {'item': '🍓', 'harvest': '2020-06-16 00:00:00'},
                {'item': '🥕', 'harvest': '2020-07-17 00:00:00'},
                {'item': '🍆', 'harvest': '2020-10-26 00:00:00'},
                {'item': '🍅', 'harvest': '2020-10-01 00:00:00'},
            ])
            | 'With timestamps' >> beam.Map(
                lambda crop: beam.window.TimestampedValue(
                    crop['item'], to_unix_time(crop['harvest'])))
            | 'Get latest element' >> beam.combiners.Latest.Globally()
            | beam.Map(print))
        # [END latest_globally]
        if test:
            test(latest_element)


def latest_per_key(test=None):
    """Run a pipeline that prints, per season, the latest-harvested crop.

    Input elements are ``(season, crop_dict)`` pairs. Each is re-emitted as a
    ``TimestampedValue`` wrapping ``(season, item)`` with an event timestamp
    derived from the crop's ``'harvest'`` string; ``Latest.PerKey`` is then
    applied and the results are printed.

    Args:
        test: Optional callable. When truthy it is called with the resulting
            PCollection. The call happens inside the ``with`` block so that
            anything the callable attaches to the pipeline is in place before
            the pipeline runs on context exit.
    """
    # [START latest_per_key]
    import apache_beam as beam
    import calendar
    import time

    def to_unix_time(time_str, time_format='%Y-%m-%d %H:%M:%S'):
        # The harvest strings carry no time zone. Reading them as UTC keeps
        # the resulting timestamps identical on every machine, whereas
        # time.mktime would apply the local zone of whichever host runs this.
        return calendar.timegm(time.strptime(time_str, time_format))

    with beam.Pipeline() as pipeline:
        latest_elements_per_key = (
            pipeline
            | 'Create crops' >> beam.Create([
                ('spring', {'item': '🥕', 'harvest': '2020-06-28 00:00:00'}),
                ('spring', {'item': '🍓', 'harvest': '2020-06-16 00:00:00'}),
                ('summer', {'item': '🥕', 'harvest': '2020-07-17 00:00:00'}),
                ('summer', {'item': '🍓', 'harvest': '2020-08-26 00:00:00'}),
                ('summer', {'item': '🍆', 'harvest': '2020-09-04 00:00:00'}),
                ('summer', {'item': '🥬', 'harvest': '2020-09-18 00:00:00'}),
                ('summer', {'item': '🍅', 'harvest': '2020-09-22 00:00:00'}),
                ('autumn', {'item': '🍅', 'harvest': '2020-10-01 00:00:00'}),
                ('autumn', {'item': '🥬', 'harvest': '2020-10-20 00:00:00'}),
                ('autumn', {'item': '🍆', 'harvest': '2020-10-26 00:00:00'}),
                ('winter', {'item': '🥬', 'harvest': '2020-02-24 00:00:00'}),
            ])
            | 'With timestamps' >> beam.Map(
                lambda pair: beam.window.TimestampedValue(
                    (pair[0], pair[1]['item']),
                    to_unix_time(pair[1]['harvest'])))
            | 'Get latest elements per key' >> beam.combiners.Latest.PerKey()
            | beam.Map(print))
        # [END latest_per_key]
        if test:
            test(latest_elements_per_key)