# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file


def withtimestamps_event_time(test=None):
  """Timestamp each plant with its 'season' value and print the result.

  Each element is wrapped in a ``TimestampedValue`` whose timestamp is the
  element's ``'season'`` field; a DoFn then formats the attached timestamp
  as a UTC datetime followed by the plant name.

  Args:
    test: Optional callable invoked with the output PCollection.
  """
  # [START withtimestamps_event_time]
  import apache_beam as beam

  class GetTimestamp(beam.DoFn):
    def process(self, plant, timestamp=beam.DoFn.TimestampParam):
      yield '{} - {}'.format(timestamp.to_utc_datetime(), plant['name'])

  with beam.Pipeline() as pipeline:
    plant_timestamps = (
        pipeline
        | 'Garden plants' >> beam.Create([
            {'name': 'Strawberry', 'season': 1585699200},  # April, 2020
            {'name': 'Carrot', 'season': 1590969600},  # June, 2020
            {'name': 'Artichoke', 'season': 1583020800},  # March, 2020
            {'name': 'Tomato', 'season': 1588291200},  # May, 2020
            {'name': 'Potato', 'season': 1598918400},  # September, 2020
        ])
        | 'With timestamps' >> beam.Map(
            lambda plant: beam.window.TimestampedValue(plant, plant['season']))
        | 'Get timestamp' >> beam.ParDo(GetTimestamp())
        | beam.Map(print)
    )
    # [END withtimestamps_event_time]
    # Kept inside the `with` block so the hook sees the PCollection before
    # the pipeline is run when the context manager exits.
    if test:
      test(plant_timestamps)


def withtimestamps_logical_clock(test=None):
  """Timestamp each plant with its 'event_id' value and print the result.

  The ``'event_id'`` field is used as the element timestamp; a DoFn then
  converts the attached timestamp's microseconds back to whole seconds and
  formats it together with the plant name.

  Args:
    test: Optional callable invoked with the output PCollection.
  """
  # [START withtimestamps_logical_clock]
  import apache_beam as beam

  class GetTimestamp(beam.DoFn):
    def process(self, plant, timestamp=beam.DoFn.TimestampParam):
      event_id = int(timestamp.micros / 1e6)  # equivalent to seconds
      yield '{} - {}'.format(event_id, plant['name'])

  with beam.Pipeline() as pipeline:
    plant_events = (
        pipeline
        | 'Garden plants' >> beam.Create([
            {'name': 'Strawberry', 'event_id': 1},
            {'name': 'Carrot', 'event_id': 4},
            {'name': 'Artichoke', 'event_id': 2},
            {'name': 'Tomato', 'event_id': 3},
            {'name': 'Potato', 'event_id': 5},
        ])
        | 'With timestamps' >> beam.Map(
            lambda plant: beam.window.TimestampedValue(
                plant, plant['event_id']))
        | 'Get timestamp' >> beam.ParDo(GetTimestamp())
        | beam.Map(print)
    )
    # [END withtimestamps_logical_clock]
    # Kept inside the `with` block so the hook sees the PCollection before
    # the pipeline is run when the context manager exits.
    if test:
      test(plant_events)


def withtimestamps_processing_time(test=None):
  """Timestamp each plant with the current wall-clock time and print it.

  ``time.time()`` is evaluated per element inside the mapping lambda and
  used as that element's timestamp; a DoFn then formats the attached
  timestamp as a UTC datetime followed by the plant name.

  Args:
    test: Optional callable invoked with the output PCollection.
  """
  # [START withtimestamps_processing_time]
  import apache_beam as beam
  import time

  class GetTimestamp(beam.DoFn):
    def process(self, plant, timestamp=beam.DoFn.TimestampParam):
      yield '{} - {}'.format(timestamp.to_utc_datetime(), plant['name'])

  with beam.Pipeline() as pipeline:
    plant_processing_times = (
        pipeline
        | 'Garden plants' >> beam.Create([
            {'name': 'Strawberry'},
            {'name': 'Carrot'},
            {'name': 'Artichoke'},
            {'name': 'Tomato'},
            {'name': 'Potato'},
        ])
        | 'With timestamps' >> beam.Map(
            lambda plant: beam.window.TimestampedValue(plant, time.time()))
        | 'Get timestamp' >> beam.ParDo(GetTimestamp())
        | beam.Map(print)
    )
    # [END withtimestamps_processing_time]
    # Kept inside the `with` block so the hook sees the PCollection before
    # the pipeline is run when the context manager exits.
    if test:
      test(plant_processing_times)


def time_tuple2unix_time():
  """Convert a parsed time tuple to a Unix timestamp.

  Returns:
    The value of ``time.mktime`` applied to the ``struct_time`` parsed from
    the fixed string ``'2020-03-19 20:50:00'``.
  """
  # [START time_tuple2unix_time]
  import time

  time_tuple = time.strptime('2020-03-19 20:50:00', '%Y-%m-%d %H:%M:%S')
  unix_time = time.mktime(time_tuple)
  # [END time_tuple2unix_time]
  return unix_time


def datetime2unix_time():
  """Convert the current ``datetime`` to a Unix timestamp.

  Returns:
    The value of ``time.mktime`` applied to the time tuple of
    ``datetime.datetime.now()``.
  """
  # [START datetime2unix_time]
  import time
  import datetime

  now = datetime.datetime.now()
  time_tuple = now.timetuple()
  unix_time = time.mktime(time_tuple)
  # [END datetime2unix_time]
  return unix_time