github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps.py (about)

     1  # coding=utf-8
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  # pytype: skip-file
    20  
    21  
    22  def withtimestamps_event_time(test=None):
    23    # [START withtimestamps_event_time]
    24    import apache_beam as beam
    25  
    26    class GetTimestamp(beam.DoFn):
    27      def process(self, plant, timestamp=beam.DoFn.TimestampParam):
    28        yield '{} - {}'.format(timestamp.to_utc_datetime(), plant['name'])
    29  
    30    with beam.Pipeline() as pipeline:
    31      plant_timestamps = (
    32          pipeline
    33          | 'Garden plants' >> beam.Create([
    34              {'name': 'Strawberry', 'season': 1585699200}, # April, 2020
    35              {'name': 'Carrot', 'season': 1590969600},     # June, 2020
    36              {'name': 'Artichoke', 'season': 1583020800},  # March, 2020
    37              {'name': 'Tomato', 'season': 1588291200},     # May, 2020
    38              {'name': 'Potato', 'season': 1598918400},     # September, 2020
    39          ])
    40          | 'With timestamps' >> beam.Map(
    41              lambda plant: beam.window.TimestampedValue(plant, plant['season']))
    42          | 'Get timestamp' >> beam.ParDo(GetTimestamp())
    43          | beam.Map(print)
    44      )
    45      # [END withtimestamps_event_time]
    46      if test:
    47        test(plant_timestamps)
    48  
    49  
    50  def withtimestamps_logical_clock(test=None):
    51    # [START withtimestamps_logical_clock]
    52    import apache_beam as beam
    53  
    54    class GetTimestamp(beam.DoFn):
    55      def process(self, plant, timestamp=beam.DoFn.TimestampParam):
    56        event_id = int(timestamp.micros / 1e6)  # equivalent to seconds
    57        yield '{} - {}'.format(event_id, plant['name'])
    58  
    59    with beam.Pipeline() as pipeline:
    60      plant_events = (
    61          pipeline
    62          | 'Garden plants' >> beam.Create([
    63              {'name': 'Strawberry', 'event_id': 1},
    64              {'name': 'Carrot', 'event_id': 4},
    65              {'name': 'Artichoke', 'event_id': 2},
    66              {'name': 'Tomato', 'event_id': 3},
    67              {'name': 'Potato', 'event_id': 5},
    68          ])
    69          | 'With timestamps' >> beam.Map(lambda plant: \
    70              beam.window.TimestampedValue(plant, plant['event_id']))
    71          | 'Get timestamp' >> beam.ParDo(GetTimestamp())
    72          | beam.Map(print)
    73      )
    74      # [END withtimestamps_logical_clock]
    75      if test:
    76        test(plant_events)
    77  
    78  
    79  def withtimestamps_processing_time(test=None):
    80    # [START withtimestamps_processing_time]
    81    import apache_beam as beam
    82    import time
    83  
    84    class GetTimestamp(beam.DoFn):
    85      def process(self, plant, timestamp=beam.DoFn.TimestampParam):
    86        yield '{} - {}'.format(timestamp.to_utc_datetime(), plant['name'])
    87  
    88    with beam.Pipeline() as pipeline:
    89      plant_processing_times = (
    90          pipeline
    91          | 'Garden plants' >> beam.Create([
    92              {'name': 'Strawberry'},
    93              {'name': 'Carrot'},
    94              {'name': 'Artichoke'},
    95              {'name': 'Tomato'},
    96              {'name': 'Potato'},
    97          ])
    98          | 'With timestamps' >> beam.Map(lambda plant: \
    99              beam.window.TimestampedValue(plant, time.time()))
   100          | 'Get timestamp' >> beam.ParDo(GetTimestamp())
   101          | beam.Map(print)
   102      )
   103      # [END withtimestamps_processing_time]
   104      if test:
   105        test(plant_processing_times)
   106  
   107  
   108  def time_tuple2unix_time():
   109    # [START time_tuple2unix_time]
   110    import time
   111  
   112    time_tuple = time.strptime('2020-03-19 20:50:00', '%Y-%m-%d %H:%M:%S')
   113    unix_time = time.mktime(time_tuple)
   114    # [END time_tuple2unix_time]
   115    return unix_time
   116  
   117  
   118  def datetime2unix_time():
   119    # [START datetime2unix_time]
   120    import time
   121    import datetime
   122  
   123    now = datetime.datetime.now()
   124    time_tuple = now.timetuple()
   125    unix_time = time.mktime(time_tuple)
   126    # [END datetime2unix_time]
   127    return unix_time