github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/elementwise/pardo.py (about)

     1  # coding=utf-8
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  # pytype: skip-file
    20  
    21  
    def pardo_dofn(test=None):
      """Run the basic ParDo example: split delimited strings with a DoFn.

      The pipeline creates two comma-separated strings, splits each of them into
      individual words with a custom `DoFn`, and prints every word.

      Args:
        test: Optional callable. When truthy, it is called with the output of the
          final `beam.Map(print)` step while the pipeline context is still open.
      """
      # [START pardo_dofn]
      import apache_beam as beam

      class SplitWords(beam.DoFn):
        # The delimiter is stored on the instance so `process` can read it for
        # every element it receives.
        def __init__(self, delimiter=','):
          self.delimiter = delimiter

        def process(self, text):
          # One input string fans out into one output element per piece.
          yield from text.split(self.delimiter)

      with beam.Pipeline() as pipeline:
        garden_lines = pipeline | 'Gardening plants' >> beam.Create([
            '🍓Strawberry,🥕Carrot,🍆Eggplant',
            '🍅Tomato,🥔Potato',
        ])
        split_words = garden_lines | 'Split words' >> beam.ParDo(
            SplitWords(delimiter=','))
        plants = split_words | beam.Map(print)
        # [END pardo_dofn]
        if test:
          test(plants)
    46  
    47  
    def pardo_dofn_params(test=None):
      """Run the ParDo example that inspects the timestamp and window params.

      A single element is given the timestamp 1584675660, placed into a
      `FixedWindows(30)` window, and handed to a `DoFn` whose `process` method
      declares `DoFn.TimestampParam` and `DoFn.WindowParam` defaults. The DoFn
      emits one multi-line text report, which is then printed.

      Args:
        test: Optional callable. When truthy, it is called with the output of the
          final `beam.Map(print)` step while the pipeline context is still open.
      """
      # pylint: disable=line-too-long
      # [START pardo_dofn_params]
      import apache_beam as beam

      class AnalyzeElement(beam.DoFn):
        def process(
            self,
            elem,
            timestamp=beam.DoFn.TimestampParam,
            window=beam.DoFn.WindowParam):
          # First section of the report: properties of the element timestamp.
          timestamp_report = [
              '# timestamp',
              f'type(timestamp) -> {type(timestamp)!r}',
              f'timestamp.micros -> {timestamp.micros!r}',
              f'timestamp.to_rfc3339() -> {timestamp.to_rfc3339()!r}',
              f'timestamp.to_utc_datetime() -> {timestamp.to_utc_datetime()!r}',
          ]

          # Second section: window boundaries, each shown in its own string
          # form followed by the UTC datetime equivalent in parentheses.
          start = window.start
          end = window.end
          latest = window.max_timestamp()
          window_report = [
              '# window',
              f'type(window) -> {type(window)!r}',
              f'window.start -> {start} ({start.to_utc_datetime()})',
              f'window.end -> {end} ({end.to_utc_datetime()})',
              f'window.max_timestamp() -> {latest} ({latest.to_utc_datetime()})',
          ]

          # A single empty line separates the two sections in the output.
          yield '\n'.join(timestamp_report + [''] + window_report)

      with beam.Pipeline() as pipeline:
        single = pipeline | 'Create a single test element' >> beam.Create([':)'])
        stamped = single | 'Add timestamp (Spring equinox 2020)' >> beam.Map(
            lambda elem: beam.window.TimestampedValue(elem, 1584675660))
        windowed = stamped | 'Fixed 30sec windows' >> beam.WindowInto(
            beam.window.FixedWindows(30))
        analyzed = windowed | 'Analyze element' >> beam.ParDo(AnalyzeElement())
        dofn_params = analyzed | beam.Map(print)
        # [END pardo_dofn_params]
        # pylint: enable=line-too-long
        if test:
          test(dofn_params)
    90  
    91  
    def pardo_dofn_methods(test=None):
      """Run the ParDo example that exercises the overridable DoFn methods.

      The DoFn prints a marker from `__init__`, `setup`, `start_bundle` and
      `teardown`, emits one string per input element from `process`, and emits
      one extra windowed value from `finish_bundle`. Every emitted element is
      printed.

      Args:
        test: Optional callable. When truthy, it is called with the output of the
          final `beam.Map(print)` step while the pipeline context is still open.

      Returns:
        Whatever `test` returns when `test` is truthy, otherwise None.
      """
      # [START pardo_dofn_methods]
      import apache_beam as beam

      class DoFnMethods(beam.DoFn):
        def __init__(self):
          print('__init__')
          # Starting value for the window used by `finish_bundle`; `process`
          # overwrites it with the window of each element it handles.
          self.window = beam.transforms.window.GlobalWindow()

        def setup(self):
          print('setup')

        def start_bundle(self):
          print('start_bundle')

        def process(self, element, window=beam.DoFn.WindowParam):
          # Remember the window of the element so `finish_bundle` can reuse it.
          self.window = window
          yield f'* process: {element}'

        def finish_bundle(self):
          # The value is wrapped explicitly with a timestamp and a window list
          # instead of being yielded as a bare element.
          windowed_value_cls = beam.utils.windowed_value.WindowedValue
          closing_value = windowed_value_cls(
              value='* finish_bundle: 🌱🌳🌍',
              timestamp=0,
              windows=[self.window],
          )
          yield closing_value

        def teardown(self):
          print('teardown')

      with beam.Pipeline() as pipeline:
        inputs = pipeline | 'Create inputs' >> beam.Create(
            ['🍓', '🥕', '🍆', '🍅', '🥔'])
        processed = inputs | 'DoFn methods' >> beam.ParDo(DoFnMethods())
        results = processed | beam.Map(print)
        # [END pardo_dofn_methods]
        if test:
          return test(results)