# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file


def pardo_dofn(test=None):
  """Split comma-separated strings into words with a DoFn and print them.

  Args:
    test: Optional callable. When truthy, it is called with the result of
      the final ``beam.Map(print)`` step while the pipeline context is
      still open.
  """
  # [START pardo_dofn]
  import apache_beam as beam

  class SplitWords(beam.DoFn):
    # Emits one output element per delimiter-separated piece of the input.
    def __init__(self, delimiter=','):
      self.delimiter = delimiter

    def process(self, text):
      yield from text.split(self.delimiter)

  with beam.Pipeline() as pipeline:
    produce = pipeline | 'Gardening plants' >> beam.Create([
        '🍓Strawberry,🥕Carrot,🍆Eggplant',
        '🍅Tomato,🥔Potato',
    ])
    words = produce | 'Split words' >> beam.ParDo(SplitWords(','))
    plants = words | beam.Map(print)
    # [END pardo_dofn]
    # Called inside the `with` so the callback sees the pipeline before the
    # context manager exits.
    if test:
      test(plants)


def pardo_dofn_params(test=None):
  """Print a report of the timestamp and window handed to a DoFn.

  A single ':)' element is given the timestamp 1584675660, placed into
  30-second fixed windows, and passed to a DoFn whose ``process`` requests
  ``TimestampParam`` and ``WindowParam``; the DoFn yields one multi-line
  string describing both, which is then printed.

  Args:
    test: Optional callable. When truthy, it is called with the result of
      the final ``beam.Map(print)`` step while the pipeline context is
      still open.
  """
  # pylint: disable=line-too-long
  # [START pardo_dofn_params]
  import apache_beam as beam

  class AnalyzeElement(beam.DoFn):
    def process(
        self,
        elem,
        timestamp=beam.DoFn.TimestampParam,
        window=beam.DoFn.WindowParam):
      # Section describing the element's timestamp.
      timestamp_section = [
          '# timestamp',
          'type(timestamp) -> ' + repr(type(timestamp)),
          'timestamp.micros -> ' + repr(timestamp.micros),
          'timestamp.to_rfc3339() -> ' + repr(timestamp.to_rfc3339()),
          'timestamp.to_utc_datetime() -> ' + repr(timestamp.to_utc_datetime()),
      ]

      # Section describing the window: each bound is shown both as-is and
      # converted with to_utc_datetime().
      start = window.start
      end = window.end
      max_timestamp = window.max_timestamp()
      window_section = [
          '# window',
          'type(window) -> ' + repr(type(window)),
          'window.start -> {} ({})'.format(start, start.to_utc_datetime()),
          'window.end -> {} ({})'.format(end, end.to_utc_datetime()),
          'window.max_timestamp() -> {} ({})'.format(
              max_timestamp, max_timestamp.to_utc_datetime()),
      ]

      # The two sections are separated by one empty line in the output.
      yield '\n'.join(timestamp_section + [''] + window_section)

  with beam.Pipeline() as pipeline:
    single = pipeline | 'Create a single test element' >> beam.Create([':)'])
    stamped = single | 'Add timestamp (Spring equinox 2020)' >> beam.Map(
        lambda elem: beam.window.TimestampedValue(elem, 1584675660))
    windowed = stamped | 'Fixed 30sec windows' >> beam.WindowInto(
        beam.window.FixedWindows(30))
    analyzed = windowed | 'Analyze element' >> beam.ParDo(AnalyzeElement())
    dofn_params = analyzed | beam.Map(print)
    # [END pardo_dofn_params]
    # pylint: enable=line-too-long
    # Called inside the `with` so the callback sees the pipeline before the
    # context manager exits.
    if test:
      test(dofn_params)


def pardo_dofn_methods(test=None):
  """Run a DoFn that defines every lifecycle hook and print its output.

  ``__init__``, ``setup``, ``start_bundle`` and ``teardown`` print their own
  name; ``process`` and ``finish_bundle`` yield strings that are printed by
  the final ``beam.Map(print)`` step.

  Args:
    test: Optional callable. When truthy, it is called with the result of
      the final ``beam.Map(print)`` step while the pipeline context is
      still open.

  Returns:
    Whatever ``test`` returns when ``test`` is truthy, otherwise ``None``.
  """
  # [START pardo_dofn_methods]
  import apache_beam as beam

  class DoFnMethods(beam.DoFn):
    def __init__(self):
      print('__init__')
      # Fallback window for finish_bundle; overwritten by process().
      self.window = beam.transforms.window.GlobalWindow()

    def setup(self):
      print('setup')

    def start_bundle(self):
      print('start_bundle')

    def process(self, element, window=beam.DoFn.WindowParam):
      # Remember the window of the latest element for finish_bundle.
      self.window = window
      yield '* process: ' + element

    def finish_bundle(self):
      # The value is wrapped explicitly with a timestamp and the stored
      # window rather than being yielded as a bare string.
      closing_value = beam.utils.windowed_value.WindowedValue(
          value='* finish_bundle: 🌱🌳🌍',
          timestamp=0,
          windows=[self.window],
      )
      yield closing_value

    def teardown(self):
      print('teardown')

  with beam.Pipeline() as pipeline:
    inputs = pipeline | 'Create inputs' >> beam.Create(
        ['🍓', '🥕', '🍆', '🍅', '🥔'])
    processed = inputs | 'DoFn methods' >> beam.ParDo(DoFnMethods())
    results = processed | beam.Map(print)
    # [END pardo_dofn_methods]
    # Called (and returned from) inside the `with` so the callback sees the
    # pipeline before the context manager exits.
    if test:
      return test(results)