# github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/elementwise/map.py (about)
# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file


def map_simple(test=None):
  """Strip surrounding whitespace from each element with ``str.strip``.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block (i.e. before the
      pipeline context exits).
  """
  # [START map_simple]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            ' 🍓Strawberry \n',
            ' 🥕Carrot \n',
            ' 🍆Eggplant \n',
            ' 🍅Tomato \n',
            ' 🥔Potato \n',
        ])
        | 'Strip' >> beam.Map(str.strip)
        | beam.Map(print))
    # [END map_simple]
    if test:
      test(plants)


def map_function(test=None):
  """Strip the ``'# '`` header and newline using a named helper function.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_function]
  import apache_beam as beam

  def strip_header_and_newline(text):
    return text.strip('# \n')

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '# 🍓Strawberry\n',
            '# 🥕Carrot\n',
            '# 🍆Eggplant\n',
            '# 🍅Tomato\n',
            '# 🥔Potato\n',
        ])
        | 'Strip header' >> beam.Map(strip_header_and_newline)
        | beam.Map(print))
    # [END map_function]
    if test:
      test(plants)


def map_lambda(test=None):
  """Strip the ``'# '`` header and newline using an inline lambda.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_lambda]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '# 🍓Strawberry\n',
            '# 🥕Carrot\n',
            '# 🍆Eggplant\n',
            '# 🍅Tomato\n',
            '# 🥔Potato\n',
        ])
        | 'Strip header' >> beam.Map(lambda text: text.strip('# \n'))
        | beam.Map(print))
    # [END map_lambda]
    if test:
      test(plants)


def map_multiple_arguments(test=None):
  """Pass an extra keyword argument (``chars``) through ``beam.Map``.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_multiple_arguments]
  import apache_beam as beam

  def strip(text, chars=None):
    return text.strip(chars)

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '# 🍓Strawberry\n',
            '# 🥕Carrot\n',
            '# 🍆Eggplant\n',
            '# 🍅Tomato\n',
            '# 🥔Potato\n',
        ])
        | 'Strip header' >> beam.Map(strip, chars='# \n')
        | beam.Map(print))
    # [END map_multiple_arguments]
    if test:
      test(plants)


def map_tuple(test=None):
  """Format ``(icon, plant)`` tuples into one string with ``beam.MapTuple``.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_tuple]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            ('🍓', 'Strawberry'),
            ('🥕', 'Carrot'),
            ('🍆', 'Eggplant'),
            ('🍅', 'Tomato'),
            ('🥔', 'Potato'),
        ])
        | 'Format' >>
        beam.MapTuple(lambda icon, plant: '{}{}'.format(icon, plant))
        | beam.Map(print))
    # [END map_tuple]
    if test:
      test(plants)


def map_side_inputs_singleton(test=None):
  """Strip characters supplied through an ``AsSingleton`` side input.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_side_inputs_singleton]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    chars = pipeline | 'Create chars' >> beam.Create(['# \n'])

    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '# 🍓Strawberry\n',
            '# 🥕Carrot\n',
            '# 🍆Eggplant\n',
            '# 🍅Tomato\n',
            '# 🥔Potato\n',
        ])
        | 'Strip header' >> beam.Map(
            lambda text, chars: text.strip(chars),
            chars=beam.pvalue.AsSingleton(chars),
        )
        | beam.Map(print))
    # [END map_side_inputs_singleton]
    if test:
      test(plants)


def map_side_inputs_iter(test=None):
  """Strip characters supplied through an ``AsIter`` side input.

  The side input elements are joined into a single string before being
  handed to ``str.strip``.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_side_inputs_iter]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    chars = pipeline | 'Create chars' >> beam.Create(['#', ' ', '\n'])

    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '# 🍓Strawberry\n',
            '# 🥕Carrot\n',
            '# 🍆Eggplant\n',
            '# 🍅Tomato\n',
            '# 🥔Potato\n',
        ])
        | 'Strip header' >> beam.Map(
            lambda text, chars: text.strip(''.join(chars)),
            chars=beam.pvalue.AsIter(chars),
        )
        | beam.Map(print))
    # [END map_side_inputs_iter]
    if test:
      test(plants)


def map_side_inputs_dict(test=None):
  """Replace each plant's ``duration`` code via an ``AsDict`` side input.

  Every input dict's ``'duration'`` value is used as a key into the
  ``durations`` side input, and the looked-up value takes its place in the
  emitted dict.

  Args:
    test: Optional callable. When truthy it is called with the result of the
      final pipeline step, inside the ``with`` block.
  """
  # [START map_side_inputs_dict]
  import apache_beam as beam

  def replace_duration(plant, durations):
    # Return a new dict rather than assigning into `plant`: a transform must
    # not modify its input element.
    return dict(plant, duration=durations[plant['duration']])

  with beam.Pipeline() as pipeline:
    durations = pipeline | 'Durations' >> beam.Create([
        (0, 'annual'),
        (1, 'biennial'),
        (2, 'perennial'),
    ])

    plant_details = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            {
                'icon': '🍓', 'name': 'Strawberry', 'duration': 2
            },
            {
                'icon': '🥕', 'name': 'Carrot', 'duration': 1
            },
            {
                'icon': '🍆', 'name': 'Eggplant', 'duration': 2
            },
            {
                'icon': '🍅', 'name': 'Tomato', 'duration': 0
            },
            {
                'icon': '🥔', 'name': 'Potato', 'duration': 2
            },
        ])
        | 'Replace duration' >> beam.Map(
            replace_duration,
            durations=beam.pvalue.AsDict(durations),
        )
        | beam.Map(print))
    # [END map_side_inputs_dict]
    if test:
      test(plant_details)