# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file

# Origin: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/elementwise/flatmap.py

"""Example snippets showing different ways to use ``beam.FlatMap``.

Every function builds a small pipeline that prints its output elements.
Each accepts an optional ``test`` callable which, when truthy, is called
with the final PCollection while the pipeline context is still open.
"""


def flatmap_simple(test=None):
  """Split each input string on whitespace using ``str.split``.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_simple]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '🍓Strawberry 🥕Carrot 🍆Eggplant',
            '🍅Tomato 🥔Potato',
        ])
        | 'Split words' >> beam.FlatMap(str.split)
        | beam.Map(print))
    # [END flatmap_simple]
    if test:
      test(plants)


def flatmap_function(test=None):
  """Split each input string on commas using a named function.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_function]
  import apache_beam as beam

  def split_words(text):
    return text.split(',')

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '🍓Strawberry,🥕Carrot,🍆Eggplant',
            '🍅Tomato,🥔Potato',
        ])
        | 'Split words' >> beam.FlatMap(split_words)
        | beam.Map(print))
    # [END flatmap_function]
    if test:
      test(plants)


def flatmap_lambda(test=None):
  """Flatten input lists into their items using an identity lambda.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_lambda]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            ['🍓Strawberry', '🥕Carrot', '🍆Eggplant'],
            ['🍅Tomato', '🥔Potato'],
        ])
        | 'Flatten lists' >> beam.FlatMap(lambda elements: elements)
        | beam.Map(print))
    # [END flatmap_lambda]
    if test:
      test(plants)


def flatmap_generator(test=None):
  """Flatten input lists into their items using a generator function.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_generator]
  import apache_beam as beam

  def generate_elements(elements):
    for element in elements:
      yield element

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            ['🍓Strawberry', '🥕Carrot', '🍆Eggplant'],
            ['🍅Tomato', '🥔Potato'],
        ])
        | 'Flatten lists' >> beam.FlatMap(generate_elements)
        | beam.Map(print))
    # [END flatmap_generator]
    if test:
      test(plants)


def flatmap_multiple_arguments(test=None):
  """Split each input string, passing the delimiter as an extra keyword.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_multiple_arguments]
  import apache_beam as beam

  def split_words(text, delimiter=None):
    return text.split(delimiter)

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '🍓Strawberry,🥕Carrot,🍆Eggplant',
            '🍅Tomato,🥔Potato',
        ])
        | 'Split words' >> beam.FlatMap(split_words, delimiter=',')
        | beam.Map(print))
    # [END flatmap_multiple_arguments]
    if test:
      test(plants)


def flatmap_tuple(test=None):
  """Format ``(icon, plant)`` tuples, dropping those with a falsy icon.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_tuple]
  import apache_beam as beam

  def format_plant(icon, plant):
    if icon:
      yield '{}{}'.format(icon, plant)

  with beam.Pipeline() as pipeline:
    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            ('🍓', 'Strawberry'),
            ('🥕', 'Carrot'),
            ('🍆', 'Eggplant'),
            ('🍅', 'Tomato'),
            ('🥔', 'Potato'),
            (None, 'Invalid'),
        ])
        | 'Format' >> beam.FlatMapTuple(format_plant)
        | beam.Map(print))
    # [END flatmap_tuple]
    if test:
      test(plants)


def flatmap_side_inputs_singleton(test=None):
  """Split each input string on a delimiter supplied as a singleton side input.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_side_inputs_singleton]
  import apache_beam as beam

  with beam.Pipeline() as pipeline:
    delimiter = pipeline | 'Create delimiter' >> beam.Create([','])

    plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            '🍓Strawberry,🥕Carrot,🍆Eggplant',
            '🍅Tomato,🥔Potato',
        ])
        | 'Split words' >> beam.FlatMap(
            lambda text,
            delimiter: text.split(delimiter),
            delimiter=beam.pvalue.AsSingleton(delimiter),
        )
        | beam.Map(print))
    # [END flatmap_side_inputs_singleton]
    if test:
      test(plants)


def flatmap_side_inputs_iter(test=None):
  """Lower-case each plant's duration and keep only plants with a valid one.

  The valid durations are supplied as an iterable side input.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_side_inputs_iter]
  import apache_beam as beam

  def normalize_and_validate_durations(plant, valid_durations):
    duration = plant['duration'].lower()
    if duration in valid_durations:
      # Emit a copy rather than editing the input element in place; the
      # existing 'duration' key keeps its position, so output is unchanged.
      yield {**plant, 'duration': duration}

  with beam.Pipeline() as pipeline:
    valid_durations = pipeline | 'Valid durations' >> beam.Create([
        'annual',
        'biennial',
        'perennial',
    ])

    valid_plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            {
                'icon': '🍓', 'name': 'Strawberry', 'duration': 'Perennial'
            },
            {
                'icon': '🥕', 'name': 'Carrot', 'duration': 'BIENNIAL'
            },
            {
                'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'
            },
            {
                'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'
            },
            {
                'icon': '🥔', 'name': 'Potato', 'duration': 'unknown'
            },
        ])
        | 'Normalize and validate durations' >> beam.FlatMap(
            normalize_and_validate_durations,
            valid_durations=beam.pvalue.AsIter(valid_durations),
        )
        | beam.Map(print))
    # [END flatmap_side_inputs_iter]
    if test:
      test(valid_plants)


def flatmap_side_inputs_dict(test=None):
  """Replace each plant's numeric duration via a dict side input.

  Plants whose duration is not a key of the side-input dict are dropped.

  Args:
    test: Optional callable invoked with the resulting PCollection.
  """
  # [START flatmap_side_inputs_dict]
  import apache_beam as beam

  def replace_duration_if_valid(plant, durations):
    if plant['duration'] in durations:
      # Emit a copy rather than editing the input element in place.
      yield {**plant, 'duration': durations[plant['duration']]}

  with beam.Pipeline() as pipeline:
    durations = pipeline | 'Durations dict' >> beam.Create([
        (0, 'annual'),
        (1, 'biennial'),
        (2, 'perennial'),
    ])

    valid_plants = (
        pipeline
        | 'Gardening plants' >> beam.Create([
            {
                'icon': '🍓', 'name': 'Strawberry', 'duration': 2
            },
            {
                'icon': '🥕', 'name': 'Carrot', 'duration': 1
            },
            {
                'icon': '🍆', 'name': 'Eggplant', 'duration': 2
            },
            {
                'icon': '🍅', 'name': 'Tomato', 'duration': 0
            },
            {
                'icon': '🥔', 'name': 'Potato', 'duration': -1
            },
        ])
        | 'Replace duration if valid' >> beam.FlatMap(
            replace_duration_if_valid,
            durations=beam.pvalue.AsDict(durations),
        )
        | beam.Map(print))
    # [END flatmap_side_inputs_dict]
    if test:
      test(valid_plants)