github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/snippets/transforms/elementwise/map.py (about)

     1  # coding=utf-8
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  # pytype: skip-file
    20  
    21  
    22  def map_simple(test=None):
    23    # [START map_simple]
    24    import apache_beam as beam
    25  
    26    with beam.Pipeline() as pipeline:
    27      plants = (
    28          pipeline
    29          | 'Gardening plants' >> beam.Create([
    30              '   🍓Strawberry   \n',
    31              '   🥕Carrot   \n',
    32              '   🍆Eggplant   \n',
    33              '   🍅Tomato   \n',
    34              '   🥔Potato   \n',
    35          ])
    36          | 'Strip' >> beam.Map(str.strip)
    37          | beam.Map(print))
    38      # [END map_simple]
    39      if test:
    40        test(plants)
    41  
    42  
    43  def map_function(test=None):
    44    # [START map_function]
    45    import apache_beam as beam
    46  
    47    def strip_header_and_newline(text):
    48      return text.strip('# \n')
    49  
    50    with beam.Pipeline() as pipeline:
    51      plants = (
    52          pipeline
    53          | 'Gardening plants' >> beam.Create([
    54              '# 🍓Strawberry\n',
    55              '# 🥕Carrot\n',
    56              '# 🍆Eggplant\n',
    57              '# 🍅Tomato\n',
    58              '# 🥔Potato\n',
    59          ])
    60          | 'Strip header' >> beam.Map(strip_header_and_newline)
    61          | beam.Map(print))
    62      # [END map_function]
    63      if test:
    64        test(plants)
    65  
    66  
    67  def map_lambda(test=None):
    68    # [START map_lambda]
    69    import apache_beam as beam
    70  
    71    with beam.Pipeline() as pipeline:
    72      plants = (
    73          pipeline
    74          | 'Gardening plants' >> beam.Create([
    75              '# 🍓Strawberry\n',
    76              '# 🥕Carrot\n',
    77              '# 🍆Eggplant\n',
    78              '# 🍅Tomato\n',
    79              '# 🥔Potato\n',
    80          ])
    81          | 'Strip header' >> beam.Map(lambda text: text.strip('# \n'))
    82          | beam.Map(print))
    83      # [END map_lambda]
    84      if test:
    85        test(plants)
    86  
    87  
    88  def map_multiple_arguments(test=None):
    89    # [START map_multiple_arguments]
    90    import apache_beam as beam
    91  
    92    def strip(text, chars=None):
    93      return text.strip(chars)
    94  
    95    with beam.Pipeline() as pipeline:
    96      plants = (
    97          pipeline
    98          | 'Gardening plants' >> beam.Create([
    99              '# 🍓Strawberry\n',
   100              '# 🥕Carrot\n',
   101              '# 🍆Eggplant\n',
   102              '# 🍅Tomato\n',
   103              '# 🥔Potato\n',
   104          ])
   105          | 'Strip header' >> beam.Map(strip, chars='# \n')
   106          | beam.Map(print))
   107      # [END map_multiple_arguments]
   108      if test:
   109        test(plants)
   110  
   111  
   112  def map_tuple(test=None):
   113    # [START map_tuple]
   114    import apache_beam as beam
   115  
   116    with beam.Pipeline() as pipeline:
   117      plants = (
   118          pipeline
   119          | 'Gardening plants' >> beam.Create([
   120              ('🍓', 'Strawberry'),
   121              ('🥕', 'Carrot'),
   122              ('🍆', 'Eggplant'),
   123              ('🍅', 'Tomato'),
   124              ('🥔', 'Potato'),
   125          ])
   126          | 'Format' >>
   127          beam.MapTuple(lambda icon, plant: '{}{}'.format(icon, plant))
   128          | beam.Map(print))
   129      # [END map_tuple]
   130      if test:
   131        test(plants)
   132  
   133  
   134  def map_side_inputs_singleton(test=None):
   135    # [START map_side_inputs_singleton]
   136    import apache_beam as beam
   137  
   138    with beam.Pipeline() as pipeline:
   139      chars = pipeline | 'Create chars' >> beam.Create(['# \n'])
   140  
   141      plants = (
   142          pipeline
   143          | 'Gardening plants' >> beam.Create([
   144              '# 🍓Strawberry\n',
   145              '# 🥕Carrot\n',
   146              '# 🍆Eggplant\n',
   147              '# 🍅Tomato\n',
   148              '# 🥔Potato\n',
   149          ])
   150          | 'Strip header' >> beam.Map(
   151              lambda text,
   152              chars: text.strip(chars),
   153              chars=beam.pvalue.AsSingleton(chars),
   154          )
   155          | beam.Map(print))
   156      # [END map_side_inputs_singleton]
   157      if test:
   158        test(plants)
   159  
   160  
   161  def map_side_inputs_iter(test=None):
   162    # [START map_side_inputs_iter]
   163    import apache_beam as beam
   164  
   165    with beam.Pipeline() as pipeline:
   166      chars = pipeline | 'Create chars' >> beam.Create(['#', ' ', '\n'])
   167  
   168      plants = (
   169          pipeline
   170          | 'Gardening plants' >> beam.Create([
   171              '# 🍓Strawberry\n',
   172              '# 🥕Carrot\n',
   173              '# 🍆Eggplant\n',
   174              '# 🍅Tomato\n',
   175              '# 🥔Potato\n',
   176          ])
   177          | 'Strip header' >> beam.Map(
   178              lambda text,
   179              chars: text.strip(''.join(chars)),
   180              chars=beam.pvalue.AsIter(chars),
   181          )
   182          | beam.Map(print))
   183      # [END map_side_inputs_iter]
   184      if test:
   185        test(plants)
   186  
   187  
   188  def map_side_inputs_dict(test=None):
   189    # [START map_side_inputs_dict]
   190    import apache_beam as beam
   191  
   192    def replace_duration(plant, durations):
   193      plant['duration'] = durations[plant['duration']]
   194      return plant
   195  
   196    with beam.Pipeline() as pipeline:
   197      durations = pipeline | 'Durations' >> beam.Create([
   198          (0, 'annual'),
   199          (1, 'biennial'),
   200          (2, 'perennial'),
   201      ])
   202  
   203      plant_details = (
   204          pipeline
   205          | 'Gardening plants' >> beam.Create([
   206              {
   207                  'icon': '🍓', 'name': 'Strawberry', 'duration': 2
   208              },
   209              {
   210                  'icon': '🥕', 'name': 'Carrot', 'duration': 1
   211              },
   212              {
   213                  'icon': '🍆', 'name': 'Eggplant', 'duration': 2
   214              },
   215              {
   216                  'icon': '🍅', 'name': 'Tomato', 'duration': 0
   217              },
   218              {
   219                  'icon': '🥔', 'name': 'Potato', 'duration': 2
   220              },
   221          ])
   222          | 'Replace duration' >> beam.Map(
   223              replace_duration,
   224              durations=beam.pvalue.AsDict(durations),
   225          )
   226          | beam.Map(print))
   227      # [END map_side_inputs_dict]
   228      if test:
   229        test(plant_details)