github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/complete/estimate_pi.py

# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A workflow that uses a simple Monte Carlo method to estimate π.

The algorithm computes the fraction of points drawn uniformly within the unit
square that also fall in the quadrant of the unit circle that overlaps the
square. A simple area calculation shows that this fraction should be π/4, so
we multiply our counts ratio by four to estimate π.
"""

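# A typical local invocation looks like the following (the output path is
# illustrative; Beam's default DirectRunner is assumed unless other pipeline
# options are passed on the command line):
#
#   python -m apache_beam.examples.complete.estimate_pi \
#       --output /tmp/estimate_pi/out
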
# pytype: skip-file

import argparse
import json
import logging
import random
from typing import Any
from typing import Iterable
from typing import Tuple

import apache_beam as beam
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions


@beam.typehints.with_output_types(Tuple[int, int, int])
@beam.typehints.with_input_types(int)
def run_trials(runs):
  """Run trials and return a 3-tuple representing the results.

  Args:
    runs: Number of trial runs to be executed.

  Returns:
    A 3-tuple (total trials, inside trials, 0).

  The final zero is needed solely to make sure that the combine_results
  function has the same type for inputs and outputs (a requirement for
  combiner functions).
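
  For illustration, run_trials(1000) might return something like
  (1000, 787, 0), since roughly π/4 of the uniformly drawn points land
  inside the quarter circle.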
  """
  inside_runs = 0
  for _ in range(runs):
    x = random.uniform(0, 1)
    y = random.uniform(0, 1)
    inside_runs += 1 if x * x + y * y <= 1.0 else 0
  return runs, inside_runs, 0


@beam.typehints.with_output_types(Tuple[int, int, float])
@beam.typehints.with_input_types(Iterable[Tuple[int, int, Any]])
def combine_results(results):
  """Combiner function to sum up trials and compute the estimate.

  Args:
    results: An iterable of 3-tuples (total trials, inside trials, ignored).

  Returns:
    A 3-tuple containing the sum of total trials, sum of inside trials, and
    the probability computed from the two numbers.
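
  For example, combine_results([(1000, 790, 0), (1000, 780, 0)]) returns
  (2000, 1570, 3.14).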
  """
  # TODO(silviuc): Do we guarantee that argument can be iterated repeatedly?
  # Should document one way or the other.
  total, inside = sum(r[0] for r in results), sum(r[1] for r in results)
  return total, inside, 4 * float(inside) / total


class JsonCoder(object):
  """A JSON coder used to format the final result."""
  def encode(self, x):
    return json.dumps(x).encode('utf-8')


class EstimatePiTransform(beam.PTransform):
  """Runs 10M trials and combines the results to estimate pi."""
  def __init__(self, tries_per_work_item=100000):
    self.tries_per_work_item = tries_per_work_item

  def expand(self, pcoll):
    # A hundred work items of a hundred thousand tries each.
    return (
        pcoll
        | 'Initialize' >> beam.Create(
            [self.tries_per_work_item] * 100).with_output_types(int)
        | 'Run trials' >> beam.Map(run_trials)
        | 'Sum' >> beam.CombineGlobally(combine_results).without_defaults())

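# A minimal sketch of reusing the composite transform above in another
# pipeline; the label and tries_per_work_item value below are illustrative:
#
#   with beam.Pipeline() as p:
#     estimate = (
#         p | 'Estimate' >> EstimatePiTransform(tries_per_work_item=10000))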

def run(argv=None):

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--output', required=True, help='Output file to write results to.')
  known_args, pipeline_args = parser.parse_known_args(argv)
  # We use the save_main_session option because one or more DoFns in this
  # workflow rely on global context (e.g., a module imported at module level).
  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(SetupOptions).save_main_session = True
  with beam.Pipeline(options=pipeline_options) as p:

    (  # pylint: disable=expression-not-assigned
        p
        | EstimatePiTransform()
        | WriteToText(known_args.output, coder=JsonCoder()))


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  run()