# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A workflow that uses a simple Monte Carlo method to estimate π.

The algorithm computes the fraction of points drawn uniformly within the unit
square that also fall in the quadrant of the unit circle that overlaps the
square. A simple area calculation shows that this fraction should be π/4, so
we multiply our counts ratio by four to estimate π.
"""

# pytype: skip-file

import argparse
import json
import logging
import random
from typing import Any
from typing import Iterable
from typing import Tuple

import apache_beam as beam
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions


@beam.typehints.with_output_types(Tuple[int, int, int])
@beam.typehints.with_input_types(int)
def run_trials(runs):
  """Run trials and return a 3-tuple representing the results.

  Each trial draws a point (x, y) uniformly from the unit square and counts it
  as "inside" when it lies within the unit circle (x^2 + y^2 <= 1).

  Args:
    runs: Number of trial runs to be executed.

  Returns:
    A 3-tuple (total trials, inside trials, 0).

  The final zero is needed solely to make sure that the combine_results function
  has same type for inputs and outputs (a requirement for combiner functions).
  """
  inside_runs = 0
  for _ in range(runs):
    x = random.uniform(0, 1)
    y = random.uniform(0, 1)
    inside_runs += 1 if x * x + y * y <= 1.0 else 0
  return runs, inside_runs, 0


@beam.typehints.with_output_types(Tuple[int, int, float])
@beam.typehints.with_input_types(Iterable[Tuple[int, int, Any]])
def combine_results(results):
  """Combiner function to sum up trials and compute the estimate.

  Args:
    results: An iterable of 3-tuples (total trials, inside trials, ignored).
      It is consumed exactly once, so a one-shot iterator is acceptable.

  Returns:
    A 3-tuple containing the sum of total trials, sum of inside trials, and
    the estimate of π computed from the two numbers (4 * inside / total). When
    no trials were recorded at all, the estimate is reported as 0.0.
  """
  # Accumulate both sums in a single pass: iterating `results` twice would
  # silently yield a zero "inside" count if the argument cannot be re-iterated.
  total = 0
  inside = 0
  for result in results:
    total += result[0]
    inside += result[1]
  if total == 0:
    # No trials to base an estimate on; avoid dividing by zero.
    return total, inside, 0.0
  return total, inside, 4 * float(inside) / total


class JsonCoder(object):
  """A JSON coder used to format the final result."""
  def encode(self, x):
    """Serialize x to JSON and return it as UTF-8 encoded bytes."""
    return json.dumps(x).encode('utf-8')


class EstimatePiTransform(beam.PTransform):
  """Runs Monte Carlo trials and combines the results to estimate pi.

  With the default arguments this is a hundred work items of a hundred thousand
  tries each, i.e. 10M trials in total.

  Args:
    tries_per_work_item: Number of trials each work item executes.
    num_work_items: Number of work items to create.
  """
  def __init__(self, tries_per_work_item=100000, num_work_items=100):
    super().__init__()
    self.tries_per_work_item = tries_per_work_item
    self.num_work_items = num_work_items

  def expand(self, pcoll):
    # One element per work item; each element is that item's trial count.
    work_items = [self.tries_per_work_item] * self.num_work_items
    return (
        pcoll
        | 'Initialize' >> beam.Create(work_items).with_output_types(int)
        | 'Run trials' >> beam.Map(run_trials)
        | 'Sum' >> beam.CombineGlobally(combine_results).without_defaults())


def run(argv=None):
  """Parse command-line arguments, then build and run the pipeline.

  Args:
    argv: Optional list of command-line arguments. `--output` is required;
      arguments not recognized here are passed on as pipeline options.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--output', required=True, help='Output file to write results to.')
  known_args, pipeline_args = parser.parse_known_args(argv)
  # We use the save_main_session option because one or more DoFn's in this
  # workflow rely on global context (e.g., a module imported at module level).
  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(SetupOptions).save_main_session = True
  with beam.Pipeline(options=pipeline_options) as p:

    (  # pylint: disable=expression-not-assigned
        p
        | EstimatePiTransform()
        | WriteToText(known_args.output, coder=JsonCoder()))


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  run()