github.com/pachyderm/pachyderm@v1.13.4/examples/spark/pi/estimate_pi.py

#!/usr/bin/env pyspark

"""
Estimate Pi

This uses a random "dart throwing" (Monte Carlo) approach, with the sampling spread across a Spark cluster, and writes the result out to PFS.

The number of samples to take is read from a config file versioned in a Pachyderm repo.
"""

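# How the estimate works (added note): each dart is a uniform random point in
# the unit square. The chance that it lands inside the quarter circle
# x^2 + y^2 < 1 is (area pi/4) / (area 1) = pi/4, so pi ~= 4 * hits / num_samples.
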
import random

# reuse the SparkContext if one is already defined (e.g. when run in the
# pyspark shell); otherwise create our own
try:
    sc
except NameError:
    from pyspark import SparkContext
    sc = SparkContext(appName="Estimate_Pi")

def inside(p):
    # p (the element from the parallelized range) is ignored; each call throws
    # one dart at the unit square and reports whether it landed inside the
    # quarter circle
    x, y = random.random(), random.random()
    return x*x + y*y < 1

# the sample count is versioned in the estimate_pi_config Pachyderm repo,
# mounted under /pfs; fall back to a default if the file is missing or unreadable
try:
    with open('/pfs/estimate_pi_config/num_samples') as cfg:
        num_samples = int(cfg.read())
except (IOError, ValueError):
    print('no config found in pfs, falling back to 100000 samples')
    num_samples = 100000

# throw num_samples darts in parallel across the cluster and count the hits
count = sc.parallelize(range(0, num_samples)).filter(inside).count()

pi = 4.0 * count / num_samples

print('pi estimate:', pi)
with open('/pfs/out/pi_estimate', 'w') as out:
    out.write(str(pi))

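# usage note (added; not in the original file): inside a Pachyderm pipeline,
# /pfs/estimate_pi_config and /pfs/out are mounted automatically from the
# pipeline's input repo and output commit. Run standalone (e.g. with
# `spark-submit estimate_pi.py`) the script falls back to 100000 samples,
# and the final write assumes a local /pfs/out directory exists.
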
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4