github.com/pachyderm/pachyderm@v1.13.4/examples/spark/pi/estimate_pi.py

#!/usr/bin/env pyspark

"""
Estimate Pi

This uses a random "dart throwing" (Monte Carlo) approach, with the sampling
spread across a Spark cluster, then writes the result out to PFS.

The number of samples to take is sourced from a config file versioned in a
Pachyderm repo.
"""

import random

# Check whether sc is already defined (it is when run via the pyspark shell);
# otherwise create a SparkContext ourselves.
try:
    sc
except NameError:
    from pyspark import SparkContext
    sc = SparkContext(appName="Estimate_Pi")

def inside(p):
    # p is the element being filtered and is unused: each call just throws one
    # fresh dart. A point uniform in the unit square lands inside the quarter
    # circle x^2 + y^2 < 1 with probability pi/4.
    x, y = random.random(), random.random()
    return x*x + y*y < 1

try:
    with open('/pfs/estimate_pi_config/num_samples') as f:
        num_samples = int(f.read())
except (IOError, ValueError):
    print('no config found in pfs, falling back to 100000 samples')
    num_samples = 100000

count = sc.parallelize(range(0, num_samples)).filter(inside).count()

pi = 4.0 * count / num_samples

print('pi estimate:', pi)
with open('/pfs/out/pi_estimate', 'w') as f:
    f.write(str(pi))

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
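
# ---------------------------------------------------------------------------
# Sanity-check sketch (an editor's illustration, not part of the upstream
# example): the same dart-throwing estimate in plain Python, with no Spark or
# PFS dependency, handy for verifying the pi = 4 * count / num_samples math
# locally. The name estimate_pi_local is hypothetical.

def estimate_pi_local(num_samples=100000):
    # Throw num_samples darts at the unit square and count how many land
    # inside the quarter circle; that fraction converges to pi/4.
    count = sum(
        1 for _ in range(num_samples)
        if random.random() ** 2 + random.random() ** 2 < 1
    )
    return 4.0 * count / num_samples

# Example: estimate_pi_local(1000000) typically lands within about 0.01 of pi.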