golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/shootout/timing.log (about)

     1  All tests on r45 or r70
     2  
     3  Aug 3 2009
     4  
     5  First version of fasta. Translation of fasta.c, fetched from
     6  	http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4
     7  
     8  fasta -n 25000000
     9  	gcc -O2 fasta.c	5.98u 0.00s 6.01r
    10  	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
    11  	6g fasta.go	13.50u 0.02s 13.53r
    12  	6g -B fasta.go	12.99u 0.02s 13.02r
    13  
    14  Aug 4 2009
    15  [added timing.sh]
    16  
    17  # myrandom:
    18  #   hand-written optimization of integer division
    19  #   use int32->float conversion
    20  fasta -n 25000000
    21  	# probably I/O library inefficiencies
    22  	gcc -O2 fasta.c	5.99u 0.00s 6.00r 
    23  	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
    24  	gc fasta	10.70u 0.00s 10.77r
    25  	gc_B fasta	10.09u 0.03s 10.12r
    26  
    27  reverse-complement < output-of-fasta-25000000
    28  	# we don't know - memory cache behavior?
    29  	gcc -O2 reverse-complement.c	2.04u 0.94s 10.54r
    30  	gccgo -O2 reverse-complement.go	6.54u 0.63s 7.17r
    31  	gc reverse-complement	6.55u 0.70s 7.26r
    32  	gc_B reverse-complement	6.32u 0.70s 7.10r
    33  
    34  nbody 50000000
    35  	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
    36  	gcc -O2 nbody.c	21.61u 0.01s 24.80r
    37  	gccgo -O2 nbody.go	118.55u 0.02s 120.32r
    38  	gc nbody	100.84u 0.00s 100.85r
    39  	gc_B nbody	103.33u 0.00s 103.39r
    40  [
    41  hacked Sqrt in assembler
    42  	gc nbody	31.97u 0.00s 32.01r
    43  ]
    44  
    45  binary-tree 15 # too slow to use 20
    46  	# memory allocation and garbage collection
    47  	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
    48  	gccgo -O2 binary-tree.go	1.69u 0.46s 2.15r
    49  	gccgo -O2 binary-tree-freelist.go	8.48u 0.00s 8.48r
    50  	gc binary-tree	9.60u 0.01s 9.62r
    51  	gc binary-tree-freelist	0.48u 0.01s 0.50r
    52  
    53  August 5, 2009
    54  
    55  fannkuch 12
    56  	# bounds checking is half the difference
    57  	# rest might be registerization
    58  	gcc -O2 fannkuch.c	60.09u 0.01s 60.32r
    59  	gccgo -O2 fannkuch.go	64.89u 0.00s 64.92r
    60  	gc fannkuch	124.59u 0.00s 124.67r
    61  	gc_B fannkuch	91.14u 0.00s 91.16r
    62  
    63  regex-dna 100000
    64  	# regexp code is slow on trivial regexp
    65  	gcc -O2 regex-dna.c -lpcre	0.92u 0.00s 0.99r
    66  	gc regex-dna	26.94u 0.18s 28.75r
    67  	gc_B regex-dna	26.51u 0.09s 26.75r
    68  
    69  spectral-norm 5500
    70  	gcc -O2 spectral-norm.c -lm	11.54u 0.00s 11.55r
    71  	gccgo -O2 spectral-norm.go	12.20u 0.00s 12.23r
    72  	gc spectral-norm	50.23u 0.00s 50.36r
    73  	gc_B spectral-norm	49.69u 0.01s 49.83r
    74  	gc spectral-norm-parallel	24.47u 0.03s 11.05r  # has shift >>1 not div /2
    75  	[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]
    76  
    77  August 6, 2009
    78  
    79  k-nucleotide 5000000
    80  	# string maps are slower than glib string maps
    81  	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.72u 0.01s 10.74r
    82  	gccgo -O2 k-nucleotide.go	21.64u 0.83s 22.78r
    83  	gc k-nucleotide	16.08u 0.06s 16.50r
    84  	gc_B k-nucleotide	17.32u 0.02s 17.37r
    85  
    86  mandelbrot 5500
    87  	# floating point code generator should use more registers
    88  	gcc -O2 mandelbrot.c	56.13u 0.02s 56.17r
    89  	gccgo -O2 mandelbrot.go	57.49u 0.01s 57.51r
    90  	gc mandelbrot	74.32u 0.00s 74.35r
    91  	gc_B mandelbrot	74.28u 0.01s 74.31r
    92  
    93  meteor 2100
    94  	# we don't know
    95  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
    96  	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.14r
    97  	gc meteor-contest	0.24u 0.00s 0.26r
    98  	gc_B meteor-contest	0.23u 0.00s 0.24r
    99  
   100  pidigits 10000
   101  	# bignum is slower than gmp
   102  	gcc -O2 pidigits.c -lgmp	2.60u 0.00s 2.62r
   103  	gc pidigits	77.69u 0.14s 78.18r
   104  	gc_B pidigits	74.26u 0.18s 75.41r
   105  	gc_B pidigits	68.48u 0.20s 69.31r   # special case: no bounds checking in bignum
   106  
   107  August 7 2009
   108  
   109  # New gc does better division by powers of 2.  Significant improvements:
   110  
   111  spectral-norm 5500
   112  	# floating point code generator should use more registers; possibly inline evalA
   113  	gcc -O2 spectral-norm.c -lm	11.50u 0.00s 11.50r
   114  	gccgo -O2 spectral-norm.go	12.02u 0.00s 12.02r
   115  	gc spectral-norm	23.98u 0.00s 24.00r	# new time is 0.48 times old time, 52% faster
   116  	gc_B spectral-norm	23.71u 0.01s 23.72r	# ditto
   117  	gc spectral-norm-parallel	24.04u 0.00s 6.26r  # /2 put back.  note: 4x faster (on r70, idle)
   118  
   119  k-nucleotide 1000000
   120  	# string maps are slower than glib string maps
   121  	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.82u 0.04s 10.87r
   122  	gccgo -O2 k-nucleotide.go	22.73u 0.89s 23.63r
   123  	gc k-nucleotide	15.97u 0.03s 16.04r
   124  	gc_B k-nucleotide	15.86u 0.06s 15.93r	# 8.5% faster, but probably due to weird cache effects in previous version
   125  
   126  pidigits 10000
   127  	# bignum is slower than gmp
   128  	gcc -O2 pidigits.c -lgmp	2.58u 0.00s 2.58r
   129  	gc pidigits	71.24u 0.04s 71.28r	# 8.5% faster
   130  	gc_B pidigits	71.25u 0.03s 71.29r	# 4% faster
   131  
   132  threadring 50000000
   133  	gcc -O2 threadring.c -lpthread	35.51u 160.21s 199.50r
   134  	gccgo -O2 threadring.go	90.33u 459.95s 448.03r
   135  	gc threadring	33.11u 0.00s 33.14r
   136  	GOMAXPROCS=4 gc threadring	114.48u 226.65s 371.59r
   137  	# change wait code to do <-make(chan int) instead of time.Sleep
   138  	gc threadring	28.41u 0.01s 29.35r
   139  	GOMAXPROCS=4 gc threadring	112.59u 232.83s 384.72r
   140  	
   141  chameneos 6000000
   142  	gcc -O2 chameneosredux.c -lpthread	18.14u 276.52s 76.93r
   143  	gc chameneosredux	20.19u 0.01s 20.23r
   144  
   145  Aug 10 2009
   146  
   147  # new 6g with better fp registers, fast div and mod of integers
   148  # complete set of timings listed. significant changes marked ***
   149  
   150  fasta -n 25000000
   151  	# probably I/O library inefficiencies
   152  	gcc -O2 fasta.c	5.96u 0.00s 5.97r
   153  	gc fasta	10.59u 0.01s 10.61r
   154  	gc_B fasta	9.92u 0.02s 9.95r
   155  
   156  reverse-complement < output-of-fasta-25000000
   157  	# we don't know - memory cache behavior?
   158  	gcc -O2 reverse-complement.c	1.96u 1.56s 16.23r
   159  	gccgo -O2 reverse-complement.go	6.41u 0.62s 7.05r
   160  	gc reverse-complement	6.46u 0.70s 7.17r
   161  	gc_B reverse-complement	6.22u 0.72s 6.95r
   162  
   163  nbody 50000000
   164  	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
   165  	gcc -O2 nbody.c	21.26u 0.01s 21.28r
   166  	gccgo -O2 nbody.go	116.68u 0.07s 116.80r
   167  	gc nbody	86.64u 0.01s 86.68r	# -14%
   168  	gc_B nbody	85.72u 0.02s 85.77r	# *** -17%
   169  
   170  binary-tree 15 # too slow to use 20
   171  	# memory allocation and garbage collection
   172  	gcc -O2 binary-tree.c -lm	0.87u 0.00s 0.87r
   173  	gccgo -O2 binary-tree.go	1.61u 0.47s 2.09r
   174  	gccgo -O2 binary-tree-freelist.go	0.00u 0.00s 0.01r
   175  	gc binary-tree	9.11u 0.01s 9.13r	# *** -5%
   176  	gc binary-tree-freelist	0.47u 0.01s 0.48r
   177  
   178  fannkuch 12
   179  	# bounds checking is half the difference
   180  	# rest might be registerization
   181  	gcc -O2 fannkuch.c	59.92u 0.00s 59.94r
   182  	gccgo -O2 fannkuch.go	65.54u 0.00s 65.58r
   183  	gc fannkuch	123.98u 0.01s 124.04r
   184  	gc_B fannkuch	90.75u 0.00s 90.78r
   185  
   186  regex-dna 100000
   187  	# regexp code is slow on trivial regexp
   188  	gcc -O2 regex-dna.c -lpcre	0.91u 0.00s 0.92r
   189  	gc regex-dna	27.25u 0.02s 27.28r
   190  	gc_B regex-dna	29.51u 0.03s 29.55r
   191  
   192  spectral-norm 5500
   193  	# possibly inline evalA
   194  	gcc -O2 spectral-norm.c -lm	11.57u 0.00s 11.57r
   195  	gccgo -O2 spectral-norm.go	12.07u 0.01s 12.08r
   196  	gc spectral-norm	23.99u 0.00s 24.00r
   197  	gc_B spectral-norm	23.73u 0.00s 23.75r
   198  
   199  k-nucleotide 1000000
   200  	# string maps are slower than glib string maps
   201  	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.63u 0.02s 10.69r
   202  	gccgo -O2 k-nucleotide.go	23.19u 0.91s 24.12r
   203  	gc k-nucleotide	16.73u 0.04s 16.78r	# *** +5% (but this one seems to vary by more than that)
   204  	gc_B k-nucleotide	16.46u 0.04s 16.51r	# *** +5%
   205  
   206  mandelbrot 16000
   207  	gcc -O2 mandelbrot.c	56.16u 0.00s 56.16r
   208  	gccgo -O2 mandelbrot.go	57.41u 0.01s 57.42r
   209  	gc mandelbrot	64.05u 0.02s 64.08r	# *** -14%
   210  	gc_B mandelbrot	64.10u 0.02s 64.14r	# *** -14%
   211  
   212  meteor 2100
   213  	# we don't know
   214  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
   215  	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.12r
   216  	gc meteor-contest	0.18u 0.00s 0.20r	# *** -25%
   217  	gc_B meteor-contest	0.17u 0.00s 0.18r	# *** -24%
   218  
   219  pidigits 10000
   220  	# bignum is slower than gmp
   221  	gcc -O2 pidigits.c -lgmp	2.57u 0.00s 2.57r
   222  	gc pidigits	71.82u 0.04s 71.89r
   223  	gc_B pidigits	71.84u 0.08s 71.98r
   224  
   225  threadring 50000000
   226  	gcc -O2 threadring.c -lpthread	30.91u 164.33s 204.57r
   227  	gccgo -O2 threadring.go	87.12u 460.04s 447.61r
   228  	gc threadring	38.55u 0.00s 38.56r	# *** +16%
   229  
   230  chameneos 6000000
   231  	gcc -O2 chameneosredux.c -lpthread	17.93u 323.65s 88.47r
   232  	gc chameneosredux	21.72u 0.00s 21.73r
   233  
   234  August 10 2009
   235  
   236  # In-place versions for some bignum operations.
   237  pidigits 10000
   238  	gcc -O2 pidigits.c -lgmp	2.56u 0.00s 2.57r
   239  	gc pidigits	55.22u 0.04s 55.29r	# *** -23%
   240  	gc_B pidigits	55.49u 0.02s 55.60r	# *** -23%
   241  
   242  September 3 2009
   243  
   244  # New 6g inlines slices, has a few other tweaks.
   245  # Complete rerun. Significant changes marked.
   246  
   247  fasta -n 25000000
   248  	# probably I/O library inefficiencies
   249  	gcc -O2 fasta.c	5.96u 0.00s 5.96r
   250  	gc fasta	10.63u 0.02s 10.66r
   251  	gc_B fasta	9.92u 0.01s 9.94r
   252  
   253  reverse-complement < output-of-fasta-25000000
   254  	# we don't know - memory cache behavior?
   255  	gcc -O2 reverse-complement.c	1.92u 0.33s 2.93r
   256  	gccgo -O2 reverse-complement.go	6.76u 0.72s 7.58r	# +5%
   257  	gc reverse-complement	6.59u 0.70s 7.29r	# +2%
   258  	gc_B reverse-complement	5.57u 0.80s 6.37r	# -10%
   259  
   260  nbody 50000000
   261  	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
   262  	# also loop alignment appears to be critical
   263  	gcc -O2 nbody.c	21.28u 0.00s 21.28r
   264  	gccgo -O2 nbody.go	119.21u 0.00s 119.22r	# +2%
   265  	gc nbody	109.72u 0.00s 109.78r	# + 28% *****
   266  	gc_B nbody	85.90u 0.00s 85.91r
   267  
   268  binary-tree 15 # too slow to use 20
   269  	# memory allocation and garbage collection
   270  	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
   271  	gccgo -O2 binary-tree.go	1.88u 0.54s 2.42r	# +17%
   272  	gccgo -O2 binary-tree-freelist.go	0.01u 0.01s 0.02r
   273  	gc binary-tree	8.94u 0.01s 8.96r	# -2%
   274  	gc binary-tree-freelist	0.47u 0.01s 0.48r
   275  
   276  fannkuch 12
   277  	# bounds checking is half the difference
   278  	# rest might be registerization
   279  	gcc -O2 fannkuch.c	60.12u 0.00s 60.12r
   280  	gccgo -O2 fannkuch.go	92.62u 0.00s 92.66r		# +41% ***
   281  	gc fannkuch	123.90u 0.00s 123.92r
   282  	gc_B fannkuch	89.71u 0.00s 89.74r	# -1%
   283  
   284  regex-dna 100000
   285  	# regexp code is slow on trivial regexp
   286  	gcc -O2 regex-dna.c -lpcre	0.88u 0.00s 0.88r
   287  	gc regex-dna	25.77u 0.01s 25.79r		# -5%
   288  	gc_B regex-dna	26.05u 0.02s 26.09r	# -12% ***
   289  
   290  spectral-norm 5500
   291  	# possibly inline evalA
   292  	gcc -O2 spectral-norm.c -lm	11.51u 0.00s 11.51r
   293  	gccgo -O2 spectral-norm.go	11.95u 0.00s 11.96r
   294  	gc spectral-norm	24.23u 0.00s 24.23r
   295  	gc_B spectral-norm	23.83u 0.00s 23.84r
   296  
   297  k-nucleotide 1000000
   298  	# string maps are slower than glib string maps
   299  	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.68u 0.04s 10.72r
   300  	gccgo -O2 k-nucleotide.go	23.03u 0.88s 23.92r
   301  	gc k-nucleotide	15.79u 0.05s 15.85r	# -5% (but this one seems to vary by more than that)
   302  	gc_B k-nucleotide	17.88u 0.05s 17.95r # +8% (ditto)
   303  
   304  mandelbrot 16000
   305  	gcc -O2 mandelbrot.c	56.17u 0.02s 56.20r
   306  	gccgo -O2 mandelbrot.go	56.74u 0.02s 56.79r	 # -1%
   307  	gc mandelbrot	63.31u 0.01s 63.35r	# -1%
   308  	gc_B mandelbrot	63.29u 0.00s 63.31r	# -1%
   309  
   310  meteor 2100
   311  	# we don't know
   312  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
   313  	gccgo -O2 meteor-contest.go	0.11u 0.00s 0.12r
   314  	gc meteor-contest	0.18u 0.00s 0.19r
   315  	gc_B meteor-contest	0.17u 0.00s 0.18r
   316  
   317  pidigits 10000
   318  	# bignum is slower than gmp
   319  	gcc -O2 pidigits.c -lgmp	2.56u 0.00s 2.57r
   320  	gc pidigits	55.87u 0.03s 55.91r
   321  	gc_B pidigits	55.93u 0.03s 55.99r
   322  
   323  # these tests are compared using real time, since they run multiple processors
   324  # accuracy probably low
   325  threadring 50000000
   326  	gcc -O2 threadring.c -lpthread	26.31u 164.69s 199.92r	# -2%
   327  	gccgo -O2 threadring.go	87.90u 487.26s 472.81r	# +6%
   328  	gc threadring	28.89u 0.00s 28.90r	# -25% ***
   329  
   330  chameneos 6000000
   331  	gcc -O2 chameneosredux.c -lpthread	16.41u 296.91s 81.17r	# -8%
   332  	gc chameneosredux	19.97u 0.00s 19.97r	# -8%
   333  
   334  Sep 22, 2009
   335  
   336  # 6g inlines sliceslice in most cases.
   337  
   338  fasta -n 25000000
   339  	# probably I/O library inefficiencies
   340  	gc fasta	10.24u 0.00s 10.25r	# -4%
   341  	gc_B fasta	9.68u 0.01s 9.69r	# -3%
   342  
   343  reverse-complement < output-of-fasta-25000000
   344  	# we don't know - memory cache behavior?
   345  	gc reverse-complement	6.67u 0.69s 7.37r	# +1%
   346  	gc_B reverse-complement	6.00u 0.64s 6.65r	# +7%
   347  
   348  nbody -n 50000000
   349  	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
   350  	# also loop alignment appears to be critical
   351  	gc nbody	86.27u 0.00s 86.29r	# -21%
   352  	gc_B nbody	104.52u 0.00s 104.54r	# +22%
   353  
   354  fannkuch 12
   355  	# bounds checking is half the difference
   356  	# rest might be registerization
   357  	gc fannkuch	128.36u 0.00s 128.37r	# +4%
   358  	gc_B fannkuch	89.32u 0.00s 89.34r
   359  
   360  regex-dna 100000
   361  	# regexp code is slow on trivial regexp
   362  	gc regex-dna	24.82u 0.01s 24.86r	# -4%
   363  	gc_B regex-dna	24.55u 0.01s 24.57r	# -6%
   364  
   365  spectral-norm 5500
   366  	# possibly inline evalA
   367  	gc spectral-norm	24.05u 0.00s 24.07r	# -1%
   368  	gc_B spectral-norm	23.60u 0.00s 23.65r	 # -1%
   369  
   370  k-nucleotide 1000000
   371  	# string maps are slower than glib string maps
   372  	gc k-nucleotide	17.84u 0.04s 17.89r	# +13% but mysterious variation continues
   373  	gc_B k-nucleotide	15.56u 0.08s 15.65r	# -13% (ditto)
   374  
   375  mandelbrot 16000
   376  	gc mandelbrot	64.08u 0.01s 64.11r	# +1%
   377  	gc_B mandelbrot	64.04u 0.00s 64.05r	# +1%
   378  
   379  pidigits 10000
   380  	# bignum is slower than gmp
   381  	gc pidigits	58.68u 0.02s 58.72r	# +5%
   382  	gc_B pidigits	58.86u 0.05s 58.99r	# +5%
   383  
   384  # these tests are compared using real time, since they run multiple processors
   385  # accuracy probably low
   386  threadring 50000000
   387  	gc threadring	32.70u 0.02s 32.77r	# +13%
   388  
   389  chameneos 6000000
   390  	gc chameneosredux	26.62u 0.00s 26.63r	# +13%
   391  
   392  Sep 24, 2009
   393  
   394  # Sqrt now in assembler for 6g.
   395  nbody -n 50000000
   396  	# remember, at least for 6g, alignment of loops may be important
   397  	gcc -O2 nbody.c	21.24u 0.00s 21.25r
   398  	gccgo -O2 nbody.go	121.03u 0.00s 121.04r
   399  	gc nbody	30.26u 0.00s 30.27r	# -65% ***
   400  	gc_B nbody	30.20u 0.02s 30.22r	# -72% *** 
   401  
   402  Nov 13 2009
   403  
   404  # fix bug in regexp; take performance hit.  good regexps will come in time.
   405  regex-dna 100000
   406  	gcc -O2 regex-dna.c -lpcre	0.92u 0.00s 0.94r
   407  	gc regex-dna	29.78u 0.03s 29.83r
   408  	gc_B regex-dna	32.63u 0.03s 32.74r
   409  
   410  Nov 24 2009
   411  
   412  # Roger Peppe's rewrite of the benchmark
   413  chameneos 6000000
   414  	gcc -O2 chameneosredux.c -lpthread	18.00u 303.29s 83.64r
   415  	gc chameneosredux	12.10u 0.00s 12.10r  # 2.22X faster
   416  
   417  Jan 6, 2010
   418  
   419  # Long-overdue update.  All numbers included in this complete run.
   420  # Some programs (e.g. reverse-complement) rewritten for speed.
   421  # Regular expressions much faster in common cases (although still far behind PCRE)
   422  # Bignum stuff improved
   423  # Better (but sometimes slower) locking in channels.
   424  
   425  fasta -n 25000000
   426  	gcc -O2 fasta.c	5.99u 0.01s 6.00r
   427  	gc fasta	9.11u 0.00s 9.12r	# -11%
   428  	gc_B fasta	8.60u 0.00s 8.62r	# +12% ??
   429  
   430  reverse-complement < output-of-fasta-25000000
   431  	gcc -O2 reverse-complement.c	2.00u 0.80s 9.54r
   432  #	gccgo -O2 reverse-complement.go	4.57u 0.35s 4.94r	# 33% faster
   433  	gc reverse-complement	2.01u 0.38s 2.40r	# 3.3X faster
   434  	gc_B reverse-complement	1.88u 0.36s 2.24r	# 3.2X faster
   435  GOGC=off
   436  	gc reverse-complement	2.01u 0.35s 2.37r
   437  	gc_B reverse-complement	1.86u 0.32s 2.19r
   438  
   439  nbody -n 50000000
   440  	gcc -O2 nbody.c	21.28u 0.00s 21.31r
   441  	gccgo -O2 nbody.go	80.02u 0.00s 80.05r	# 33% faster
   442  	gc nbody	30.13u 0.00s 30.13r
   443  	gc_B nbody	29.89u 0.01s 29.91r
   444  
   445  binary-tree 15 # too slow to use 20
   446  	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
   447  	gccgo -O2 binary-tree.go	4.82u 0.41s 5.24r	# 2.5X slower
   448  	gc binary-tree	7.23u 0.01s 7.25r	# -19%
   449  	gc binary-tree-freelist	0.43u 0.00s 0.44r	# -9%
   450  
   451  fannkuch 12
   452  	gcc -O2 fannkuch.c	60.17u 0.00s 60.17r
   453  	gccgo -O2 fannkuch.go	78.47u 0.01s 78.49r
   454  	gc fannkuch	128.86u 0.00s 128.96r
   455  	gc_B fannkuch	90.17u 0.00s 90.21r
   456  
   457  regex-dna 100000
   458  	gcc -O2 regex-dna.c -lpcre	0.90u 0.00s 0.92r
   459  	gc regex-dna	9.48u 0.01s 9.50r	# 3.1X faster
   460  	gc_B regex-dna	9.08u 0.00s 9.10r	# 3.6X faster
   461  
   462  spectral-norm 5500
   463  	gcc -O2 spectral-norm.c -lm	11.48u 0.00s 11.48r
   464  	gccgo -O2 spectral-norm.go	11.68u 0.00s 11.70r
   465  	gc spectral-norm	23.98u 0.00s 23.99r
   466  	gc_B spectral-norm	23.68u 0.00s 23.69r
   467  
   468  k-nucleotide 1000000
   469  	gcc -O2 k-nucleotide.c	10.85u 0.04s 10.90r
   470  	gccgo -O2 k-nucleotide.go	25.26u 0.87s 26.14r
   471  	gc k-nucleotide	15.28u 0.06s 15.37r	# restored; mysterious variation continues
   472  	gc_B k-nucleotide	15.97u 0.03s 16.00r
   473  
   474  mandelbrot 16000
   475  	gcc -O2 mandelbrot.c	56.12u 0.01s 56.15r
   476  	gccgo -O2 mandelbrot.go	56.86u 0.01s 56.89r
   477  	gc mandelbrot	66.05u 0.00s 66.07r	# -3%
   478  	gc_B mandelbrot	66.06u 0.00s 66.07r	# -3%
   479  
   480  meteor 2100
   481  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
   482  	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.12r
   483  	gc meteor-contest	0.17u 0.00s 0.17r
   484  	gc_B meteor-contest	0.15u 0.00s 0.16r
   485  
   486  pidigits 10000
   487  	gcc -O2 pidigits.c -lgmp	2.57u 0.00s 2.59r
   488  	gc pidigits	38.27u 0.02s 38.30r	# 1.5X faster
   489  	gc_B pidigits	38.27u 0.02s 38.31r	# 1.5X faster
   490  
   491  threadring 50000000
   492  	gcc -O2 threadring.c	37.11u 170.59s 212.75r
   493  	gccgo -O2 threadring.go	89.67u 447.56s 442.55r	# -6.5%
   494  	gc threadring	36.08u 0.04s 36.15r	# +10%
   495  
   496  chameneos 6000000
   497  	gcc -O2 chameneosredux.c -lpthread	19.02u 331.08s 90.79r
   498  	gc chameneosredux	12.54u 0.00s 12.55r
   499  
   500  Oct 19, 2010
   501  
   502  # Another long-overdue update. Some of the code is new; parallel versions
   503  # of some are added.  A few significant improvements.
   504  
   505  fasta -n 25000000
   506  	gcc -O2 fasta.c	4.92u 0.00s 4.93r
   507  	gccgo -O2 fasta.go	3.31u 0.00s 3.34r  # new code
   508  	gc fasta	3.68u 0.00s 3.69r  # 2.5X faster with no code change
   509  	gc_B fasta	3.68u 0.00s 3.69r  # 2.3X faster with no code change
   510  
   511  reverse-complement < output-of-fasta-25000000
   512  	gcc -O2 reverse-complement.c	1.93u 0.81s 11.24r
   513  	gccgo -O2 reverse-complement.go	1.58u 0.43s 2.04r  # first run with new code?
   514  	gc reverse-complement	1.84u 0.34s 2.20r  # 10% faster
   515  	gc_B reverse-complement	1.85u 0.32s 2.18r
   516  
   517  nbody -n 50000000
   518  	gcc -O2 nbody.c	21.35u 0.00s 21.36r
   519  	gccgo -O2 nbody.go	21.62u 0.00s 21.66r  # 3.7X faster - why??
   520  	gc nbody	29.78u 0.00s 29.79r
   521  	gc_B nbody	29.72u 0.00s 29.72r
   522  
   523  binary-tree 15 # too slow to use 20
   524  	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.88r
   525  	gccgo -O2 binary-tree.go	4.05u 0.02s 4.08r  # 28% faster
   526  	gccgo -O2 binary-tree-freelist.go	0.34u 0.08s 0.34r
   527  	gc binary-tree	5.94u 0.00s 5.95r  # 20% faster
   528  	gc binary-tree-freelist	0.50u 0.01s 0.54r
   529  
   530  fannkuch 12
   531  	gcc -O2 fannkuch.c	60.45u 0.00s 60.45r
   532  	gccgo -O2 fannkuch.go	64.64u 0.00s 64.64r
   533  	gccgo -O2 fannkuch-parallel.go	115.63u 0.00s 31.58r
   534  	gc fannkuch	126.52u 0.04s 126.68r
   535  	gc fannkuch-parallel	238.82u 0.10s 65.93r  # GOMAXPROCS=4
   536  	gc_B fannkuch	88.99u 0.00s 89.02r
   537  
   538  regex-dna 100000
   539  	gcc -O2 regex-dna.c -lpcre	0.89u 0.00s 0.89r
   540  	gc regex-dna	8.99u 0.02s 9.03r
   541  	gc regex-dna-parallel	8.94u 0.02s 3.68r  # GOMAXPROCS=4
   542  	gc_B regex-dna	9.12u 0.00s 9.14r
   543  
   544  spectral-norm 5500
   545  	gcc -O2 spectral-norm.c -lm	11.55u 0.00s 11.57r
   546  	gccgo -O2 spectral-norm.go	11.73u 0.00s 11.75r
   547  	gc spectral-norm	23.74u 0.00s 23.79r
   548  	gc_B spectral-norm	24.49u 0.02s 24.54r
   549  
   550  k-nucleotide 1000000
   551  	gcc -O2 k-nucleotide.c	11.44u 0.06s 11.50r
   552  	gccgo -O2 k-nucleotide.go	8.65u 0.04s 8.71r
   553  	gccgo -O2 k-nucleotide-parallel.go	8.75u 0.03s 2.97r # set GOMAXPROCS=4
   554  	gc k-nucleotide	14.92u 0.05s 15.01r
   555  	gc k-nucleotide-parallel	16.96u 0.06s 6.53r  # set GOMAXPROCS=4
   556  	gc_B k-nucleotide	15.97u 0.03s 16.08r
   557  
   558  mandelbrot 16000
   559  	gcc -O2 mandelbrot.c	56.32u 0.00s 56.35r
   560  	gccgo -O2 mandelbrot.go	55.62u 0.02s 55.77r
   561  	gc mandelbrot	64.85u 0.01s 64.94r
   562  	gc_B mandelbrot	65.02u 0.01s 65.14r
   563  
   564  meteor 2100
   565  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
   566  	gccgo -O2 meteor-contest.go	0.10u 0.00s 0.11r
   567  	gc meteor-contest	0.17u 0.00s 0.18r
   568  	gc_B meteor-contest	0.16u 0.00s 0.16r
   569  
   570  pidigits 10000
   571  	gcc -O2 pidigits.c -lgmp	2.58u 0.00s 2.59r
   572  	gccgo -O2 pidigits.go	14.06u 0.01s 14.09r # first run?
   573  	gc pidigits	8.47u 0.05s 8.55r # 4.5X faster due to package big
   574  	gc_B pidigits	8.33u 0.01s 8.36r # 4.5X faster due to package big
   575  
   576  threadring 50000000
   577  	gcc -O2 threadring.c	28.18u 153.19s 186.47r
   578  	gccgo -O2 threadring.go	110.10u 516.48s 515.25r
   579  	gc threadring	40.39u 0.00s 40.40r
   580  
   581  chameneos 6000000
   582  	gcc -O2 chameneosredux.c -lpthread	18.20u 301.55s 83.10r
   583  	gccgo -O2 chameneosredux.go	52.22u 324.54s 201.21r
   584  	gc chameneosredux	13.52u 0.00s 13.54r
   585  
   586  Dec 14, 2010
   587  
   588  # Improved regex code (same algorithm) gets ~30%.
   589  
   590  regex-dna 100000
   591  	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.78r
   592  	gc regex-dna	6.80u 0.00s 6.81r
   593  	gc regex-dna-parallel	6.82u 0.01s 2.75r
   594  	gc_B regex-dna	6.69u 0.02s 6.70r
   595  
   596  Feb 15, 2011
   597  
   598  # Improved GC, still single-threaded but more efficient
   599  
   600  fasta -n 25000000
   601  	gcc -O2 fasta.c	3.40u 0.00s 3.40r
   602  	gccgo -O2 fasta.go	3.51u 0.00s 3.50r
   603  	gc fasta	3.66u 0.01s 3.66r
   604  	gc_B fasta	3.66u 0.00s 3.66r
   605  
   606  reverse-complement < output-of-fasta-25000000
   607  	gcc -O2 reverse-complement.c	1.86u 1.29s 4.93r
   608  	gccgo -O2 reverse-complement.go	2.18u 0.41s 2.60r
   609  	gc reverse-complement	1.67u 0.48s 2.15r
   610  	gc_B reverse-complement	1.71u 0.45s 2.15r
   611  
   612  nbody -n 50000000
   613  	gcc -O2 -lm nbody.c	21.64u 0.00s 21.64r
   614  	gccgo -O2 nbody.go	21.46u 0.00s 21.45r
   615  	gc nbody	29.07u 0.00s 29.06r
   616  	gc_B nbody	31.61u 0.00s 31.61r
   617  
   618  binary-tree 15 # too slow to use 20
   619  	gcc -O2 binary-tree.c -lm	0.88u 0.00s 0.87r
   620  	gccgo -O2 binary-tree.go	2.74u 0.07s 2.81r
   621  	gccgo -O2 binary-tree-freelist.go	0.01u 0.00s 0.00r
   622  	gc binary-tree	4.22u 0.02s 4.24r
   623  	gc binary-tree-freelist	0.54u 0.02s 0.55r
   624  
   625  fannkuch 12
   626  	gcc -O2 fannkuch.c	57.64u 0.00s 57.64r
   627  	gccgo -O2 fannkuch.go	65.79u 0.00s 65.82r
   628  	gccgo -O2 fannkuch-parallel.go	160.91u 0.02s 43.90r
   629  	gc fannkuch	126.36u 0.03s 126.53r
   630  	gc fannkuch-parallel	175.23u 0.04s 45.49r
   631  	gc_B fannkuch	89.23u 0.00s 89.24r
   632  
   633  regex-dna 100000
   634  	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.80r
   635  	gccgo -O2 regex-dna.go	12.38u 0.10s 12.52r
   636  	gccgo -O2 regex-dna-parallel.go	43.96u 4.64s 15.11r
   637  	gc regex-dna	7.03u 0.01s 7.05r
   638  	gc regex-dna-parallel	6.85u 0.05s 2.70r
   639  	gc_B regex-dna	6.87u 0.02s 6.89r
   640  
   641  spectral-norm 5500
   642  	gcc -O2 spectral-norm.c -lm	12.29u 0.00s 12.28r
   643  	gccgo -O2 spectral-norm.go	11.79u 0.00s 11.79r
   644  	gc spectral-norm	24.00u 0.02s 24.05r
   645  	gc_B spectral-norm	24.59u 0.01s 24.59r
   646  
   647  k-nucleotide 1000000
   648  	gcc -O2 k-nucleotide.c	9.75u 0.07s 9.82r
   649  	gccgo -O2 k-nucleotide.go	8.92u 0.06s 8.98r
   650  	gccgo -O2 k-nucleotide-parallel.go	8.40u 0.04s 2.76r
   651  	gc k-nucleotide	17.01u 0.03s 17.04r
   652  	gc k-nucleotide-parallel	16.51u 0.08s 6.21r
   653  	gc_B k-nucleotide	16.94u 0.08s 17.02r
   654  
   655  mandelbrot 16000
   656  	gcc -O2 mandelbrot.c	54.60u 0.00s 54.66r
   657  	gccgo -O2 mandelbrot.go	59.38u 0.00s 59.41r
   658  	gc mandelbrot	64.93u 0.04s 65.08r
   659  	gc_B mandelbrot	64.85u 0.03s 64.92r
   660  
   661  meteor 2098
   662  	gcc -O2 meteor-contest.c	0.10u 0.01s 0.10r
   663  	gccgo -O2 meteor-contest.go	0.11u 0.00s 0.11r
   664  	gc meteor-contest	0.18u 0.00s 0.17r
   665  	gc_B meteor-contest	0.17u 0.00s 0.16r
   666  
   667  pidigits 10000
   668  	gcc -O2 pidigits.c -lgmp	2.24u 0.00s 2.23r
   669  	gccgo -O2 pidigits.go	14.05u 0.00s 14.06r
   670  	gc pidigits	6.34u 0.05s 6.38r
   671  	gc_B pidigits	6.37u 0.02s 6.38r
   672  
   673  threadring 50000000
   674  	gcc -O2 threadring.c	30.50u 258.05s 325.72r
   675  	gccgo -O2 threadring.go	92.87u 748.39s 728.46r
   676  	gc threadring	38.03u 0.01s 38.04r
   677  
   678  # Apr 15, 2011
   679  # Move to new machine, Intel Xeon E5520@2.27GHz.
   680  # (Was Opteron(tm) Processor 8214 HE)
   681  
   682  fasta -n 25000000
   683  OLD:
   684  	gcc -O2 fasta.c	3.39u 0.04s 3.42r
   685  	gccgo -O2 fasta.go	3.52u 0.00s 3.52r
   686  	gc fasta	3.63u 0.04s 3.67r
   687  	gc_B fasta	3.66u 0.00s 3.66r
   688  NEW:
   689  	gcc -O2 fasta.c	1.45u 0.02s 1.47r
   690  	gccgo -O2 fasta.go	1.51u 0.01s 1.51r
   691  	gc fasta	2.04u 0.00s 2.04r
   692  	gc_B fasta	2.05u 0.00s 2.04r
   693  
   694  reverse-complement < output-of-fasta-25000000
   695  OLD:
   696  	gcc -O2 reverse-complement.c	1.87u 1.51s 7.02r
   697  	gccgo -O2 reverse-complement.go	1.56u 0.54s 3.37r
   698  	gc reverse-complement	1.73u 0.36s 2.08r
   699  	gc_B reverse-complement	1.75u 0.37s 2.12r
   700  NEW:
   701  	gcc -O2 reverse-complement.c	1.20u 0.47s 12.96r
   702  	gccgo -O2 reverse-complement.go	0.88u 0.14s 1.01r
   703  	gc reverse-complement	1.13u 0.17s 1.30r
   704  	gc_B reverse-complement	1.11u 0.09s 1.20r
   705  
   706  nbody -n 50000000
   707  OLD:
   708  	gcc -O2 -lm nbody.c	21.90u 0.00s 21.92r
   709  	gccgo -O2 nbody.go	23.12u 0.03s 23.19r
   710  	gc nbody	29.07u 0.00s 29.07r
   711  	gc_B nbody	31.84u 0.00s 31.85r
   712  NEW:
   713  	gcc -O2 -lm nbody.c	13.01u 0.00s 13.03r
   714  	gccgo -O2 nbody.go	13.35u 0.00s 13.37r
   715  	gc nbody	21.78u 0.00s 21.82r
   716  	gc_B nbody	21.72u 0.00s 21.76r
   717  
   718  binary-tree 15 # too slow to use 20
   719  OLD:
   720  	gcc -O2 binary-tree.c -lm	0.83u 0.02s 0.84r
   721  	gccgo -O2 binary-tree.go	2.61u 0.02s 2.62r
   722  	gccgo -O2 binary-tree-freelist.go	0.32u 0.01s 0.32r
   723  	gc binary-tree	3.93u 0.04s 3.97r
   724  	gc binary-tree-freelist	0.47u 0.03s 0.50r
   725  NEW:
   726  	gcc -O2 binary-tree.c -lm	0.60u 0.00s 0.59r
   727  	gccgo -O2 binary-tree.go	1.53u 0.00s 1.52r
   728  	gccgo -O2 binary-tree-freelist.go	0.01u 0.00s 0.00r
   729  	gc binary-tree	1.93u 0.02s 1.95r
   730  	gc binary-tree-freelist	0.32u 0.01s 0.32r
   731  
   732  fannkuch 12
   733  OLD:
   734  	gcc -O2 fannkuch.c	57.64u 0.00s 57.64r
   735  	gccgo -O2 fannkuch.go	65.56u 0.01s 65.65r
   736  	gccgo -O2 fannkuch-parallel.go	179.12u 0.00s 49.82r
   737  	gc fannkuch	126.39u 0.00s 126.39r
   738  	gc fannkuch-parallel	172.49u 0.02s 45.44r
   739  	gc_B fannkuch	89.30u 0.00s 89.28r
   740  NEW:
   741  	gcc -O2 fannkuch.c	45.17u 0.00s 45.26r
   742  	gccgo -O2 fannkuch.go	53.63u 0.00s 53.73r
   743  	gccgo -O2 fannkuch-parallel.go	216.72u 0.00s 58.42r
   744  	gc fannkuch	108.21u 0.00s 108.44r
   745  	gc fannkuch-parallel	227.20u 0.00s 57.27r
   746  	gc_B fannkuch	56.14u 0.00s 56.26r
   747  
   748  regex-dna 100000
   749  OLD:
   750  	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.78r
   751  	gccgo -O2 regex-dna.go	10.15u 0.02s 10.23r
   752  	gccgo -O2 regex-dna-parallel.go	33.81u 3.22s 11.62r
   753  	gc regex-dna	6.52u 0.04s 6.56r
   754  	gc regex-dna-parallel	6.84u 0.03s 2.70r
   755  	gc_B regex-dna	6.83u 0.01s 6.84r
   756  NEW:
   757  	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.47r
   758  	gccgo -O2 regex-dna.go	6.00u 0.00s 6.00r
   759  	gccgo -O2 regex-dna-parallel.go	44.54u 1.57s 6.51r
   760  	gc regex-dna	5.41u 0.01s 5.42r
   761  	gc regex-dna-parallel	5.62u 0.01s 2.20r
   762  	gc_B regex-dna	5.50u 0.00s 5.50r
   763  
   764  spectral-norm 5500
   765  OLD:
   766  	gcc -O2 spectral-norm.c -lm	12.29u 0.00s 12.28r
   767  	gccgo -O2 spectral-norm.go	11.56u 0.00s 11.55r
   768  	gc spectral-norm	23.98u 0.00s 24.00r
   769  	gc_B spectral-norm	24.62u 0.00s 24.65r
   770  NEW:
   771  	gcc -O2 spectral-norm.c -lm	15.79u 0.00s 15.82r
   772  	gccgo -O2 spectral-norm.go	15.32u 0.00s 15.35r
   773  	gc spectral-norm	19.62u 0.01s 19.67r
   774  	gc_B spectral-norm	19.62u 0.00s 19.66r
   775  
   776  k-nucleotide 1000000
   777  OLD:
   778  	gcc -O2 k-nucleotide.c	9.82u 0.06s 9.87r
   779  	gccgo -O2 k-nucleotide.go	8.30u 0.02s 8.32r
   780  	gccgo -O2 k-nucleotide-parallel.go	8.84u 0.05s 3.02r
   781  	gc k-nucleotide	15.38u 0.07s 15.44r
   782  	gc k-nucleotide-parallel	16.40u 0.03s 5.93r
   783  	gc_B k-nucleotide	15.19u 0.05s 15.23r
   784  NEW:
   785  	gcc -O2 k-nucleotide.c	4.88u 0.03s 4.92r
   786  	gccgo -O2 k-nucleotide.go	5.94u 0.01s 5.96r
   787  	gccgo -O2 k-nucleotide-parallel.go	6.44u 0.03s 1.47r
   788  	gc k-nucleotide	9.61u 0.01s 9.63r
   789  	gc k-nucleotide-parallel	9.70u 0.00s 3.39r
   790  	gc_B k-nucleotide	9.19u 0.03s 9.23r
   791  
   792  mandelbrot 16000
   793  OLD:
   794  	gcc -O2 mandelbrot.c	54.54u 0.00s 54.56r
   795  	gccgo -O2 mandelbrot.go	59.63u 0.03s 59.67r
   796  	gc mandelbrot	64.82u 0.00s 64.83r
   797  	gc_B mandelbrot	64.84u 0.00s 64.91r
   798  NEW:
   799  	gcc -O2 mandelbrot.c	36.07u 0.01s 36.15r
   800  	gccgo -O2 mandelbrot.go	43.57u 0.00s 43.66r
   801  	gc mandelbrot	60.66u 0.00s 60.79r
   802  	gc_B mandelbrot	60.90u 0.00s 61.03r
   803  
   804  meteor 2098
   805  OLD:
   806  	gcc -O2 meteor-contest.c	0.11u 0.00s 0.10r
   807  	gccgo -O2 meteor-contest.go	0.10u 0.01s 0.10r
   808  	gc meteor-contest	0.18u 0.00s 0.17r
   809  	gc_B meteor-contest	0.17u 0.00s 0.16r
   810  NEW:
   811  	gcc -O2 meteor-contest.c	0.10u 0.00s 0.09r
   812  	gccgo -O2 meteor-contest.go	0.10u 0.00s 0.09r
   813  	gc meteor-contest	0.14u 0.00s 0.14r
   814  	gc_B meteor-contest	0.13u 0.00s 0.13r
   815  
   816  pidigits 10000
   817  OLD:
   818  	gcc -O2 pidigits.c -lgmp	2.22u 0.00s 2.21r
   819  	gccgo -O2 pidigits.go	13.39u 0.00s 13.40r
   820  	gc pidigits	6.42u 0.04s 6.45r
   821  	gc_B pidigits	6.45u 0.02s 6.47r
   822  NEW:
   823  	gcc -O2 pidigits.c -lgmp	2.27u 0.00s 2.29r
   824  	gccgo -O2 pidigits.go	9.21u 0.00s 9.22r
   825  	gc pidigits	3.60u 0.00s 3.60r
   826  	gc_B pidigits	3.56u 0.02s 3.58r
   827  
   828  threadring 50000000
   829  OLD:
   830  	gcc -O2 threadring.c -lpthread	34.51u 267.95s 336.12r
   831  	gccgo -O2 threadring.go	103.51u 588.57s 627.16r
   832  	gc threadring	54.68u 0.00s 54.73r
   833  NEW:
   834  	gcc -O2 threadring.c	32.00u 259.39s 369.74r
   835  	gccgo -O2 threadring.go	133.06u 546.02s 595.33r
   836  	gc threadring	16.75u 0.02s 16.80r
   837  
   838  chameneos 6000000
   839  OLD:
   840  	gcc -O2 chameneosredux.c -lpthread	12.65u 31.02s 13.33r
   841  	gccgo -O2 chameneosredux.go	47.04u 302.84s 252.29r
   842  	gc chameneosredux	14.14u 0.00s 14.14r
   843  NEW:
   844  	gcc -O2 chameneosredux.c -lpthread	8.05u 63.43s 11.16r
   845  	gccgo -O2 chameneosredux.go	82.95u 304.37s 207.64r
   846  	gc chameneosredux	9.42u 0.00s 9.43r
   847  
   848  # May 13, 2011
   849  # after gc update to inline append when possible - 35% faster
   850  
   851  regex-dna 100000
   852  	gc regex-dna	3.94u 0.00s 3.95r
   853  	gc regex-dna-parallel	4.15u 0.01s 1.63r
   854  	gc_B regex-dna	4.01u 0.01s 4.02r
   855  
   856  # Aug 4, 2011
   857  # After various updates to locking code and some runtime changes.
   858  # Slowdowns believed due to slower (but more correct) memmove.
   859  
   860  fannkuch 12
   861  	gccgo -O2 fannkuch.go	51.59u 0.00s 51.69r # -4%
   862  	gccgo -O2 fannkuch-parallel.go	253.17u 0.00s 64.67r # -11%
   863  	gc fannkuch	103.14u 0.00s 103.36r # -5%
   864  	gc fannkuch-parallel	189.63u 0.00s 49.37r # +9%
   865  	gc_B fannkuch	49.19u 0.00s 49.29r # -14%
   866  
   867  regex-dna 100000
   868  	gc regex-dna	3.78u 0.00s 3.78r # -43%
   869  	gc regex-dna-parallel	3.84u 0.02s 1.48r # -49%
   870  	gc_B regex-dna	3.62u 0.00s 3.63r # -52%
   871  
   872  k-nucleotide 1000000
   873  	gc k-nucleotide	12.23u 0.02s 12.27r # +27%
   874  	gc k-nucleotide-parallel	12.76u 0.02s 4.37r # +29%
   875  	gc_B k-nucleotide	12.18u 0.01s 12.21r # +33%
   876  
   877  threadring 50000000
   878  	gc threadring	17.49u 0.00s 17.53r # +4%
   879  
   880  chameneos 6000000
   881  	gc chameneosredux	7.61u 0.00s 7.63r # -24%
   882  
   883  # Aug 9, 2011
   884  # After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.
   885  
   886  fannkuch 12
   887  	gc fannkuch-parallel	157.17u 0.00s 41.08r # -17%
   888  
   889  k-nucleotide 1000000
   890  	gc k-nucleotide	8.72u 0.03s 8.76r # -39%
   891  	gc k-nucleotide-parallel	8.79u 0.01s 3.14r # -39%
   892  	gc_B k-nucleotide	8.65u 0.03s 8.69r # -39%
   893  
   894  pidigits 10000
   895  	gc pidigits	3.71u 0.02s 3.73r # +4%
   896  	gc_B pidigits	3.73u 0.00s 3.73r # +4%
   897  
   898  threadring 50000000
   899  	gc threadring	14.51u 0.00s 14.54r # -17%
   900  
   901  chameneos 6000000
   902  	gc chameneosredux	7.41u 0.00s 7.42r # -3%
   903  
   904  # A complete run at the Go 1 release.
   905  # Significant changes:
   906  # - gccgo is now enabled for all tests (goroutines are cheap enough)
   907  # - threadring and chameneos are 14% faster, probably due to runtime changes
   908  # - regex-dna 36% faster
   909  # - fannkuch-parallel (only) slowed down 40%
   910  # - gccgo on binary-tree-freelist is still optimized to nothing
   911  # Other changes are modest.
   912  
   913  fasta -n 25000000
   914  	gcc -O2 fasta.c	1.45u 0.02s 1.48r
   915  	gccgo -O2 fasta.go	1.46u 0.00s 1.47r
   916  	gc fasta	1.99u 0.01s 2.00r
   917  	gc_B fasta	1.99u 0.01s 2.01r
   918  
   919  reverse-complement < output-of-fasta-25000000
   920  	gcc -O2 reverse-complement.c	0.95u 0.48s 4.99r
   921  	gccgo -O2 reverse-complement.go	0.93u 0.16s 1.09r
   922  	gc reverse-complement	1.20u 0.19s 1.39r
   923  	gc_B reverse-complement	1.04u 0.16s 1.20r
   924  
   925  nbody -n 50000000
   926  	gcc -O2 -lm nbody.c	13.02u 0.00s 13.05r
   927  	gccgo -O2 nbody.go	14.46u 0.00s 14.49r
   928  	gc nbody	21.79u 0.00s 21.84r
   929  	gc_B nbody	21.74u 0.00s 21.79r
   930  
   931  binary-tree 15 # too slow to use 20
   932  	gcc -O2 binary-tree.c -lm	0.60u 0.01s 0.61r
   933  	gccgo -O2 binary-tree.go	1.30u 0.01s 1.32r
   934  	gccgo -O2 binary-tree-freelist.go	0.00u 0.00s 0.00r
   935  	gc binary-tree	1.84u 0.01s 1.86r
   936  	gc binary-tree-freelist	0.33u 0.00s 0.33r
   937  
   938  fannkuch 12
   939  	gcc -O2 fannkuch.c	45.24u 0.00s 45.34r
   940  	gccgo -O2 fannkuch.go	59.76u 0.01s 59.90r
   941  	gccgo -O2 fannkuch-parallel.go	218.20u 0.01s 61.60r
   942  	gc fannkuch	103.92u 0.00s 104.16r
   943  	gc fannkuch-parallel	221.61u 0.00s 60.49r
   944  	gc_B fannkuch	53.17u 0.00s 53.30r
   945  
   946  regex-dna 100000
   947  	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.48r
   948  	gccgo -O2 regex-dna.go	6.52u 0.00s 6.54r
   949  	gccgo -O2 regex-dna-parallel.go	14.40u 0.73s 4.35r
   950  	gc regex-dna	2.63u 0.02s 2.66r # -36%
   951  	gc regex-dna-parallel	2.87u 0.01s 1.11r
   952  	gc_B regex-dna	2.65u 0.00s 2.66r
   953  
   954  spectral-norm 5500
   955  	gcc -O2 spectral-norm.c -lm	15.78u 0.00s 15.82r
   956  	gccgo -O2 spectral-norm.go	15.79u 0.00s 15.83r
   957  	gc spectral-norm	19.76u 0.00s 19.80r
   958  	gc_B spectral-norm	19.73u 0.01s 19.78r
   959  
   960  k-nucleotide 1000000
   961  	gcc -O2 k-nucleotide.c	5.59u 0.03s 5.63r
   962  	gccgo -O2 k-nucleotide.go	4.09u 0.03s 4.13r
   963  	gccgo -O2 k-nucleotide-parallel.go	4.50u 0.06s 1.63r
   964  	gc k-nucleotide	9.23u 0.02s 9.27r
   965  	gc k-nucleotide-parallel	9.87u 0.03s 3.55r
   966  	gc_B k-nucleotide	9.20u 0.00s 9.22r
   967  
   968  mandelbrot 16000
   969  	gcc -O2 mandelbrot.c	36.09u 0.00s 36.18r
   970  	gccgo -O2 mandelbrot.go	41.69u 0.01s 41.80r
   971  	gc mandelbrot	60.91u 0.02s 61.07r
   972  	gc_B mandelbrot	60.90u 0.00s 61.04r
   973  
   974  meteor 2098
   975  	gcc -O2 meteor-contest.c	0.09u 0.00s 0.09r
   976  	gccgo -O2 meteor-contest.go	0.09u 0.00s 0.09r
   977  	gc meteor-contest	0.14u 0.00s 0.15r
   978  	gc_B meteor-contest	0.14u 0.00s 0.14r
   979  
   980  pidigits 10000
   981  	gcc -O2 pidigits.c -lgmp	2.27u 0.00s 2.27r
   982  	gccgo -O2 pidigits.go	8.65u 0.00s 8.67r
   983  	gc pidigits	3.70u 0.04s 3.75r
   984  	gc_B pidigits	3.72u 0.02s 3.75r
   985  
   986  threadring 50000000
   987  	gcc -O2 threadring.c	40.91u 369.85s 323.31r
   988  	gccgo -O2 threadring.go	26.97u 30.82s 57.93r
   989  	gc threadring	12.81u 0.01s 12.85r # -13%
   990  
   991  chameneos 6000000
   992  	gcc -O2 chameneosredux.c -lpthread	9.44u 72.90s 12.65r
   993  	gccgo -O2 chameneosredux.go	7.73u 7.53s 15.30r
   994  	gc chameneosredux	6.51u 0.00s 6.53r # -14%
   995  
   996  # After http://codereview.appspot.com/6248049, moving panicindex
   997  # calls out of line (putting the likely code into a single path and shortening
   998  # loops). Significant changes since the last run (note: some are slower for
   999  # unrelated and as yet undiagnosed reasons):
  1000  
  1001  nbody -n 50000000
  1002  	gc nbody	19.10u 0.01s 19.19r # -12%
  1003  	gc_B nbody	19.19u 0.00s 19.23r # -12%
  1004  
  1005  binary-tree 15 # too slow to use 20
  1006  	gc binary-tree	1.49u 0.01s 1.51r # -19%
  1007  	
  1008  fannkuch 12
  1009  	gc fannkuch	60.79u 0.00s 60.92r # -41%
  1010  	gc fannkuch-parallel	183.51u 0.01s 51.75r # -14%
  1011  	gc_B fannkuch	51.68u 0.00s 51.79r # -3%
  1012  
  1013  k-nucleotide 1000000
  1014  	gc k-nucleotide	9.74u 0.04s 9.80r # +6%
  1015  	gc k-nucleotide-parallel	9.89u 0.05s 3.59r # +1%
  1016  	gc_B k-nucleotide	9.39u 0.02s 9.43r # +2%
  1017  
  1018  mandelbrot (much slower, due to unrelated http://codereview.appspot.com/6209077)
  1019  	gc mandelbrot	100.98u 0.00s 101.20r # +65%
  1020  	gc_B mandelbrot	100.90u 0.01s 101.17r # +65%
  1021  
  1022  meteor 2098
  1023  	gc meteor-contest	0.13u 0.00s 0.13r # -13%
  1024  	gc_B meteor-contest	0.13u 0.00s 0.13r # -7%
  1025  
  1026  # May 30, 2012.
  1027  # After http://codereview.appspot.com/6261051, restoring old code generated
  1028  # for floating-point constants. Mandelbrot is back to its previous numbers.
  1029  
  1030  mandelbrot 16000
  1031  	gcc -O2 mandelbrot.c	36.07u 0.00s 36.16r
  1032  	gccgo -O2 mandelbrot.go	41.72u 0.01s 41.90r
  1033  	gc mandelbrot	60.62u 0.00s 60.76r
  1034  	gc_B mandelbrot	60.68u 0.00s 60.82r
  1035  
  1036  # May 30, 2012.
  1037  # After http://codereview.appspot.com/6248068, better FP code
  1038  # by avoiding MOVSD between registers.
  1039  # Plus some other timing changes that have crept in from other speedups,
  1040  # from garbage collection to Printf.
  1041  
  1042  fasta -n 25000000
  1043  	gc fasta	1.76u 0.00s 1.76r # -12%
  1044  	gc_B fasta	1.71u 0.00s 1.72r # -12%
  1045  
  1046  nbody -n 50000000
  1047  	gc nbody	17.56u 0.00s 17.60r # -8%
  1048  	gc_B nbody	17.30u 0.00s 17.34r # -10%
  1049  
  1050  fannkuch 12
  1051  	gc fannkuch-parallel	155.92u 0.01s 44.05r # -15%
  1052  
  1053  k-nucleotide 1000000
  1054  	gc k-nucleotide	9.22u 0.01s 9.26r # -5%
  1055  	gc k-nucleotide-parallel	9.23u 0.03s 3.26r # -9%
  1056  	gc_B k-nucleotide	9.22u 0.03s 9.28r # -2%
  1057  
  1058  mandelbrot 16000
  1059  	gc mandelbrot	44.80u 0.00s 44.90r # -27%
  1060  	gc_B mandelbrot	44.81u 0.00s 44.92r # -26%
  1061  
  1062  pidigits 10000
  1063  	gc pidigits	3.51u 0.00s 3.52r # -6%
  1064  	gc_B pidigits	3.51u 0.00s 3.52r # -6%
  1065  
  1066  # Aug 28, 2012
  1067  # After some assembler work in package big.
  1068  pidigits 10000
  1069  	gc pidigits	2.85u 0.02s 2.88r # -22%
  1070  	gc_B pidigits	2.88u 0.01s 2.90r # -21%
  1071  
  1072  # Sep 26, 2012
  1073  # 64-bit ints, plus significantly better floating-point code.
  1074  # Interesting details:
  1075  #	Most benchmarks are in the 0-10% slower range; some (binary-tree) more.
  1076  #	Floating-point noticeably faster:
  1077  #		nbody -25%
  1078  #		mandelbrot -37% relative to Go 1.
  1079  #	Other:
  1080  #		regex-dna +47%
  1081  fasta -n 25000000
  1082  	gcc -O2 fasta.c	1.43u 0.03s 1.46r
  1083  	gccgo -O2 fasta.go	1.47u 0.00s 1.47r
  1084  	gc fasta	1.78u 0.01s 1.80r
  1085  	gc_B fasta	1.76u 0.00s 1.76r
  1086  
  1087  reverse-complement < output-of-fasta-25000000
  1088  	gcc -O2 reverse-complement.c	1.14u 0.39s 11.19r
  1089  	gccgo -O2 reverse-complement.go	0.91u 0.17s 1.09r
  1090  	gc reverse-complement	1.12u 0.18s 1.31r
  1091  	gc_B reverse-complement	1.12u 0.15s 1.28r
  1092  
  1093  nbody -n 50000000
  1094  	gcc -O2 nbody.c -lm	13.02u 0.00s 13.05r
  1095  	gccgo -O2 nbody.go	13.90u 0.00s 13.93r
  1096  	gc nbody	17.05u 0.00s 17.09r
  1097  	gc_B nbody	16.30u 0.00s 16.34r
  1098  
  1099  binary-tree 15 # too slow to use 20
  1100  	gcc -O2 binary-tree.c -lm	0.61u 0.00s 0.61r
  1101  	gccgo -O2 binary-tree.go	1.24u 0.04s 1.29r
  1102  	gccgo -O2 binary-tree-freelist.go	0.21u 0.01s 0.22r
  1103  	gc binary-tree	1.93u 0.02s 1.96r
  1104  	gc binary-tree-freelist	0.32u 0.00s 0.33r
  1105  
  1106  fannkuch 12
  1107  	gcc -O2 fannkuch.c	45.19u 0.00s 45.29r
  1108  	gccgo -O2 fannkuch.go	60.32u 0.00s 60.45r
  1109  	gccgo -O2 fannkuch-parallel.go	185.59u 0.00s 59.49r
  1110  	gc fannkuch	72.14u 0.00s 72.30r
  1111  	gc fannkuch-parallel	172.54u 0.00s 43.59r
  1112  	gc_B fannkuch	53.55u 0.00s 53.67r
  1113  
  1114  regex-dna 100000
  1115  	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.47r
  1116  	gccgo -O2 regex-dna.go	6.49u 0.05s 6.56r
  1117  	gccgo -O2 regex-dna-parallel.go	14.60u 0.67s 4.42r
  1118  	gc regex-dna	3.91u 0.00s 3.92r
  1119  	gc regex-dna-parallel	4.01u 0.03s 1.56r
  1120  	gc_B regex-dna	3.91u 0.00s 3.92r
  1121  
  1122  spectral-norm 5500
  1123  	gcc -O2 spectral-norm.c -lm	15.85u 0.00s 15.89r
  1124  	gccgo -O2 spectral-norm.go	15.86u 0.00s 15.89r
  1125  	gc spectral-norm	19.72u 0.00s 19.76r
  1126  	gc_B spectral-norm	19.68u 0.01s 19.74r
  1127  
  1128  k-nucleotide 1000000
  1129  	gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -lglib-2.0 	4.90u 0.01s 4.93r
  1130  	gccgo -O2 k-nucleotide.go	4.78u 0.01s 4.80r
  1131  	gccgo -O2 k-nucleotide-parallel.go	6.49u 0.02s 2.18r
  1132  	gc k-nucleotide	9.05u 0.02s 9.09r
  1133  	gc k-nucleotide-parallel	9.27u 0.01s 3.29r
  1134  	gc_B k-nucleotide	8.95u 0.03s 9.00r
  1135  
  1136  mandelbrot 16000
  1137  	gcc -O2 mandelbrot.c	36.11u 0.00s 36.19r
  1138  	gccgo -O2 mandelbrot.go	43.67u 0.00s 43.77r
  1139  	gc mandelbrot	38.57u 0.00s 38.66r
  1140  	gc_B mandelbrot	38.59u 0.00s 38.68r
  1141  
  1142  meteor 2098
  1143  	gcc -O2 meteor-contest.c	0.09u 0.00s 0.09r
  1144  	gccgo -O2 meteor-contest.go	0.09u 0.00s 0.09r
  1145  	gc meteor-contest	0.13u 0.00s 0.14r
  1146  	gc_B meteor-contest	0.12u 0.00s 0.13r
  1147  
  1148  pidigits 10000
  1149  	gcc -O2 pidigits.c -lgmp	2.26u 0.00s 2.27r
  1150  	gccgo -O2 pidigits.go	9.05u 0.00s 9.07r
  1151  	gc pidigits	2.88u 0.02s 2.90r
  1152  	gc_B pidigits	2.89u 0.00s 2.90r
  1153  
  1154  threadring 50000000
  1155  	gcc -O2 threadring.c -lpthread	37.30u 327.81s 289.28r
  1156  	gccgo -O2 threadring.go	42.83u 26.15s 69.14r
  1157  	gc threadring	13.00u 0.00s 13.03r
  1158  
  1159  chameneos 6000000
  1160  	gcc -O2 chameneosredux.c -lpthread	8.80u 71.67s 12.19r
  1161  	gccgo -O2 chameneosredux.go	11.28u 6.68s 18.00r
  1162  	gc chameneosredux	6.94u 0.00s 6.96r
  1163  
  1164  # May 23, 2013
  1165  # Go 1.1, which includes precise GC, new scheduler, faster maps.
  1166  # 20%-ish speedups across many benchmarks.
  1167  # gccgo showing significant improvement (even though it's not yet up to Go 1.1)
  1168  #
  1169  # Standouts:
  1170  #	fannkuch, regex-dna, k-nucleotide, threadring, chameneos
  1171  
  1172  fasta -n 25000000
  1173  	gcc -m64 -O2 fasta.c	1.54u 0.01s 1.55r
  1174  	gccgo -O2 fasta.go	1.42u 0.00s 1.43r
  1175  	gc fasta	1.50u 0.01s 1.52r # -16%
  1176  	gc_B fasta	1.46u 0.00s 1.46r # -17%
  1177  
  1178  reverse-complement < output-of-fasta-25000000
  1179  	gcc -m64 -O2 reverse-complement.c	0.87u 0.37s 4.36r
  1180  	gccgo -O2 reverse-complement.go	0.77u 0.15s 0.93r # -15%
  1181  	gc reverse-complement	0.99u 0.12s 1.12r # -15%
  1182  	gc_B reverse-complement	0.85u 0.17s 1.02r # -21%
  1183  
  1184  nbody -n 50000000
  1185  	gcc -m64 -O2 nbody.c -lm	13.50u 0.00s 13.53r
  1186  	gccgo -O2 nbody.go	13.98u 0.01s 14.02r
  1187  	gc nbody	16.63u 0.01s 16.67r
  1188  	gc_B nbody	15.74u 0.00s 15.76r
  1189  
  1190  binary-tree 15 # too slow to use 20
  1191  	gcc -m64 -O2 binary-tree.c -lm	0.61u 0.00s 0.61r
  1192  	gccgo -O2 binary-tree.go	1.11u 0.01s 1.12r # -13%
  1193  	gccgo -O2 binary-tree-freelist.go	0.22u 0.01s 0.23r
  1194  	gc binary-tree	1.83u 0.02s 1.83r # -7%
  1195  	gc binary-tree-freelist	0.32u 0.00s 0.32r
  1196  
  1197  fannkuch 12
  1198  	gcc -m64 -O2 fannkuch.c	45.56u 0.00s 45.67r
  1199  	gccgo -O2 fannkuch.go	57.71u 0.00s 57.85r # -4%
  1200  	gccgo -O2 fannkuch-parallel.go	146.31u 0.00s 37.50r # -37%
  1201  	gc fannkuch	70.06u 0.03s 70.17r # -3%
  1202  	gc fannkuch-parallel	131.88u 0.06s 33.59r # -23%
  1203  	gc_B fannkuch	45.55u 0.02s 45.63r # -15%
  1204  
  1205  regex-dna 100000
  1206  	gcc -m64 -O2 regex-dna.c -lpcre	0.44u 0.01s 0.45r
  1207  	gccgo -O2 regex-dna.go	5.59u 0.00s 5.61r # -14%
  1208  	gccgo -O2 regex-dna-parallel.go	10.85u 0.30s 3.34r # -24%
  1209  	gc regex-dna	2.23u 0.01s 2.25r # -43%
  1210  	gc regex-dna-parallel	2.35u 0.00s 0.93r # -40%
  1211  	gc_B regex-dna	2.24u 0.01s 2.25r # -43%
  1212  
  1213  spectral-norm 5500
  1214  	gcc -m64 -O2 spectral-norm.c -lm	14.84u 0.00s 14.88r
  1215  	gccgo -O2 spectral-norm.go	15.33u 0.00s 15.37r
  1216  	gc spectral-norm	16.75u 0.02s 16.79r # -15%
  1217  	gc_B spectral-norm	16.77u 0.01s 16.79r # -15%
  1218  
  1219  k-nucleotide 1000000
  1220  	gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 	4.50u 0.00s 4.52r
  1221  	gccgo -O2 k-nucleotide.go	3.72u 0.04s 3.77r # -21%
  1222  	gccgo -O2 k-nucleotide-parallel.go	3.88u 0.03s 1.42r # -35%
  1223  	gc k-nucleotide	6.32u 0.01s 6.33r # -31%
  1224  	gc k-nucleotide-parallel	6.47u 0.05s 2.13r # -33%
  1225  	gc_B k-nucleotide	6.45u 0.01s 6.47r # -28%
  1226  
  1227  mandelbrot 16000
  1228  	gcc -m64 -O2 mandelbrot.c	36.03u 0.00s 36.11r
  1229  	gccgo -O2 mandelbrot.go	37.61u 0.00s 37.74r # -14%
  1230  	gc mandelbrot	38.19u 0.05s 38.29r
  1231  	gc_B mandelbrot	38.19u 0.03s 38.26r
  1232  
  1233  meteor 2098
  1234  	gcc -m64 -O2 meteor-contest.c	0.08u 0.00s 0.08r
  1235  	gccgo -O2 meteor-contest.go	0.09u 0.01s 0.10r
  1236  	gc meteor-contest	0.12u 0.00s 0.12r # -15% although perhaps just noise
  1237  	gc_B meteor-contest	0.11u 0.00s 0.12r # -8% although perhaps just noise
  1238  
  1239  pidigits 10000
  1240  	gcc -m64 -O2 pidigits.c -lgmp	2.27u 0.00s 2.28r
  1241  	gccgo -O2 pidigits.go	8.95u 0.02s 8.99r
  1242  	gc pidigits	2.88u 0.14s 2.91r
  1243  	gc_B pidigits	2.92u 0.10s 2.91r
  1244  
  1245  threadring 50000000
  1246  	gcc -m64 -O2 threadring.c -lpthread	14.75u 167.88s 212.23r
  1247  	gccgo -O2 threadring.go	36.72u 12.08s 48.91r # -29%
  1248  	gc threadring	10.93u 0.01s 10.95r # -16%
  1249  
  1250  chameneos 6000000
  1251  	gcc -m64 -O2 chameneosredux.c -lpthread	8.89u 56.62s 9.75r
  1252  	gccgo -O2 chameneosredux.go	9.48u 2.48s 11.99r # -33%
  1253  	gc chameneosredux	5.80u 0.00s 5.81r # -16%
  1254