github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/datum/iterator_bench_test.go (about)

     1  package datum
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  	"testing"
     7  
     8  	"github.com/pachyderm/pachyderm/src/client"
     9  	"github.com/pachyderm/pachyderm/src/client/pkg/require"
    10  	"github.com/pachyderm/pachyderm/src/client/pps"
    11  	tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil"
    12  )
    13  
    14  func benchmarkIterators(j int, b *testing.B) {
    15  	c := tu.GetPachClient(b)
    16  	defer require.NoError(b, c.DeleteAll())
    17  
    18  	b.ResetTimer()
    19  	for n := 0; n < b.N; n++ {
    20  		dataRepo := tu.UniqueString("TestIteratorPFS_data")
    21  		require.NoError(b, c.CreateRepo(dataRepo))
    22  
    23  		// put files in structured in a way so that there are many ways to glob it
    24  		commit, err := c.StartCommit(dataRepo, "master")
    25  		require.NoError(b, err)
    26  		for i := 0; i < 100*j; i++ {
    27  			_, err = c.PutFile(dataRepo, commit.ID, fmt.Sprintf("foo%v", i), strings.NewReader("bar"))
    28  			require.NoError(b, err)
    29  		}
    30  
    31  		require.NoError(b, err)
    32  		require.NoError(b, c.FinishCommit(dataRepo, commit.ID))
    33  
    34  		// make one with zero datums for testing edge cases
    35  		in0 := client.NewPFSInput(dataRepo, "!(**)")
    36  		in0.Pfs.Commit = commit.ID
    37  		pfs0, err := NewIterator(c, in0)
    38  		require.NoError(b, err)
    39  
    40  		in1 := client.NewPFSInput(dataRepo, "/foo?1*")
    41  		in1.Pfs.Commit = commit.ID
    42  		pfs1, err := NewIterator(c, in1)
    43  		require.NoError(b, err)
    44  
    45  		in2 := client.NewPFSInput(dataRepo, "/foo*2")
    46  		in2.Pfs.Commit = commit.ID
    47  		pfs2, err := NewIterator(c, in2)
    48  		require.NoError(b, err)
    49  
    50  		validateDI(b, pfs0)
    51  		validateDI(b, pfs1)
    52  		validateDI(b, pfs2)
    53  
    54  		b.Run("union", func(b *testing.B) {
    55  			in3 := client.NewUnionInput(in1, in2)
    56  			union1, err := NewIterator(c, in3)
    57  			require.NoError(b, err)
    58  			validateDI(b, union1)
    59  		})
    60  
    61  		b.Run("cross", func(b *testing.B) {
    62  			in4 := client.NewCrossInput(in1, in2)
    63  			cross1, err := NewIterator(c, in4)
    64  			require.NoError(b, err)
    65  			validateDI(b, cross1)
    66  		})
    67  
    68  		b.Run("join", func(b *testing.B) {
    69  			in8 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)*", "$1$2", "", false, false, nil)
    70  			in8.Pfs.Commit = commit.ID
    71  			in9 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)*", "$2$1", "", false, false, nil)
    72  			in9.Pfs.Commit = commit.ID
    73  			join1, err := newJoinIterator(c, []*pps.Input{in8, in9})
    74  			require.NoError(b, err)
    75  			validateDI(b, join1)
    76  		})
    77  
    78  		b.Run("group", func(b *testing.B) {
    79  			in10 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)*", "", "$2", false, false, nil)
    80  			in10.Pfs.Commit = commit.ID
    81  			group1, err := newGroupIterator(c, []*pps.Input{in10})
    82  			require.NoError(b, err)
    83  			validateDI(b, group1)
    84  		})
    85  
    86  		b.Run("iterated", func(b *testing.B) {
    87  			in3 := client.NewUnionInput(in1, in2)
    88  			in4 := client.NewCrossInput(in1, in2)
    89  
    90  			in5 := client.NewCrossInput(in3, in4)
    91  			cross2, err := NewIterator(c, in5)
    92  			require.NoError(b, err)
    93  
    94  			// cross with a zero datum input should also be zero
    95  			in6 := client.NewCrossInput(in3, in0, in2, in4)
    96  			cross3, err := NewIterator(c, in6)
    97  			require.NoError(b, err)
    98  
    99  			// zero cross inside a cross should also be zero
   100  			in7 := client.NewCrossInput(in6, in1)
   101  			cross4, err := NewIterator(c, in7)
   102  			require.NoError(b, err)
   103  
   104  			validateDI(b, cross2)
   105  			validateDI(b, cross3)
   106  			validateDI(b, cross4)
   107  
   108  		})
   109  	}
   110  }
   111  
   112  func BenchmarkDI1(b *testing.B)  { benchmarkIterators(1, b) }
   113  func BenchmarkDI2(b *testing.B)  { benchmarkIterators(2, b) }
   114  func BenchmarkDI4(b *testing.B)  { benchmarkIterators(4, b) }
   115  func BenchmarkDI8(b *testing.B)  { benchmarkIterators(8, b) }
   116  func BenchmarkDI16(b *testing.B) { benchmarkIterators(16, b) }
   117  func BenchmarkDI32(b *testing.B) { benchmarkIterators(32, b) }