github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/test/devstack/sharding_test.go (about)

     1  //go:build integration
     2  
     3  package devstack
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"testing"
    12  
    13  	"github.com/filecoin-project/bacalhau/pkg/devstack"
    14  	"github.com/filecoin-project/bacalhau/pkg/docker"
    15  	"github.com/filecoin-project/bacalhau/pkg/executor/noop"
    16  	"github.com/filecoin-project/bacalhau/pkg/ipfs"
    17  	"github.com/filecoin-project/bacalhau/pkg/job"
    18  	_ "github.com/filecoin-project/bacalhau/pkg/logger"
    19  	"github.com/filecoin-project/bacalhau/pkg/model"
    20  	"github.com/filecoin-project/bacalhau/pkg/node"
    21  	"github.com/filecoin-project/bacalhau/pkg/requester/publicapi"
    22  	ipfs_storage "github.com/filecoin-project/bacalhau/pkg/storage/ipfs"
    23  	"github.com/filecoin-project/bacalhau/pkg/system"
    24  	"github.com/filecoin-project/bacalhau/pkg/test/scenario"
    25  	testutils "github.com/filecoin-project/bacalhau/pkg/test/utils"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/stretchr/testify/suite"
    28  )
    29  
// ShardingSuite groups the job-sharding integration tests. It embeds
// scenario.ScenarioRunner, which provides devstack setup/teardown and
// the RunScenario helper used by the tests below.
type ShardingSuite struct {
	scenario.ScenarioRunner
}
    33  
    34  // In order for 'go test' to run this suite, we need to create
    35  // a normal test function and pass our suite to suite.Run
    36  func TestShardingSuite(t *testing.T) {
    37  	suite.Run(t, new(ShardingSuite))
    38  }
    39  
    40  func prepareFolderWithFoldersAndFiles(t *testing.T, folderCount, fileCount int) (string, error) {
    41  	basePath := t.TempDir()
    42  	for i := 0; i < folderCount; i++ {
    43  		subfolderPath := fmt.Sprintf("%s/folder%d", basePath, i)
    44  		err := os.Mkdir(subfolderPath, 0700)
    45  		if err != nil {
    46  			return "", err
    47  		}
    48  		for j := 0; j < fileCount; j++ {
    49  			err = os.WriteFile(
    50  				fmt.Sprintf("%s/%d.txt", subfolderPath, j),
    51  				[]byte(fmt.Sprintf("hello %d %d", i, j)),
    52  				0644,
    53  			)
    54  			if err != nil {
    55  				return "", err
    56  			}
    57  		}
    58  	}
    59  	return basePath, nil
    60  }
    61  
    62  func (suite *ShardingSuite) TestExplodeCid() {
    63  	const nodeCount = 1
    64  	const folderCount = 10
    65  	const fileCount = 10
    66  	ctx := context.Background()
    67  	cm := system.NewCleanupManager()
    68  
    69  	err := system.InitConfigForTesting(suite.T())
    70  	require.NoError(suite.T(), err)
    71  
    72  	stack, err := devstack.NewDevStackIPFS(ctx, cm, nodeCount)
    73  	require.NoError(suite.T(), err)
    74  
    75  	node := stack.IPFSClients[0]
    76  
    77  	// make 10 folders each with 10 files
    78  	dirPath, err := prepareFolderWithFoldersAndFiles(suite.T(), folderCount, fileCount)
    79  	require.NoError(suite.T(), err)
    80  
    81  	directoryCid, err := ipfs.AddFileToNodes(ctx, dirPath, stack.IPFSClients[:nodeCount]...)
    82  	require.NoError(suite.T(), err)
    83  
    84  	ipfsProvider, err := ipfs_storage.NewStorage(cm, node)
    85  	require.NoError(suite.T(), err)
    86  
    87  	results, err := ipfsProvider.Explode(ctx, model.StorageSpec{
    88  		Path:          "/input",
    89  		StorageSource: model.StorageSourceIPFS,
    90  		CID:           directoryCid,
    91  	})
    92  	require.NoError(suite.T(), err)
    93  
    94  	resultPaths := []string{}
    95  	for _, result := range results {
    96  		resultPaths = append(resultPaths, result.Path)
    97  	}
    98  
    99  	// the top level node is en empty path
   100  	expectedFilePaths := []string{"/input"}
   101  	for i := 0; i < folderCount; i++ {
   102  		expectedFilePaths = append(expectedFilePaths, fmt.Sprintf("/input/folder%d", i))
   103  		for j := 0; j < fileCount; j++ {
   104  			expectedFilePaths = append(expectedFilePaths, fmt.Sprintf("/input/folder%d/%d.txt", i, j))
   105  		}
   106  	}
   107  
   108  	require.Equal(
   109  		suite.T(),
   110  		strings.Join(expectedFilePaths, ","),
   111  		strings.Join(resultPaths, ","),
   112  		"the exploded file paths do not match the expected ones",
   113  	)
   114  }
   115  
   116  func (suite *ShardingSuite) TestEndToEnd() {
   117  	docker.MustHaveDocker(suite.T())
   118  
   119  	const totalFiles = 100
   120  	const batchSize = 10
   121  	const batchCount = totalFiles / batchSize
   122  	const nodeCount = 3
   123  
   124  	var assertShardCounts job.CheckStatesFunction = func(js model.JobState) (bool, error) {
   125  		return len(js.Shards) == batchCount, nil
   126  	}
   127  
   128  	// check that the merged stdout is correct
   129  	checks := []scenario.CheckResults{}
   130  	for i := 0; i < totalFiles; i++ {
   131  		for j := 0; j < nodeCount; j++ {
   132  			content := fmt.Sprintf("hello /input/%d.txt", i)
   133  			filename := filepath.Join("results", fmt.Sprintf("%d.txt", i))
   134  			checks = append(checks,
   135  				scenario.FileEquals(filename, content+"\n"),
   136  				scenario.FileContains(model.DownloadFilenameStdout, content, totalFiles*3+1),
   137  			)
   138  		}
   139  	}
   140  
   141  	testScenario := scenario.Scenario{
   142  		Stack: &scenario.StackConfig{
   143  			DevStackOptions: &devstack.DevStackOptions{NumberOfHybridNodes: nodeCount},
   144  		},
   145  		Inputs: scenario.StoredFile(
   146  			prepareFolderWithFiles(suite.T(), totalFiles),
   147  			"/input",
   148  		),
   149  		Outputs: []model.StorageSpec{
   150  			{
   151  				StorageSource: model.StorageSourceIPFS,
   152  				Name:          "results",
   153  				Path:          "/output",
   154  			},
   155  		},
   156  		Spec: model.Spec{
   157  			Engine:    model.EngineDocker,
   158  			Verifier:  model.VerifierNoop,
   159  			Publisher: model.PublisherIpfs,
   160  			Docker: model.JobSpecDocker{
   161  				Image: "ubuntu:latest",
   162  				Entrypoint: []string{
   163  					"bash", "-c",
   164  					// loop over each input file and write the filename to an
   165  					// output file named the same thing in the results folder
   166  					`for f in /input/*; do export filename=$(echo $f | sed 's/\/input//');` +
   167  						`echo "hello $f" && echo "hello $f" >> /output/$filename; done`,
   168  				},
   169  			},
   170  			Sharding: model.JobShardingConfig{
   171  				GlobPattern: "/input/*",
   172  				BatchSize:   batchSize,
   173  			},
   174  		},
   175  		Deal: model.Deal{Concurrency: 3},
   176  		JobCheckers: []job.CheckStatesFunction{
   177  			assertShardCounts,
   178  			job.WaitExecutionsThrowErrors([]model.ExecutionStateType{
   179  				model.ExecutionStateFailed,
   180  			}),
   181  			job.WaitForExecutionStates(map[model.ExecutionStateType]int{
   182  				model.ExecutionStateCompleted: nodeCount * batchCount,
   183  			}),
   184  		},
   185  		ResultsChecker: scenario.ManyChecks(checks...),
   186  	}
   187  
   188  	suite.RunScenario(testScenario)
   189  }
   190  
   191  func (suite *ShardingSuite) TestNoShards() {
   192  	const nodeCount = 1
   193  	ctx := context.Background()
   194  
   195  	stack, _ := testutils.SetupTest(
   196  		ctx,
   197  		suite.T(),
   198  
   199  		nodeCount,
   200  		0,
   201  		false,
   202  		node.NewComputeConfigWithDefaults(),
   203  		node.NewRequesterConfigWithDefaults(),
   204  	)
   205  
   206  	dirPath := prepareFolderWithFiles(suite.T(), 0)
   207  	directoryCid, err := ipfs.AddFileToNodes(ctx, dirPath, devstack.ToIPFSClients(stack.Nodes[:nodeCount])...)
   208  	require.NoError(suite.T(), err)
   209  
   210  	j := &model.Job{
   211  		APIVersion: model.APIVersionLatest().String(),
   212  	}
   213  	j.Spec = model.Spec{
   214  		Engine:    model.EngineWasm,
   215  		Verifier:  model.VerifierNoop,
   216  		Publisher: model.PublisherNoop,
   217  		Wasm:      scenario.WasmHelloWorld.Spec.Wasm,
   218  		Inputs: []model.StorageSpec{
   219  			{
   220  				StorageSource: model.StorageSourceIPFS,
   221  				CID:           directoryCid,
   222  				Path:          "/input",
   223  			},
   224  		},
   225  		Outputs: []model.StorageSpec{},
   226  		Sharding: model.JobShardingConfig{
   227  			GlobPattern: "/input/*",
   228  			BatchSize:   1,
   229  		},
   230  	}
   231  
   232  	j.Spec.Deal = model.Deal{
   233  		Concurrency: nodeCount,
   234  	}
   235  
   236  	apiUri := stack.Nodes[0].APIServer.GetURI()
   237  	apiClient := publicapi.NewRequesterAPIClient(apiUri)
   238  	_, err = apiClient.Submit(ctx, j)
   239  	require.Error(suite.T(), err)
   240  	require.True(suite.T(), strings.Contains(err.Error(), "no sharding atoms found for glob pattern"))
   241  }
   242  
   243  func (suite *ShardingSuite) TestExplodeVideos() {
   244  	videos := []string{
   245  		"Bird flying over the lake.mp4",
   246  		"Calm waves on a rocky sea gulf.mp4",
   247  		"Prominent Late Gothic styled architecture.mp4",
   248  	}
   249  
   250  	dirPath := suite.T().TempDir()
   251  	for _, video := range videos {
   252  		err := os.WriteFile(
   253  			filepath.Join(dirPath, video),
   254  			[]byte(fmt.Sprintf("hello %s", video)),
   255  			0644,
   256  		)
   257  		require.NoError(suite.T(), err)
   258  	}
   259  
   260  	testScenario := scenario.Scenario{
   261  		Stack: &scenario.StackConfig{
   262  			ExecutorConfig: noop.ExecutorConfig{},
   263  		},
   264  		Inputs:   scenario.StoredFile(dirPath, "/inputs"),
   265  		Contexts: scenario.WasmHelloWorld.Contexts,
   266  		Spec: model.Spec{
   267  			Engine:    model.EngineNoop,
   268  			Verifier:  model.VerifierNoop,
   269  			Publisher: model.PublisherNoop,
   270  			Sharding: model.JobShardingConfig{
   271  				BasePath:    "/inputs",
   272  				GlobPattern: "*.mp4",
   273  				BatchSize:   1,
   274  			},
   275  		},
   276  		JobCheckers: scenario.WaitUntilSuccessful(len(videos)),
   277  	}
   278  
   279  	suite.RunScenario(testScenario)
   280  }