github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/test/devstack/sharding_test.go (about) 1 //go:build integration 2 3 package devstack 4 5 import ( 6 "context" 7 "fmt" 8 "os" 9 "path/filepath" 10 "strings" 11 "testing" 12 13 "github.com/filecoin-project/bacalhau/pkg/devstack" 14 "github.com/filecoin-project/bacalhau/pkg/docker" 15 "github.com/filecoin-project/bacalhau/pkg/executor/noop" 16 "github.com/filecoin-project/bacalhau/pkg/ipfs" 17 "github.com/filecoin-project/bacalhau/pkg/job" 18 _ "github.com/filecoin-project/bacalhau/pkg/logger" 19 "github.com/filecoin-project/bacalhau/pkg/model" 20 "github.com/filecoin-project/bacalhau/pkg/node" 21 "github.com/filecoin-project/bacalhau/pkg/requester/publicapi" 22 ipfs_storage "github.com/filecoin-project/bacalhau/pkg/storage/ipfs" 23 "github.com/filecoin-project/bacalhau/pkg/system" 24 "github.com/filecoin-project/bacalhau/pkg/test/scenario" 25 testutils "github.com/filecoin-project/bacalhau/pkg/test/utils" 26 "github.com/stretchr/testify/require" 27 "github.com/stretchr/testify/suite" 28 ) 29 30 type ShardingSuite struct { 31 scenario.ScenarioRunner 32 } 33 34 // In order for 'go test' to run this suite, we need to create 35 // a normal test function and pass our suite to suite.Run 36 func TestShardingSuite(t *testing.T) { 37 suite.Run(t, new(ShardingSuite)) 38 } 39 40 func prepareFolderWithFoldersAndFiles(t *testing.T, folderCount, fileCount int) (string, error) { 41 basePath := t.TempDir() 42 for i := 0; i < folderCount; i++ { 43 subfolderPath := fmt.Sprintf("%s/folder%d", basePath, i) 44 err := os.Mkdir(subfolderPath, 0700) 45 if err != nil { 46 return "", err 47 } 48 for j := 0; j < fileCount; j++ { 49 err = os.WriteFile( 50 fmt.Sprintf("%s/%d.txt", subfolderPath, j), 51 []byte(fmt.Sprintf("hello %d %d", i, j)), 52 0644, 53 ) 54 if err != nil { 55 return "", err 56 } 57 } 58 } 59 return basePath, nil 60 } 61 62 func (suite *ShardingSuite) TestExplodeCid() { 63 const nodeCount = 1 64 const folderCount = 10 65 const fileCount = 10 66 ctx := context.Background() 67 cm := system.NewCleanupManager() 68 69 err := system.InitConfigForTesting(suite.T()) 70 require.NoError(suite.T(), err) 71 72 stack, err := devstack.NewDevStackIPFS(ctx, cm, nodeCount) 73 require.NoError(suite.T(), err) 74 75 node := stack.IPFSClients[0] 76 77 // make 10 folders each with 10 files 78 dirPath, err := prepareFolderWithFoldersAndFiles(suite.T(), folderCount, fileCount) 79 require.NoError(suite.T(), err) 80 81 directoryCid, err := ipfs.AddFileToNodes(ctx, dirPath, stack.IPFSClients[:nodeCount]...) 82 require.NoError(suite.T(), err) 83 84 ipfsProvider, err := ipfs_storage.NewStorage(cm, node) 85 require.NoError(suite.T(), err) 86 87 results, err := ipfsProvider.Explode(ctx, model.StorageSpec{ 88 Path: "/input", 89 StorageSource: model.StorageSourceIPFS, 90 CID: directoryCid, 91 }) 92 require.NoError(suite.T(), err) 93 94 resultPaths := []string{} 95 for _, result := range results { 96 resultPaths = append(resultPaths, result.Path) 97 } 98 99 // the top level node is en empty path 100 expectedFilePaths := []string{"/input"} 101 for i := 0; i < folderCount; i++ { 102 expectedFilePaths = append(expectedFilePaths, fmt.Sprintf("/input/folder%d", i)) 103 for j := 0; j < fileCount; j++ { 104 expectedFilePaths = append(expectedFilePaths, fmt.Sprintf("/input/folder%d/%d.txt", i, j)) 105 } 106 } 107 108 require.Equal( 109 suite.T(), 110 strings.Join(expectedFilePaths, ","), 111 strings.Join(resultPaths, ","), 112 "the exploded file paths do not match the expected ones", 113 ) 114 } 115 116 func (suite *ShardingSuite) TestEndToEnd() { 117 docker.MustHaveDocker(suite.T()) 118 119 const totalFiles = 100 120 const batchSize = 10 121 const batchCount = totalFiles / batchSize 122 const nodeCount = 3 123 124 var assertShardCounts job.CheckStatesFunction = func(js model.JobState) (bool, error) { 125 return len(js.Shards) == batchCount, nil 126 } 127 128 // check that the merged stdout is correct 129 checks := []scenario.CheckResults{} 130 for i := 0; i < totalFiles; i++ { 131 for j := 0; j < nodeCount; j++ { 132 content := fmt.Sprintf("hello /input/%d.txt", i) 133 filename := filepath.Join("results", fmt.Sprintf("%d.txt", i)) 134 checks = append(checks, 135 scenario.FileEquals(filename, content+"\n"), 136 scenario.FileContains(model.DownloadFilenameStdout, content, totalFiles*3+1), 137 ) 138 } 139 } 140 141 testScenario := scenario.Scenario{ 142 Stack: &scenario.StackConfig{ 143 DevStackOptions: &devstack.DevStackOptions{NumberOfHybridNodes: nodeCount}, 144 }, 145 Inputs: scenario.StoredFile( 146 prepareFolderWithFiles(suite.T(), totalFiles), 147 "/input", 148 ), 149 Outputs: []model.StorageSpec{ 150 { 151 StorageSource: model.StorageSourceIPFS, 152 Name: "results", 153 Path: "/output", 154 }, 155 }, 156 Spec: model.Spec{ 157 Engine: model.EngineDocker, 158 Verifier: model.VerifierNoop, 159 Publisher: model.PublisherIpfs, 160 Docker: model.JobSpecDocker{ 161 Image: "ubuntu:latest", 162 Entrypoint: []string{ 163 "bash", "-c", 164 // loop over each input file and write the filename to an 165 // output file named the same thing in the results folder 166 `for f in /input/*; do export filename=$(echo $f | sed 's/\/input//');` + 167 `echo "hello $f" && echo "hello $f" >> /output/$filename; done`, 168 }, 169 }, 170 Sharding: model.JobShardingConfig{ 171 GlobPattern: "/input/*", 172 BatchSize: batchSize, 173 }, 174 }, 175 Deal: model.Deal{Concurrency: 3}, 176 JobCheckers: []job.CheckStatesFunction{ 177 assertShardCounts, 178 job.WaitExecutionsThrowErrors([]model.ExecutionStateType{ 179 model.ExecutionStateFailed, 180 }), 181 job.WaitForExecutionStates(map[model.ExecutionStateType]int{ 182 model.ExecutionStateCompleted: nodeCount * batchCount, 183 }), 184 }, 185 ResultsChecker: scenario.ManyChecks(checks...), 186 } 187 188 suite.RunScenario(testScenario) 189 } 190 191 func (suite *ShardingSuite) TestNoShards() { 192 const nodeCount = 1 193 ctx := context.Background() 194 195 stack, _ := testutils.SetupTest( 196 ctx, 197 suite.T(), 198 199 nodeCount, 200 0, 201 false, 202 node.NewComputeConfigWithDefaults(), 203 node.NewRequesterConfigWithDefaults(), 204 ) 205 206 dirPath := prepareFolderWithFiles(suite.T(), 0) 207 directoryCid, err := ipfs.AddFileToNodes(ctx, dirPath, devstack.ToIPFSClients(stack.Nodes[:nodeCount])...) 208 require.NoError(suite.T(), err) 209 210 j := &model.Job{ 211 APIVersion: model.APIVersionLatest().String(), 212 } 213 j.Spec = model.Spec{ 214 Engine: model.EngineWasm, 215 Verifier: model.VerifierNoop, 216 Publisher: model.PublisherNoop, 217 Wasm: scenario.WasmHelloWorld.Spec.Wasm, 218 Inputs: []model.StorageSpec{ 219 { 220 StorageSource: model.StorageSourceIPFS, 221 CID: directoryCid, 222 Path: "/input", 223 }, 224 }, 225 Outputs: []model.StorageSpec{}, 226 Sharding: model.JobShardingConfig{ 227 GlobPattern: "/input/*", 228 BatchSize: 1, 229 }, 230 } 231 232 j.Spec.Deal = model.Deal{ 233 Concurrency: nodeCount, 234 } 235 236 apiUri := stack.Nodes[0].APIServer.GetURI() 237 apiClient := publicapi.NewRequesterAPIClient(apiUri) 238 _, err = apiClient.Submit(ctx, j) 239 require.Error(suite.T(), err) 240 require.True(suite.T(), strings.Contains(err.Error(), "no sharding atoms found for glob pattern")) 241 } 242 243 func (suite *ShardingSuite) TestExplodeVideos() { 244 videos := []string{ 245 "Bird flying over the lake.mp4", 246 "Calm waves on a rocky sea gulf.mp4", 247 "Prominent Late Gothic styled architecture.mp4", 248 } 249 250 dirPath := suite.T().TempDir() 251 for _, video := range videos { 252 err := os.WriteFile( 253 filepath.Join(dirPath, video), 254 []byte(fmt.Sprintf("hello %s", video)), 255 0644, 256 ) 257 require.NoError(suite.T(), err) 258 } 259 260 testScenario := scenario.Scenario{ 261 Stack: &scenario.StackConfig{ 262 ExecutorConfig: noop.ExecutorConfig{}, 263 }, 264 Inputs: scenario.StoredFile(dirPath, "/inputs"), 265 Contexts: scenario.WasmHelloWorld.Contexts, 266 Spec: model.Spec{ 267 Engine: model.EngineNoop, 268 Verifier: model.VerifierNoop, 269 Publisher: model.PublisherNoop, 270 Sharding: model.JobShardingConfig{ 271 BasePath: "/inputs", 272 GlobPattern: "*.mp4", 273 BatchSize: 1, 274 }, 275 }, 276 JobCheckers: scenario.WaitUntilSuccessful(len(videos)), 277 } 278 279 suite.RunScenario(testScenario) 280 }