github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/obj/testing/deployment_test.go (about) 1 package testing 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "os" 9 "strings" 10 "testing" 11 "time" 12 13 "github.com/gogo/protobuf/proto" 14 v1 "k8s.io/api/core/v1" 15 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 kube "k8s.io/client-go/kubernetes" 17 18 "github.com/pachyderm/pachyderm/src/client" 19 "github.com/pachyderm/pachyderm/src/client/pfs" 20 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 21 "github.com/pachyderm/pachyderm/src/client/pkg/require" 22 "github.com/pachyderm/pachyderm/src/client/pps" 23 "github.com/pachyderm/pachyderm/src/server/pkg/deploy/assets" 24 "github.com/pachyderm/pachyderm/src/server/pkg/obj" 25 "github.com/pachyderm/pachyderm/src/server/pkg/serde" 26 tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil" 27 ) 28 29 // This test suite works by spinning up separate pachd deployments in a new 30 // namespace for each configuration. There are several important bits to make 31 // sure these are parallelizable, that the manifests don't step on each other's 32 // toes. Once the deployment is up-and-running, we run a simple pipeline test to 33 // ensure that we can round-trip data to object storage in both the worker and 34 // in pachd. For testing specific corner-cases, consider modifying the client 35 // test suite in this same package. 36 37 // NOTE: these tests require object storage credentials to be loaded in your 38 // environment (see util.go for where they are loaded). 39 40 // Change this to false to keep kubernetes namespaces around after the test for 41 // debugging purposes. 42 const cleanup = true 43 44 // Rewrites kubernetes manifest services to auto-allocate external ports and 45 // reduce cpu resource requests for parallel testing. 46 type ManifestRewriter struct { 47 serde.Encoder 48 } 49 50 func rewriterCallback(innerCb func(map[string]interface{}) error) func(map[string]interface{}) error { 51 return func(data map[string]interface{}) error { 52 var err error 53 if innerCb != nil { 54 err = innerCb(data) 55 } 56 rewriteManifest(data) 57 return err 58 } 59 } 60 61 func rewriteManifest(data map[string]interface{}) { 62 if data["kind"] == "Service" { 63 spec := data["spec"].(map[string]interface{}) 64 if spec["type"] == "NodePort" { 65 ports := spec["ports"].([]interface{}) 66 for _, port := range ports { 67 port := port.(map[string]interface{}) 68 if _, ok := port["nodePort"]; ok { 69 port["nodePort"] = 0 70 } 71 } 72 } 73 } 74 75 if data["kind"] == "Deployment" { 76 if spec, ok := data["spec"]; ok { 77 spec := spec.(map[string]interface{}) 78 if template, ok := spec["template"]; ok { 79 template := template.(map[string]interface{}) 80 if spec, ok := template["spec"]; ok { 81 spec := spec.(map[string]interface{}) 82 if containers, ok := spec["containers"]; ok { 83 containers := containers.([]interface{}) 84 for _, container := range containers { 85 container := container.(map[string]interface{}) 86 if resources, ok := container["resources"]; ok { 87 resources := resources.(map[string]interface{}) 88 if limits, ok := resources["limits"]; ok { 89 limits := limits.(map[string]interface{}) 90 if _, ok := limits["cpu"]; ok { 91 limits["cpu"] = "0" 92 } 93 } 94 if requests, ok := resources["requests"]; ok { 95 requests := requests.(map[string]interface{}) 96 if _, ok := requests["cpu"]; ok { 97 requests["cpu"] = "0" 98 } 99 } 100 } 101 } 102 } 103 } 104 } 105 } 106 } 107 } 108 109 func (npr *ManifestRewriter) Encode(v interface{}) error { 110 return npr.EncodeTransform(v, nil) 111 } 112 113 func (npr *ManifestRewriter) EncodeProto(m proto.Message) error { 114 return npr.EncodeProtoTransform(m, nil) 115 } 116 117 func (npr *ManifestRewriter) EncodeTransform(v interface{}, cb func(map[string]interface{}) error) error { 118 return npr.Encoder.EncodeTransform(v, rewriterCallback(cb)) 119 } 120 121 func (npr *ManifestRewriter) EncodeProtoTransform(m proto.Message, cb func(map[string]interface{}) error) error { 122 return npr.Encoder.EncodeProtoTransform(m, rewriterCallback(cb)) 123 } 124 125 func getPachClient(t *testing.T, kubeClient *kube.Clientset, namespace string) *client.APIClient { 126 // Get the pachd service from kubernetes 127 pachd, err := kubeClient.CoreV1().Services(namespace).Get("pachd", metav1.GetOptions{}) 128 require.NoError(t, err) 129 130 var port int32 131 for _, servicePort := range pachd.Spec.Ports { 132 if servicePort.Name == "api-grpc-port" { 133 port = servicePort.NodePort 134 } 135 } 136 require.NotEqual(t, 0, port) 137 138 // Get the IP address of the nodes (any _should_ work for the service port) 139 nodes, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{}) 140 require.NoError(t, err) 141 142 // Minikube 'Hostname' address type didn't work when testing, use InternalIP 143 var address string 144 for _, addr := range nodes.Items[0].Status.Addresses { 145 if addr.Type == "InternalIP" { 146 address = addr.Address 147 } 148 } 149 require.NotEqual(t, "", address) 150 151 // Connect to pachd 152 tu.WaitForPachdReady(t, namespace) 153 client, err := client.NewFromAddress(fmt.Sprintf("%s:%d", address, port), client.WithDialTimeout(100*time.Second)) 154 155 // Some debugging info in case connecting fails - this will dump the pachd 156 // logs in case something went wrong there. In my experience, this has been 157 // due to either problems with credentials to object storage (will also fail 158 // in client_test.go), or insufficient timeout due to slow CI machines. 159 if err != nil { 160 fmt.Printf("Failed to connect to pachd: %v\n", err) 161 fmt.Printf("Used host:port: %s:%d\n", address, port) 162 fmt.Printf("All nodes addresses:\n") 163 for i, node := range nodes.Items { 164 fmt.Printf(" [%d]: %v\n", i, node.Status.Addresses) 165 } 166 pods, err := kubeClient.CoreV1().Pods(namespace).List(metav1.ListOptions{ 167 LabelSelector: "app=pachd", 168 }) 169 if err == nil { 170 if len(pods.Items) != 1 { 171 fmt.Printf("Got wrong number of pods, expected %d but found %d\n", 1, len(pods.Items)) 172 } else { 173 stream, err := kubeClient.CoreV1().Pods(namespace).GetLogs( 174 pods.Items[0].ObjectMeta.Name, 175 &v1.PodLogOptions{}, 176 ).Stream() 177 if err == nil { 178 defer stream.Close() 179 fmt.Printf("Pod logs:\n") 180 io.Copy(os.Stdout, stream) 181 } else { 182 fmt.Printf("Failed to get pod logs: %v\n", err) 183 } 184 } 185 } else { 186 fmt.Printf("Failed to find pachd pod: %v\n", err) 187 } 188 } 189 require.NoError(t, err) 190 return client 191 } 192 193 func makeManifest(t *testing.T, backend assets.Backend, secrets map[string][]byte, opts *assets.AssetOpts) string { 194 manifest := &strings.Builder{} 195 jsonEncoder, err := serde.GetEncoder("json", manifest, serde.WithIndent(2), serde.WithOrigName(true)) 196 require.NoError(t, err) 197 198 // Create a wrapper encoder that rewrites the manifest so that we don't get 199 // collisions across namespaces and can run these tests in parallel. 200 encoder := &ManifestRewriter{Encoder: jsonEncoder} 201 202 // Use a separate hostpath on the kubernetes host for each deployment 203 hostPath := fmt.Sprintf("/var/pachyderm-%s", opts.Namespace) 204 err = assets.WriteAssets(encoder, opts, backend, assets.LocalBackend, 1, hostPath) 205 require.NoError(t, err) 206 207 err = assets.WriteSecret(encoder, secrets, opts) 208 require.NoError(t, err) 209 210 return manifest.String() 211 } 212 213 func withManifest(t *testing.T, backend assets.Backend, secrets map[string][]byte, callback func(namespace string, pachClient *client.APIClient)) { 214 namespaceName := tu.UniqueString("deployment-test-") 215 opts := &assets.AssetOpts{ 216 StorageOpts: assets.StorageOpts{ 217 UploadConcurrencyLimit: assets.DefaultUploadConcurrencyLimit, 218 PutFileConcurrencyLimit: assets.DefaultPutFileConcurrencyLimit, 219 }, 220 PachdShards: 16, 221 Version: "local", 222 LogLevel: "info", 223 Namespace: namespaceName, 224 RequireCriticalServersOnly: assets.DefaultRequireCriticalServersOnly, 225 WorkerServiceAccountName: assets.DefaultWorkerServiceAccountName, 226 NoDash: true, 227 LocalRoles: true, 228 } 229 230 manifest := makeManifest(t, backend, secrets, opts) 231 232 kubeClient := tu.GetKubeClient(t) 233 namespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}} 234 _, err := kubeClient.CoreV1().Namespaces().Create(namespace) 235 require.NoError(t, err) 236 237 if cleanup { 238 defer func() { 239 err := kubeClient.CoreV1().Namespaces().Delete(namespaceName, nil) 240 require.NoError(t, err) 241 }() 242 } 243 244 cmd := tu.Cmd("kubectl", "apply", "--namespace", namespaceName, "-f", "-") 245 cmd.Stdin = strings.NewReader(manifest) 246 err = cmd.Run() 247 require.NoError(t, err) 248 249 pachClient := getPachClient(t, kubeClient, namespaceName) 250 defer pachClient.Close() 251 252 callback(namespaceName, pachClient) 253 } 254 255 func runDeploymentTest(t *testing.T, pachClient *client.APIClient) { 256 // Create an input repo 257 dataRepo := "data" 258 require.NoError(t, pachClient.CreateRepo(dataRepo)) 259 260 // Upload some files 261 commit1, err := pachClient.StartCommit(dataRepo, "master") 262 require.NoError(t, err) 263 _, err = pachClient.PutFile(dataRepo, commit1.ID, "file", strings.NewReader("foo")) 264 require.NoError(t, err) 265 require.NoError(t, pachClient.FinishCommit(dataRepo, commit1.ID)) 266 267 // Create a pipeline 268 pipelineRepo := tu.UniqueString("pipeline") 269 _, err = pachClient.PpsAPIClient.CreatePipeline(context.Background(), &pps.CreatePipelineRequest{ 270 Pipeline: client.NewPipeline(pipelineRepo), 271 Transform: &pps.Transform{ 272 Image: "", 273 Cmd: []string{"bash"}, 274 Stdin: []string{ 275 fmt.Sprintf("cp /pfs/%s/* /pfs/out/", dataRepo), 276 }, 277 }, 278 ParallelismSpec: &pps.ParallelismSpec{ 279 Constant: 1, 280 }, 281 Input: client.NewPFSInput(dataRepo, "/*"), 282 OutputBranch: "", 283 Update: false, 284 ResourceRequests: &pps.ResourceSpec{Cpu: 0.0}, 285 ResourceLimits: &pps.ResourceSpec{Cpu: 0.0}, 286 SidecarResourceLimits: &pps.ResourceSpec{Cpu: 0.0}, 287 }) 288 require.NoError(t, err) 289 290 // Wait for the output commit 291 commitInfoIter, err := pachClient.FlushCommit([]*pfs.Commit{commit1}, nil) 292 require.NoError(t, err) 293 294 // Collect commit infos 295 var commitInfos []*pfs.CommitInfo 296 for { 297 commitInfo, err := commitInfoIter.Next() 298 if errors.Is(err, io.EOF) { 299 break 300 } 301 require.NoError(t, err) 302 commitInfos = append(commitInfos, commitInfo) 303 } 304 require.Equal(t, 1, len(commitInfos)) 305 306 // Check the pipeline output 307 var buf bytes.Buffer 308 require.NoError(t, pachClient.GetFile(commitInfos[0].Commit.Repo.Name, commitInfos[0].Commit.ID, "file", 0, 0, &buf)) 309 require.Equal(t, "foo", buf.String()) 310 } 311 312 func TestAmazonDeployment(t *testing.T) { 313 t.Parallel() 314 advancedConfig := &obj.AmazonAdvancedConfiguration{ 315 Retries: obj.DefaultRetries, 316 Timeout: obj.DefaultTimeout, 317 UploadACL: obj.DefaultUploadACL, 318 Reverse: obj.DefaultReverse, 319 PartSize: obj.DefaultPartSize, 320 MaxUploadParts: obj.DefaultMaxUploadParts, 321 DisableSSL: obj.DefaultDisableSSL, 322 NoVerifySSL: obj.DefaultNoVerifySSL, 323 LogOptions: obj.DefaultAwsLogOptions, 324 } 325 326 // Test the Amazon client against S3 327 t.Run("AmazonObjectStorage", func(t *testing.T) { 328 t.Parallel() 329 id, secret, bucket, region := LoadAmazonParameters(t) 330 secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", "", advancedConfig) 331 withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) { 332 runDeploymentTest(t, pachClient) 333 }) 334 }) 335 336 // Test the Amazon client against ECS 337 t.Run("ECSObjectStorage", func(t *testing.T) { 338 t.Parallel() 339 id, secret, bucket, region, endpoint := LoadECSParameters(t) 340 secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", endpoint, advancedConfig) 341 withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) { 342 runDeploymentTest(t, pachClient) 343 }) 344 }) 345 346 // Test the Amazon client against GCS 347 t.Run("GoogleObjectStorage", func(t *testing.T) { 348 t.Skip("Amazon client does not work against GCS currently, see client_test.go") 349 t.Parallel() 350 id, secret, bucket, region, endpoint := LoadGoogleHMACParameters(t) 351 secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", endpoint, advancedConfig) 352 withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) { 353 runDeploymentTest(t, pachClient) 354 }) 355 }) 356 } 357 358 func TestMinioDeployment(t *testing.T) { 359 t.Parallel() 360 minioTests := func(t *testing.T, endpoint string, bucket string, id string, secret string) { 361 t.Run("S3v2", func(t *testing.T) { 362 t.Skip("Minio client running S3v2 does not handle empty writes properly on S3 and ECS") // (this works for GCS), try upgrading to v7? 363 t.Parallel() 364 secrets := assets.MinioSecret(bucket, id, secret, endpoint, true, true) 365 withManifest(t, assets.MinioBackend, secrets, func(namespace string, pachClient *client.APIClient) { 366 runDeploymentTest(t, pachClient) 367 }) 368 }) 369 370 t.Run("S3v4", func(t *testing.T) { 371 t.Parallel() 372 secrets := assets.MinioSecret(bucket, id, secret, endpoint, true, false) 373 withManifest(t, assets.MinioBackend, secrets, func(namespace string, pachClient *client.APIClient) { 374 runDeploymentTest(t, pachClient) 375 }) 376 }) 377 } 378 379 // Test the Minio client against S3 using the S3v2 and S3v4 APIs 380 t.Run("AmazonObjectStorage", func(t *testing.T) { 381 t.Parallel() 382 id, secret, bucket, region := LoadAmazonParameters(t) 383 endpoint := fmt.Sprintf("s3.%s.amazonaws.com", region) // Note that not all AWS regions support both http/https or both S3v2/S3v4 384 minioTests(t, endpoint, bucket, id, secret) 385 }) 386 387 // Test the Minio client against ECS using the S3v2 and S3v4 APIs 388 t.Run("ECSObjectStorage", func(t *testing.T) { 389 t.Parallel() 390 id, secret, bucket, _, endpoint := LoadECSParameters(t) 391 minioTests(t, endpoint, bucket, id, secret) 392 }) 393 394 // Test the Minio client against GCP using the S3v2 and S3v4 APIs 395 t.Run("GoogleObjectStorage", func(t *testing.T) { 396 t.Parallel() 397 id, secret, bucket, _, endpoint := LoadGoogleHMACParameters(t) 398 minioTests(t, endpoint, bucket, id, secret) 399 }) 400 } 401 402 func TestGoogleDeployment(t *testing.T) { 403 t.Parallel() 404 bucket, creds := LoadGoogleParameters(t) 405 secrets := assets.GoogleSecret(bucket, creds) 406 withManifest(t, assets.GoogleBackend, secrets, func(namespace string, pachClient *client.APIClient) { 407 runDeploymentTest(t, pachClient) 408 }) 409 } 410 411 func TestMicrosoftDeployment(t *testing.T) { 412 t.Parallel() 413 id, secret, container := LoadMicrosoftParameters(t) 414 secrets := assets.MicrosoftSecret(container, id, secret) 415 withManifest(t, assets.MicrosoftBackend, secrets, func(namespace string, pachClient *client.APIClient) { 416 runDeploymentTest(t, pachClient) 417 }) 418 } 419 420 func TestLocalDeployment(t *testing.T) { 421 t.Parallel() 422 secrets := assets.LocalSecret() 423 withManifest(t, assets.LocalBackend, secrets, func(namespace string, pachClient *client.APIClient) { 424 runDeploymentTest(t, pachClient) 425 }) 426 }