github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/obj/testing/deployment_test.go

package testing

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/gogo/protobuf/proto"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kube "k8s.io/client-go/kubernetes"

	"github.com/pachyderm/pachyderm/src/client"
	"github.com/pachyderm/pachyderm/src/client/pfs"
	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
	"github.com/pachyderm/pachyderm/src/client/pkg/require"
	"github.com/pachyderm/pachyderm/src/client/pps"
	"github.com/pachyderm/pachyderm/src/server/pkg/deploy/assets"
	"github.com/pachyderm/pachyderm/src/server/pkg/obj"
	"github.com/pachyderm/pachyderm/src/server/pkg/serde"
	tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil"
)

// This test suite works by spinning up a separate pachd deployment in a new
// namespace for each configuration. Several parts of the manifests are
// rewritten so that the deployments are parallelizable and don't step on each
// other's toes. Once a deployment is up and running, we run a simple pipeline
// test to ensure that we can round-trip data to object storage from both the
// worker and pachd. For testing specific corner cases, consider modifying the
// client test suite in this same package.

// NOTE: these tests require object storage credentials to be loaded in your
// environment (see util.go for where they are loaded).

// Change this to false to keep kubernetes namespaces around after the test for
// debugging purposes.
const cleanup = true

// ManifestRewriter wraps a serde.Encoder and rewrites kubernetes manifests so
// that services auto-allocate their external ports and deployments drop their
// cpu resource requests and limits, allowing tests to run in parallel.
type ManifestRewriter struct {
	serde.Encoder
}

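// rewriterCallback wraps an optional caller-provided transform callback so
// that rewriteManifest is applied to every manifest object the encoder emits.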
func rewriterCallback(innerCb func(map[string]interface{}) error) func(map[string]interface{}) error {
	return func(data map[string]interface{}) error {
		var err error
		if innerCb != nil {
			err = innerCb(data)
		}
		rewriteManifest(data)
		return err
	}
}

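// rewriteManifest mutates a single decoded manifest: NodePort services get
// their nodePorts zeroed so kubernetes auto-allocates free ports, and
// deployment containers get their cpu requests and limits set to "0" so that
// many deployments fit on the same test cluster.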
func rewriteManifest(data map[string]interface{}) {
	if data["kind"] == "Service" {
		spec := data["spec"].(map[string]interface{})
		if spec["type"] == "NodePort" {
			ports := spec["ports"].([]interface{})
			for _, port := range ports {
				port := port.(map[string]interface{})
				if _, ok := port["nodePort"]; ok {
					port["nodePort"] = 0
				}
			}
		}
	}

	if data["kind"] == "Deployment" {
		if spec, ok := data["spec"]; ok {
			spec := spec.(map[string]interface{})
			if template, ok := spec["template"]; ok {
				template := template.(map[string]interface{})
				if spec, ok := template["spec"]; ok {
					spec := spec.(map[string]interface{})
					if containers, ok := spec["containers"]; ok {
						containers := containers.([]interface{})
						for _, container := range containers {
							container := container.(map[string]interface{})
							if resources, ok := container["resources"]; ok {
								resources := resources.(map[string]interface{})
								if limits, ok := resources["limits"]; ok {
									limits := limits.(map[string]interface{})
									if _, ok := limits["cpu"]; ok {
										limits["cpu"] = "0"
									}
								}
								if requests, ok := resources["requests"]; ok {
									requests := requests.(map[string]interface{})
									if _, ok := requests["cpu"]; ok {
										requests["cpu"] = "0"
									}
								}
							}
						}
					}
				}
			}
		}
	}
}

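// Encode, EncodeProto, EncodeTransform, and EncodeProtoTransform implement
// serde.Encoder on top of the wrapped encoder, injecting the manifest rewrite
// into every object that gets encoded.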
func (npr *ManifestRewriter) Encode(v interface{}) error {
	return npr.EncodeTransform(v, nil)
}

func (npr *ManifestRewriter) EncodeProto(m proto.Message) error {
	return npr.EncodeProtoTransform(m, nil)
}

func (npr *ManifestRewriter) EncodeTransform(v interface{}, cb func(map[string]interface{}) error) error {
	return npr.Encoder.EncodeTransform(v, rewriterCallback(cb))
}

func (npr *ManifestRewriter) EncodeProtoTransform(m proto.Message, cb func(map[string]interface{}) error) error {
	return npr.Encoder.EncodeProtoTransform(m, rewriterCallback(cb))
}

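// getPachClient looks up the pachd NodePort service in the given namespace and
// connects to it through a node's InternalIP address, dumping the pachd logs
// if the connection fails.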
func getPachClient(t *testing.T, kubeClient *kube.Clientset, namespace string) *client.APIClient {
	// Get the pachd service from kubernetes
	pachd, err := kubeClient.CoreV1().Services(namespace).Get("pachd", metav1.GetOptions{})
	require.NoError(t, err)

	var port int32
	for _, servicePort := range pachd.Spec.Ports {
		if servicePort.Name == "api-grpc-port" {
			port = servicePort.NodePort
		}
	}
	require.NotEqual(t, 0, port)

	// Get the IP address of the nodes (any _should_ work for the service port)
	nodes, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{})
	require.NoError(t, err)

	// Minikube's 'Hostname' address type didn't work when testing, so use InternalIP
	var address string
	for _, addr := range nodes.Items[0].Status.Addresses {
		if addr.Type == "InternalIP" {
			address = addr.Address
		}
	}
	require.NotEqual(t, "", address)

	// Connect to pachd
	tu.WaitForPachdReady(t, namespace)
	client, err := client.NewFromAddress(fmt.Sprintf("%s:%d", address, port), client.WithDialTimeout(100*time.Second))

	// Some debugging info in case connecting fails - this will dump the pachd
	// logs in case something went wrong there. In my experience, failures here
	// have been due to either problems with the object storage credentials
	// (which will also fail in client_test.go) or an insufficient timeout on
	// slow CI machines.
	if err != nil {
		fmt.Printf("Failed to connect to pachd: %v\n", err)
		fmt.Printf("Used host:port: %s:%d\n", address, port)
		fmt.Printf("All node addresses:\n")
		for i, node := range nodes.Items {
			fmt.Printf(" [%d]: %v\n", i, node.Status.Addresses)
		}
		pods, err := kubeClient.CoreV1().Pods(namespace).List(metav1.ListOptions{
			LabelSelector: "app=pachd",
		})
		if err == nil {
			if len(pods.Items) != 1 {
				fmt.Printf("Got wrong number of pods, expected %d but found %d\n", 1, len(pods.Items))
			} else {
				stream, err := kubeClient.CoreV1().Pods(namespace).GetLogs(
					pods.Items[0].ObjectMeta.Name,
					&v1.PodLogOptions{},
				).Stream()
				if err == nil {
					defer stream.Close()
					fmt.Printf("Pod logs:\n")
					io.Copy(os.Stdout, stream)
				} else {
					fmt.Printf("Failed to get pod logs: %v\n", err)
				}
			}
		} else {
			fmt.Printf("Failed to find pachd pod: %v\n", err)
		}
	}
	require.NoError(t, err)
	return client
}

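// makeManifest renders the deployment manifest and storage secret for the
// given backend as a single JSON document, using ManifestRewriter so the
// resulting resources can coexist with other parallel test deployments.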
func makeManifest(t *testing.T, backend assets.Backend, secrets map[string][]byte, opts *assets.AssetOpts) string {
	manifest := &strings.Builder{}
	jsonEncoder, err := serde.GetEncoder("json", manifest, serde.WithIndent(2), serde.WithOrigName(true))
	require.NoError(t, err)

	// Create a wrapper encoder that rewrites the manifest so that we don't get
	// collisions across namespaces and can run these tests in parallel.
	encoder := &ManifestRewriter{Encoder: jsonEncoder}

	// Use a separate hostpath on the kubernetes host for each deployment
	hostPath := fmt.Sprintf("/var/pachyderm-%s", opts.Namespace)
	err = assets.WriteAssets(encoder, opts, backend, assets.LocalBackend, 1, hostPath)
	require.NoError(t, err)

	err = assets.WriteSecret(encoder, secrets, opts)
	require.NoError(t, err)

	return manifest.String()
}

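// withManifest deploys pachyderm into a fresh namespace using the given
// backend and secrets, runs the callback with a connected pach client, and
// (if cleanup is enabled) deletes the namespace when the test finishes.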
func withManifest(t *testing.T, backend assets.Backend, secrets map[string][]byte, callback func(namespace string, pachClient *client.APIClient)) {
	namespaceName := tu.UniqueString("deployment-test-")
	opts := &assets.AssetOpts{
		StorageOpts: assets.StorageOpts{
			UploadConcurrencyLimit:  assets.DefaultUploadConcurrencyLimit,
			PutFileConcurrencyLimit: assets.DefaultPutFileConcurrencyLimit,
		},
		PachdShards:                16,
		Version:                    "local",
		LogLevel:                   "info",
		Namespace:                  namespaceName,
		RequireCriticalServersOnly: assets.DefaultRequireCriticalServersOnly,
		WorkerServiceAccountName:   assets.DefaultWorkerServiceAccountName,
		NoDash:                     true,
		LocalRoles:                 true,
	}

	manifest := makeManifest(t, backend, secrets, opts)

	kubeClient := tu.GetKubeClient(t)
	namespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}}
	_, err := kubeClient.CoreV1().Namespaces().Create(namespace)
	require.NoError(t, err)

	if cleanup {
		defer func() {
			err := kubeClient.CoreV1().Namespaces().Delete(namespaceName, nil)
			require.NoError(t, err)
		}()
	}

	cmd := tu.Cmd("kubectl", "apply", "--namespace", namespaceName, "-f", "-")
	cmd.Stdin = strings.NewReader(manifest)
	err = cmd.Run()
	require.NoError(t, err)

	pachClient := getPachClient(t, kubeClient, namespaceName)
	defer pachClient.Close()

	callback(namespaceName, pachClient)
}

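// runDeploymentTest round-trips a small file through object storage: it
// commits the file to an input repo, runs a copy pipeline over it, and
// verifies that the pipeline's output commit contains the same contents.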
func runDeploymentTest(t *testing.T, pachClient *client.APIClient) {
	// Create an input repo
	dataRepo := "data"
	require.NoError(t, pachClient.CreateRepo(dataRepo))

	// Upload some files
	commit1, err := pachClient.StartCommit(dataRepo, "master")
	require.NoError(t, err)
	_, err = pachClient.PutFile(dataRepo, commit1.ID, "file", strings.NewReader("foo"))
	require.NoError(t, err)
	require.NoError(t, pachClient.FinishCommit(dataRepo, commit1.ID))

	// Create a pipeline
	pipelineRepo := tu.UniqueString("pipeline")
	_, err = pachClient.PpsAPIClient.CreatePipeline(context.Background(), &pps.CreatePipelineRequest{
		Pipeline: client.NewPipeline(pipelineRepo),
		Transform: &pps.Transform{
			Image: "",
			Cmd:   []string{"bash"},
			Stdin: []string{
				fmt.Sprintf("cp /pfs/%s/* /pfs/out/", dataRepo),
			},
		},
		ParallelismSpec: &pps.ParallelismSpec{
			Constant: 1,
		},
		Input:                 client.NewPFSInput(dataRepo, "/*"),
		OutputBranch:          "",
		Update:                false,
		ResourceRequests:      &pps.ResourceSpec{Cpu: 0.0},
		ResourceLimits:        &pps.ResourceSpec{Cpu: 0.0},
		SidecarResourceLimits: &pps.ResourceSpec{Cpu: 0.0},
	})
	require.NoError(t, err)

	// Wait for the output commit
	commitInfoIter, err := pachClient.FlushCommit([]*pfs.Commit{commit1}, nil)
	require.NoError(t, err)

	// Collect commit infos
	var commitInfos []*pfs.CommitInfo
	for {
		commitInfo, err := commitInfoIter.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		require.NoError(t, err)
		commitInfos = append(commitInfos, commitInfo)
	}
	require.Equal(t, 1, len(commitInfos))

	// Check the pipeline output
	var buf bytes.Buffer
	require.NoError(t, pachClient.GetFile(commitInfos[0].Commit.Repo.Name, commitInfos[0].Commit.ID, "file", 0, 0, &buf))
	require.Equal(t, "foo", buf.String())
}

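// TestAmazonDeployment deploys pachd with the Amazon storage client against
// S3, ECS, and GCS (the GCS subtest is currently skipped) and runs the
// deployment test against each.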
func TestAmazonDeployment(t *testing.T) {
	t.Parallel()
	advancedConfig := &obj.AmazonAdvancedConfiguration{
		Retries:        obj.DefaultRetries,
		Timeout:        obj.DefaultTimeout,
		UploadACL:      obj.DefaultUploadACL,
		Reverse:        obj.DefaultReverse,
		PartSize:       obj.DefaultPartSize,
		MaxUploadParts: obj.DefaultMaxUploadParts,
		DisableSSL:     obj.DefaultDisableSSL,
		NoVerifySSL:    obj.DefaultNoVerifySSL,
		LogOptions:     obj.DefaultAwsLogOptions,
	}

	// Test the Amazon client against S3
	t.Run("AmazonObjectStorage", func(t *testing.T) {
		t.Parallel()
		id, secret, bucket, region := LoadAmazonParameters(t)
		secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", "", advancedConfig)
		withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) {
			runDeploymentTest(t, pachClient)
		})
	})

	// Test the Amazon client against ECS
	t.Run("ECSObjectStorage", func(t *testing.T) {
		t.Parallel()
		id, secret, bucket, region, endpoint := LoadECSParameters(t)
		secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", endpoint, advancedConfig)
		withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) {
			runDeploymentTest(t, pachClient)
		})
	})

	// Test the Amazon client against GCS
	t.Run("GoogleObjectStorage", func(t *testing.T) {
		t.Skip("Amazon client does not work against GCS currently, see client_test.go")
		t.Parallel()
		id, secret, bucket, region, endpoint := LoadGoogleHMACParameters(t)
		secrets := assets.AmazonSecret(region, bucket, id, secret, "", "", endpoint, advancedConfig)
		withManifest(t, assets.AmazonBackend, secrets, func(namespace string, pachClient *client.APIClient) {
			runDeploymentTest(t, pachClient)
		})
	})
}

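// TestMinioDeployment deploys pachd with the Minio client against S3, ECS, and
// GCS endpoints, exercising the S3v2 (currently skipped) and S3v4 signature APIs.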
func TestMinioDeployment(t *testing.T) {
	t.Parallel()
	minioTests := func(t *testing.T, endpoint string, bucket string, id string, secret string) {
		t.Run("S3v2", func(t *testing.T) {
			t.Skip("Minio client running S3v2 does not handle empty writes properly on S3 and ECS") // (this works for GCS), try upgrading to v7?
			t.Parallel()
			secrets := assets.MinioSecret(bucket, id, secret, endpoint, true, true)
			withManifest(t, assets.MinioBackend, secrets, func(namespace string, pachClient *client.APIClient) {
				runDeploymentTest(t, pachClient)
			})
		})

		t.Run("S3v4", func(t *testing.T) {
			t.Parallel()
			secrets := assets.MinioSecret(bucket, id, secret, endpoint, true, false)
			withManifest(t, assets.MinioBackend, secrets, func(namespace string, pachClient *client.APIClient) {
				runDeploymentTest(t, pachClient)
			})
		})
	}

	// Test the Minio client against S3 using the S3v2 and S3v4 APIs
	t.Run("AmazonObjectStorage", func(t *testing.T) {
		t.Parallel()
		id, secret, bucket, region := LoadAmazonParameters(t)
		endpoint := fmt.Sprintf("s3.%s.amazonaws.com", region) // Note that not all AWS regions support both http/https or both S3v2/S3v4
		minioTests(t, endpoint, bucket, id, secret)
	})

	// Test the Minio client against ECS using the S3v2 and S3v4 APIs
	t.Run("ECSObjectStorage", func(t *testing.T) {
		t.Parallel()
		id, secret, bucket, _, endpoint := LoadECSParameters(t)
		minioTests(t, endpoint, bucket, id, secret)
	})

	// Test the Minio client against GCP using the S3v2 and S3v4 APIs
	t.Run("GoogleObjectStorage", func(t *testing.T) {
		t.Parallel()
		id, secret, bucket, _, endpoint := LoadGoogleHMACParameters(t)
		minioTests(t, endpoint, bucket, id, secret)
	})
}

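// TestGoogleDeployment deploys pachd with the native GCS client.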
func TestGoogleDeployment(t *testing.T) {
	t.Parallel()
	bucket, creds := LoadGoogleParameters(t)
	secrets := assets.GoogleSecret(bucket, creds)
	withManifest(t, assets.GoogleBackend, secrets, func(namespace string, pachClient *client.APIClient) {
		runDeploymentTest(t, pachClient)
	})
}

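// TestMicrosoftDeployment deploys pachd with the Azure blob storage client.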
func TestMicrosoftDeployment(t *testing.T) {
	t.Parallel()
	id, secret, container := LoadMicrosoftParameters(t)
	secrets := assets.MicrosoftSecret(container, id, secret)
	withManifest(t, assets.MicrosoftBackend, secrets, func(namespace string, pachClient *client.APIClient) {
		runDeploymentTest(t, pachClient)
	})
}

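// TestLocalDeployment deploys pachd with local (hostpath) object storage.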
func TestLocalDeployment(t *testing.T) {
	t.Parallel()
	secrets := assets.LocalSecret()
	withManifest(t, assets.LocalBackend, secrets, func(namespace string, pachClient *client.APIClient) {
		runDeploymentTest(t, pachClient)
	})
}