# Source: github.com/kubeflow/training-operator@v1.7.0/.github/workflows/integration-tests.yaml
#
# Runs the Python SDK test suite (sdk/python/test) against a Kind cluster
# on every pull request.
name: integration test
on:
  - pull_request

# One active run per workflow and ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  integration-test:
    runs-on: ubuntu-latest

    # Roughly equivalent to the following cross product:
    #
    # ```yaml
    # strategy:
    #   fail-fast: false
    #   matrix:
    #     kubernetes-version: ["v1.25.11", "v1.26.6", "v1.27.3"]
    #     gang-scheduler-name: ["none", "scheduler-plugins", "volcano"]
    # ```
    #
    # The difference is that every combination is listed explicitly so that
    # each one can be assigned its own (arbitrarily chosen) Python version,
    # which exercises the Python SDK across several interpreters.
    strategy:
      # Let the remaining combinations finish even when one of them fails.
      fail-fast: false
      matrix:
        include:
          - kubernetes-version: "v1.26.6"
            gang-scheduler-name: "none"
            python-version: "3.10"
          - kubernetes-version: "v1.27.3"
            gang-scheduler-name: "none"
            python-version: "3.7"
          - kubernetes-version: "v1.25.11"
            gang-scheduler-name: "none"
            python-version: "3.8"
          - kubernetes-version: "v1.26.6"
            gang-scheduler-name: "scheduler-plugins"
            python-version: "3.9"
          - kubernetes-version: "v1.27.3"
            gang-scheduler-name: "scheduler-plugins"
            python-version: "3.10"
          - kubernetes-version: "v1.25.11"
            gang-scheduler-name: "scheduler-plugins"
            python-version: "3.10"
          - kubernetes-version: "v1.26.6"
            gang-scheduler-name: "volcano"
            python-version: "3.9"
          - kubernetes-version: "v1.27.3"
            gang-scheduler-name: "volcano"
            python-version: "3.10"
          - kubernetes-version: "v1.25.11"
            gang-scheduler-name: "volcano"
            python-version: "3.10"

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      # The Go toolchain version is taken from the repository's go.mod.
      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version-file: go.mod

      # The node image and kubectl both follow the matrix Kubernetes version.
      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.3.0
        with:
          node_image: kindest/node:${{ matrix.kubernetes-version }}
          cluster_name: training-operator-cluster
          kubectl_version: ${{ matrix.kubernetes-version }}

      # NOTE(review): the script is outside this file; presumably it builds the
      # operator image under the TRAINING_CI_IMAGE tag -- confirm in the script.
      - name: Build training-operator
        run: ./scripts/gha/build-image.sh
        env:
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test

      # KIND_CLUSTER and TRAINING_CI_IMAGE repeat the cluster name and image
      # tag used by the two steps above.
      - name: Deploy training operator
        run: ./scripts/gha/setup-training-operator.sh
        env:
          KIND_CLUSTER: training-operator-cluster
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test
          GANG_SCHEDULER_NAME: ${{ matrix.gang-scheduler-name }}
          KUBERNETES_VERSION: ${{ matrix.kubernetes-version }}

      # Both installs go through `python3 -m pip` so that pytest and the SDK
      # land in the same interpreter. One command per line; the default shell
      # stops at the first failing command.
      - name: Run tests
        run: |
          python3 -m pip install pytest
          python3 -m pip install -e sdk/python
          pytest sdk/python/test --log-cli-level=info --namespace=default
        env:
          GANG_SCHEDULER_NAME: ${{ matrix.gang-scheduler-name }}

      # Diagnostics only: runs when an earlier step failed in a volcano job.
      # `--tail=-1` requests the complete log instead of only the last lines.
      - name: Collect volcano logs
        if: ${{ failure() && matrix.gang-scheduler-name == 'volcano' }}
        run: |
          echo "dump volcano-scheduler logs..."
          kubectl logs -n volcano-system -l app=volcano-scheduler --tail=-1
          echo "dump volcano-admission logs..."
          kubectl logs -n volcano-system -l app=volcano-admission --tail=-1
          echo "dump volcano-controllers logs..."
          kubectl logs -n volcano-system -l app=volcano-controller --tail=-1
          echo "dump podgroups description..."
          kubectl describe podgroups.scheduling.volcano.sh -A