github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/.github/workflows/dm_chaos.yaml

name: DM Chaos

on:
  schedule:
    - cron: '0 17-23 * * *' # run at minute 0 every hour from 01:00 ~ 07:00 UTC+8
  workflow_dispatch:
    inputs:
      pr:
        description: 'Which PR do you want to trigger? (PR number)'
        required: true
        default: ''

# See: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#concurrency.
concurrency:
  group: ${{ github.ref }}-${{ github.workflow }}
  cancel-in-progress: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "base"
  base:
    # The type of runner that the job will run on
    runs-on: ubuntu-20.04
    timeout-minutes: 50
    strategy:
      fail-fast: false
      matrix:
        chaos-obj:
          [
            "pod-failure-dm",
            "pod-kill-dm",
            "network-partition-dm",
            "network-emulation-dm",
            "io-chaos-dm",
          ]
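    # Each chaos-obj above corresponds to a Chaos Mesh experiment manifest at
    # dm/chaos/manifests/<chaos-obj>.yaml, which chaos-mesh-action applies later in this job.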

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Set up Go for building DM
      - name: Set up Go env
        uses: actions/setup-go@v3
        with:
          go-version: '1.21'
      - name: Print Go version
        run: go version

      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - name: Check out code
        uses: actions/checkout@v2

      - name: Check out code by workflow dispatch
        if: ${{ github.event.inputs.pr != '' }}
        uses: actions/checkout@v2
        with:
          ref: refs/pull/${{ github.event.inputs.pr }}/head

      - name: Cache go modules
        uses: actions/cache@v2
        with:
          path: ~/go/pkg/mod
          key: ${{ runner.os }}-ticdc-${{ hashFiles('go.sum') }}

      - name: Cache Tools
        id: cache-tools
        uses: actions/cache@v2
        with:
          path: tools/bin
          key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }}

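      # helm/kind-action spins up a local kind cluster; the cluster name here is the action's
      # default ("chart-testing"), which the `kind load` and `kind export logs` commands below rely on.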
      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.4.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          kubectl get sc
          kubectl version
          helm version

      # Disable AppArmor for MySQL, see https://github.com/moby/moby/issues/7512#issuecomment-61787845
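      # The Ubuntu runner ships an AppArmor profile for mysqld that can otherwise block the
      # containerized MySQL instances from starting.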
      - name: Disable AppArmor for MySQL
        run: |
          sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
          sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld

      - name: Build DM binary
        run: make dm-master dm-worker dmctl dm-chaos-case

      # NOTE: we also copy config files into the `bin` directory,
      # so we only need to send `bin` as the build context to the Docker daemon when building the image.
      - name: Build DM docker image
        run: |
          cp -r $GITHUB_WORKSPACE/dm/chaos/cases/conf/ $GITHUB_WORKSPACE/bin/
          docker build -f $GITHUB_WORKSPACE/dm/chaos/manifests/Dockerfile -t dm:chaos $GITHUB_WORKSPACE/bin
          docker image list

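      # The kind nodes cannot pull the locally built image from the host Docker daemon,
      # so dm:chaos has to be loaded into the cluster explicitly.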
      # Load DM docker image into KIND, see https://kind.sigs.k8s.io/docs/user/quick-start/#loading-an-image-into-your-cluster
      - name: Load DM docker image into KIND
        run: |
          kind load docker-image dm:chaos --name chart-testing

      # Set up upstream instances
      - name: Set up sources
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
      - name: Wait for sources ready # `kubectl wait --all` does not work here
        run: |
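          # The first waits tolerate failure (|| true) so the diagnostics below always run;
          # the final zero-timeout waits fail this step if the pods never became Ready.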
          kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=300s || true
          kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=300s || true
          kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=300s || true
          sleep 10
          echo show pvc
          kubectl get pvc -l app=sources -o wide
          echo show pv
          kubectl get pv -o wide
          echo show svc
          kubectl get svc -l app=sources -o wide
          echo show sts
          kubectl get sts -l app=sources -o wide
          echo show po
          kubectl get po -l app=sources -o wide
          echo describe po
          kubectl describe po -l app=sources
          echo describe pvc
          kubectl describe pvc -l app=sources
          kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=0s
          kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=0s
          kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=0s

      # Set up downstream TiDB instance (deploy a TiDB with mockTiKV, not a TidbCluster managed by TiDB-operator)
      - name: Set up TiDB
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
      - name: Wait for TiDB ready
        run: |
          kubectl wait --for=condition=Ready pod/tidb-0 --timeout=300s || true
          echo show pvc
          kubectl get pvc -l app=tidb -o wide
          echo show pv
          kubectl get pv -o wide
          echo show svc
          kubectl get svc -l app=tidb -o wide
          echo show sts
          kubectl get sts -l app=tidb -o wide
          echo show po
          kubectl get po -l app=tidb -o wide
          echo describe po
          kubectl describe po -l app=tidb
          echo describe pvc
          kubectl describe pvc -l app=tidb
          kubectl wait --for=condition=Ready pod/tidb-0 --timeout=0s

      - name: Set up DM-master
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
      # NOTE: even if some DM-master instances are not ready, we still continue and let the chaos test cases check again.
      - name: Wait for DM-master ready
        run: |
          sleep 10
          kubectl wait --for=condition=Ready pod -l app=dm-master --all --timeout=300s || true
          echo "<<<<< show pvc >>>>>"
          kubectl get pvc -l app=dm-master -o wide
          echo "<<<<< show pv >>>>>"
          kubectl get pv -o wide
          echo "<<<<< show svc >>>>>"
          kubectl get svc -l app=dm-master -o wide
          echo "<<<<< show sts >>>>>"
          kubectl get sts -l app=dm-master -o wide
          echo "<<<<< show po >>>>>"
          kubectl get po -l app=dm-master -o wide
          echo "<<<<< describe po >>>>>"
          kubectl describe po -l app=dm-master
          echo "<<<<< describe pvc >>>>>"
          kubectl describe pvc -l app=dm-master
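          # `kubectl logs -p` prints the log of the previous container instance, in case a pod has already restarted.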
          echo "<<<<< show current log for dm-master-0 >>>>>"
          kubectl logs dm-master-0 || true
          echo "<<<<< show previous log for dm-master-0 >>>>>"
          kubectl logs dm-master-0 -p || true
          echo "<<<<< show current log for dm-master-1 >>>>>"
          kubectl logs dm-master-1 || true
          echo "<<<<< show previous log for dm-master-1 >>>>>"
          kubectl logs dm-master-1 -p || true
          echo "<<<<< show current log for dm-master-2 >>>>>"
          kubectl logs dm-master-2 || true
          echo "<<<<< show previous log for dm-master-2 >>>>>"
          kubectl logs dm-master-2 -p || true

      - name: Set up DM-worker
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
      # NOTE: even if some DM-worker instances are not ready, we still continue and let the chaos test cases check again.
      - name: Wait for DM-worker ready
        run: |
          sleep 10
          kubectl wait --for=condition=Ready pod -l app=dm-worker --all --timeout=300s || true
          echo "<<<<< show pvc >>>>>"
          kubectl get pvc -l app=dm-worker -o wide
          echo "<<<<< show pv >>>>>"
          kubectl get pv -o wide
          echo "<<<<< show svc >>>>>"
          kubectl get svc -l app=dm-worker -o wide
          echo "<<<<< show sts >>>>>"
          kubectl get sts -l app=dm-worker -o wide
          echo "<<<<< show po >>>>>"
          kubectl get po -l app=dm-worker -o wide
          echo "<<<<< describe po >>>>>"
          kubectl describe po -l app=dm-worker
          echo "<<<<< describe pvc >>>>>"
          kubectl describe pvc -l app=dm-worker
          echo "<<<<< show current log for dm-worker-0 >>>>>"
          kubectl logs dm-worker-0 || true
          echo "<<<<< show previous log for dm-worker-0 >>>>>"
          kubectl logs dm-worker-0 -p || true
          echo "<<<<< show current log for dm-worker-1 >>>>>"
          kubectl logs dm-worker-1 || true
          echo "<<<<< show previous log for dm-worker-1 >>>>>"
          kubectl logs dm-worker-1 -p || true
          echo "<<<<< show current log for dm-worker-2 >>>>>"
          kubectl logs dm-worker-2 || true
          echo "<<<<< show previous log for dm-worker-2 >>>>>"
          kubectl logs dm-worker-2 -p || true

      # NOTE: the chaos test cases sleep for a while when checking that members are ready, before any chaos operations are applied.
      - name: Set up chaos test cases
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          sleep 60

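      # chaos-mesh-action reads the experiment definition from the CFG_BASE64 environment variable,
      # so the manifest selected by the matrix is base64-encoded into $GITHUB_ENV first.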
      - name: Encode chaos-mesh action
        run: |
          echo CFG_BASE64=$(base64 -w 0 $GITHUB_WORKSPACE/dm/chaos/manifests/${{ matrix.chaos-obj }}.yaml) >> $GITHUB_ENV

      - name: Run chaos mesh action
        uses: chaos-mesh/chaos-mesh-action@master
        env:
          CFG_BASE64: ${{ env.CFG_BASE64 }}

      # Check whether the cases have completed, polling up to 20 times at 1-minute intervals.
      - name: Wait for chaos test case complete
        run: |
          $GITHUB_WORKSPACE/dm/chaos/scripts/check-case.sh

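      # Collect /log/<pod>.log from every dm-* pod via `kubectl cp`, export the kind cluster logs,
      # and chown everything so the upload step below can read the files.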
      - name: Copy logs and fix permissions
        if: ${{ always() }}
        run: |
          mkdir ./logs
          kubectl get pods --no-headers -o custom-columns=":metadata.name"|grep -E "dm-"|xargs -I{} sudo kubectl cp {}:/log/{}.log ./logs/{}.log || true
          kind export logs ./logs/kind --name chart-testing
          sudo chown -R runner ./logs
      # Uploading logs as an artifact seems unstable, so we set `continue-on-error: true` here.
      - name: Upload logs
        continue-on-error: true
        uses: actions/upload-artifact@v2
        if: ${{ always() }}
        with:
          name: chaos-base-logs.${{ matrix.chaos-obj }}
          path: |
            ./logs

      # Send a Slack notification if the job failed.
      # NOTE: With the exception of `GITHUB_TOKEN`, secrets are not passed to the runner when a workflow is triggered from a forked repository.
      - name: Slack notification
        if: ${{ failure() }}
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_NOTIFY }}
        uses: Ilshidur/action-slack@2.1.0
        with:
          args: "chaos job failed, see https://github.com/pingcap/tiflow/actions/runs/{{ GITHUB_RUN_ID }}"