# Source: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/.github/workflows/dm_chaos.yaml
#
# DM chaos test workflow.
# For every chaos manifest listed in the job matrix this workflow builds the DM
# binaries and docker image, deploys the upstream sources, a downstream TiDB,
# DM-master and DM-worker into a KIND cluster, applies the chaos-mesh manifest,
# and then waits for the chaos test case to finish. Logs are collected and
# uploaded as an artifact whether or not the run succeeded.
name: DM Chaos

on:
  schedule:
    - cron: '0 17-23 * * *' # run at minute 0 every hour from 01:00 ~ 07:00 UTC+8
  workflow_dispatch:
    inputs:
      pr:
        description: 'Which PR do you want to trigger'
        required: true
        default: ''

# See: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#concurrency.
concurrency:
  group: ${{ github.ref }}-${{ github.workflow }}
  cancel-in-progress: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "base"
  base:
    # The type of runner that the job will run on
    runs-on: ubuntu-20.04
    timeout-minutes: 50
    strategy:
      # Let the remaining chaos objects keep running when one of them fails.
      fail-fast: false
      matrix:
        # Each entry is the base name of a manifest under dm/chaos/manifests/
        # (see the "Encode chaos-mesh action" step).
        chaos-obj:
          - "pod-failure-dm"
          - "pod-kill-dm"
          - "network-partition-dm"
          - "network-emulation-dm"
          - "io-chaos-dm"

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Set up Go for building DM
      - name: Set up Go env
        uses: actions/setup-go@v3
        with:
          go-version: '1.21'
      - name: Print Go version
        run: go version

      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - name: Check out code
        uses: actions/checkout@v2

      # Only runs when a PR number was supplied through workflow_dispatch;
      # it replaces the default checkout with the head of that PR.
      - name: Check out code by workflow dispatch
        if: ${{ github.event.inputs.pr != '' }}
        uses: actions/checkout@v2
        with:
          ref: refs/pull/${{ github.event.inputs.pr }}/head

      - name: Cache go modules
        uses: actions/cache@v2
        with:
          path: ~/go/pkg/mod
          key: ${{ runner.os }}-ticdc-${{ hashFiles('go.sum') }}

      - name: Cache Tools
        id: cache-tools
        uses: actions/cache@v2
        with:
          path: tools/bin
          key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }}

      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.4.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          kubectl get sc
          kubectl version
          helm version

      # Disable AppArmor for MySQL, see https://github.com/moby/moby/issues/7512#issuecomment-61787845
      - name: Disable AppArmor for MySQL
        run: |
          sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
          sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld

      - name: Build DM binary
        run: make dm-master dm-worker dmctl dm-chaos-case

      # NOTE: we also copy config files into `bin` directory,
      # so we only need to send `bin` as the context into docker daemon when building image.
      - name: Build DM docker image
        run: |
          cp -r $GITHUB_WORKSPACE/dm/chaos/cases/conf/ $GITHUB_WORKSPACE/bin/
          docker build -f $GITHUB_WORKSPACE/dm/chaos/manifests/Dockerfile -t dm:chaos $GITHUB_WORKSPACE/bin
          docker image list

      # Load DM docker image into KIND, see https://kind.sigs.k8s.io/docs/user/quick-start/#loading-an-image-into-your-cluster
      # NOTE(review): `chart-testing` is presumably the cluster name created by helm/kind-action — confirm.
      - name: Load DM docker image into KIND
        run: |
          kind load docker-image dm:chaos --name chart-testing

      # Set up upstream instances
      - name: Set up sources
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
      # The first round of waits is best-effort (`|| true`) so that the
      # diagnostics are always printed; the final `--timeout=0s` waits have no
      # `|| true`, so the step fails if a source pod is still not ready.
      - name: Wait for sources ready # kubectl wait --all not working
        run: |
          kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=300s || true
          kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=300s || true
          kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=300s || true
          sleep 10
          echo show pvc
          kubectl get pvc -l app=sources -o wide
          echo show pv
          kubectl get pv -o wide
          echo show svc
          kubectl get svc -l app=sources -o wide
          echo show sts
          kubectl get sts -l app=sources -o wide
          echo show po
          kubectl get po -l app=sources -o wide
          echo describe po
          kubectl describe po -l app=sources
          echo describe pvc
          kubectl describe pvc -l app=sources
          kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=0s
          kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=0s
          kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=0s

      # Set up downstream TiDB instance (deploy a TiDB with mockTiKV, not a TidbCluster managed by TiDB-operator)
      - name: Set up TiDB
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
      # Same pattern as the sources: best-effort wait, print diagnostics, then a
      # final `--timeout=0s` wait that fails the step if TiDB is not ready.
      - name: Wait for TiDB ready
        run: |
          kubectl wait --for=condition=Ready pod/tidb-0 --timeout=300s || true
          echo show pvc
          kubectl get pvc -l app=tidb -o wide
          echo show pv
          kubectl get pv -o wide
          echo show svc
          kubectl get svc -l app=tidb -o wide
          echo show sts
          kubectl get sts -l app=tidb -o wide
          echo show po
          kubectl get po -l app=tidb -o wide
          echo describe po
          kubectl describe po -l app=tidb
          echo describe pvc
          kubectl describe pvc -l app=tidb
          kubectl wait --for=condition=Ready pod/tidb-0 --timeout=0s

      - name: Set up DM-master
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-master.yaml
      # NOTE: even if some DM-master instances are not ready, we still continue
      # and let the chaos test cases check again.
      - name: Wait for DM-master ready
        run: |
          sleep 10
          kubectl wait --for=condition=Ready pod -l app=dm-master --all --timeout=300s || true
          echo "<<<<< show pvc >>>>>"
          kubectl get pvc -l app=dm-master -o wide
          echo "<<<<< show pv >>>>>"
          kubectl get pv -o wide
          echo "<<<<< show svc >>>>>"
          kubectl get svc -l app=dm-master -o wide
          echo "<<<<< show sts >>>>>"
          kubectl get sts -l app=dm-master -o wide
          echo "<<<<< show po >>>>>"
          kubectl get po -l app=dm-master -o wide
          echo "<<<<< describe po >>>>>"
          kubectl describe po -l app=dm-master
          echo "<<<<< describe pvc >>>>>"
          kubectl describe pvc -l app=dm-master
          echo "<<<<< show current log for dm-master-0 >>>>>"
          kubectl logs dm-master-0 || true
          echo "<<<<< show previous log for dm-master-0 >>>>>"
          kubectl logs dm-master-0 -p || true
          echo "<<<<< show current log for dm-master-1 >>>>>"
          kubectl logs dm-master-1 || true
          echo "<<<<< show previous log for dm-master-1 >>>>>"
          kubectl logs dm-master-1 -p || true
          echo "<<<<< show current log for dm-master-2 >>>>>"
          kubectl logs dm-master-2 || true
          echo "<<<<< show previous log for dm-master-2 >>>>>"
          kubectl logs dm-master-2 -p || true

      - name: Set up DM-worker
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/dm-worker.yaml
      # NOTE: even if some DM-worker instances are not ready, we still continue
      # and let the chaos test cases check again.
      - name: Wait for DM-worker ready
        run: |
          sleep 10
          kubectl wait --for=condition=Ready pod -l app=dm-worker --all --timeout=300s || true
          echo "<<<<< show pvc >>>>>"
          kubectl get pvc -l app=dm-worker -o wide
          echo "<<<<< show pv >>>>>"
          kubectl get pv -o wide
          echo "<<<<< show svc >>>>>"
          kubectl get svc -l app=dm-worker -o wide
          echo "<<<<< show sts >>>>>"
          kubectl get sts -l app=dm-worker -o wide
          echo "<<<<< show po >>>>>"
          kubectl get po -l app=dm-worker -o wide
          echo "<<<<< describe po >>>>>"
          kubectl describe po -l app=dm-worker
          echo "<<<<< describe pvc >>>>>"
          kubectl describe pvc -l app=dm-worker
          echo "<<<<< show current log for dm-worker-0 >>>>>"
          kubectl logs dm-worker-0 || true
          echo "<<<<< show previous log for dm-worker-0 >>>>>"
          kubectl logs dm-worker-0 -p || true
          echo "<<<<< show current log for dm-worker-1 >>>>>"
          kubectl logs dm-worker-1 || true
          echo "<<<<< show previous log for dm-worker-1 >>>>>"
          kubectl logs dm-worker-1 -p || true
          echo "<<<<< show current log for dm-worker-2 >>>>>"
          kubectl logs dm-worker-2 || true
          echo "<<<<< show previous log for dm-worker-2 >>>>>"
          kubectl logs dm-worker-2 -p || true

      # NOTE: we sleep for a while, while the cases check that members are ready,
      # before applying any chaos operations.
      - name: Set up chaos test cases
        run: |
          kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/cases.yaml
          sleep 60

      # Export the base64-encoded chaos manifest of the current matrix entry as
      # CFG_BASE64 for the following step (`-w 0` keeps it on a single line).
      - name: Encode chaos-mesh action
        run: |
          echo "CFG_BASE64=$(base64 -w 0 "$GITHUB_WORKSPACE/dm/chaos/manifests/${{ matrix.chaos-obj }}.yaml")" >> "$GITHUB_ENV"

      - name: Run chaos mesh action
        uses: chaos-mesh/chaos-mesh-action@master
        env:
          CFG_BASE64: ${{ env.CFG_BASE64 }}

      # Check whether the case has completed, polling once per minute up to 20 times
      # (per the original note; the polling itself lives in check-case.sh).
      - name: Wait for chaos test case complete
        run: |
          $GITHUB_WORKSPACE/dm/chaos/scripts/check-case.sh

      # Runs even after a failure. `mkdir -p` keeps the step from aborting when
      # ./logs already exists; copying the pod logs is best-effort (`|| true`).
      # The final chown hands the sudo-created files to the `runner` user.
      - name: Copy logs to hack permission
        if: ${{ always() }}
        run: |
          mkdir -p ./logs
          kubectl get pods --no-headers -o custom-columns=":metadata.name"|grep -E "dm-"|xargs -I{} sudo kubectl cp {}:/log/{}.log ./logs/{}.log || true
          kind export logs ./logs/kind --name chart-testing
          sudo chown -R runner ./logs
      # Uploading logs as an artifact seems unstable, so we set `continue-on-error: true` here.
      - name: Upload logs
        continue-on-error: true
        uses: actions/upload-artifact@v2
        if: ${{ always() }}
        with:
          name: chaos-base-logs.${{ matrix.chaos-obj }}
          path: |
            ./logs

      # Send a Slack notification on failure.
      # NOTE: With the exception of `GITHUB_TOKEN`, secrets are not passed to the runner when a workflow is triggered from a forked repository.
      # NOTE(review): `{{ GITHUB_RUN_ID }}` (no `$`) is not a GitHub expression; it is
      # presumably expanded by the Slack action itself — confirm against its docs.
      - name: Slack notification
        if: ${{ failure() }}
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_NOTIFY }}
        uses: Ilshidur/action-slack@2.1.0
        with:
          args: "chaos job failed, see https://github.com/pingcap/tiflow/actions/runs/{{ GITHUB_RUN_ID }}"