# Origin: github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/deploy/llm/templates/scripts.yaml
#
# ConfigMap holding the shell scripts used by the vLLM pods:
#   start.sh              - pod entrypoint; starts Ray as head (ordinal 0) or worker
#   vllm-start.sh         - waits for the Ray cluster, then runs the vLLM API server
#   ray-health-checker.sh - kills the vLLM server when the Ray node count is wrong
apiVersion: v1
kind: ConfigMap
metadata:
  name: vllm-scripts
  labels:
    {{- include "llm.labels" . | nindent 4 }}
data:
  start.sh: |
    #!/bin/bash
    # Pod entrypoint.
    # Environment read:
    #   CLONE_MODEL_SCRIPT  - optional shell snippet, executed first when non-empty
    #   KB_POD_NAME         - pod name; the text after the last '-' is the ordinal
    #   KB_VLLM_0_HOSTNAME  - hostname workers use to reach the Ray head (port 6379)
    if [ -n "$CLONE_MODEL_SCRIPT" ]; then
      bash -c "$CLONE_MODEL_SCRIPT"
    fi

    # Strip everything up to and including the last '-' of the pod name.
    ordinal=${KB_POD_NAME##*-}
    echo "current pod ordinal: $ordinal"

    # Plain [ ] on purpose: with an empty or non-numeric ordinal the test fails
    # and the pod takes the worker branch, whereas [[ -eq ]] would evaluate an
    # empty string as 0 and start a second head.
    if [ "$ordinal" -eq 0 ]; then
      # Ordinal 0: run the vLLM launcher and the health checker in the
      # background, then keep the Ray head in the foreground (--block).
      /scripts/vllm-start.sh &
      /scripts/ray-health-checker.sh &
      # NOTE(review): the redis password is hard-coded here; consider sourcing
      # it from a Secret-backed environment variable instead.
      ray start --head --block --redis-password=44s5jntp
    else
      # Any other ordinal: join the head node as a worker.
      ray start --address="${KB_VLLM_0_HOSTNAME}:6379" --block
    fi
  vllm-start.sh: |
    #!/bin/bash
    # Starts the vLLM API server once `ray status` reports KB_VLLM_N nodes, and
    # restarts it after every non-zero exit.
    # Environment read:
    #   MODEL_NAME  - value passed to --model
    #   KB_VLLM_N   - expected Ray node count, also used as --tensor-parallel-size
    #   EXTRA_ARGS  - extra server arguments, word-split on purpose
    echo "model=${MODEL_NAME}"
    echo "EXTRA_ARGS=${EXTRA_ARGS}"
    # Keep going when the directory is missing (as before), but say so.
    cd vllm || echo "warning: cannot cd to 'vllm', continuing in $(pwd)" >&2
    while true; do
      # Count the lines of `ray status` that contain "1 node".
      node_num=$(ray status | grep -c "1 node")
      # continue waiting if ray status not ok
      if [[ "$node_num" -ne "$KB_VLLM_N" ]]; then
        sleep 1
        continue
      fi
      # stdout is redirected first and stderr second so that BOTH streams land
      # in the log file; appending keeps the output of earlier attempts.
      # shellcheck disable=SC2086 -- EXTRA_ARGS must expand to separate words
      python -m vllm.entrypoints.api_server \
        --host 0.0.0.0 \
        --port 8000 \
        --model "${MODEL_NAME}" \
        --gpu-memory-utilization 0.95 \
        --max-num-seqs 512 \
        --max-num-batched-tokens 8192 \
        --tensor-parallel-size "${KB_VLLM_N}" \
        ${EXTRA_ARGS} >> log 2>&1
      code=$?
      if [ "$code" -eq 0 ]; then
        break
      fi
      # Append, so the message does not wipe the output of the failed run.
      echo "exit with code $code, wait for 1 second and try again..." >> log 2>&1
      sleep 1
    done
  ray-health-checker.sh: |
    #!/bin/bash
    # Every 3 seconds, compares the node count reported by `ray status` with
    # KB_VLLM_N and kills the vLLM API server process(es) on a mismatch.
    # wait ray to start when first run
    sleep 10
    while true; do
      node_num=$(ray status | grep -c "1 node")
      if [[ "$node_num" -ne "$KB_VLLM_N" ]]; then
        # if ray nodes not healthy, restart vllm
        vllm_pids=$(ps aux | grep "python -m vllm.entrypoints.api_server" | grep -v grep | awk '{print $2}')
        # Unquoted on purpose: one PID per word, so every matching process is
        # signalled even when more than one is found.
        for pid in $vllm_pids; do
          kill -9 "$pid"
        done
      fi
      sleep 3
    done