#!/bin/bash

# Copyright 2023 Matrix Origin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# check_log_count.sh
#
# Queries system_metrics.metric of a local MatrixOne instance and reports
# every mo_log_message_count sample (debug level excluded) whose
# per-second rate is above a threshold.
#
# Usage: check_log_count.sh [cnt_threshold [metric_interval]]
#
# Exit status:
#   0 - no sample is above the threshold, or the service could not be
#       reached (the check is skipped with a warning)
#   1 - at least one sample is above the threshold, help was requested,
#       or an argument is invalid
#   2 - the result file could not be created or the metric query failed

set -o nounset

## const
###############
readonly pid=$$
ts=$(date +%s)
readonly ts
# Path of the query result file; created by main() via mktemp.
out_log_count=""

readonly max_try_conn=3
# Client command kept as an array so every option stays one word.
readonly -a mod=(mysql -h 127.0.0.1 -P 6001 -udump -p111 system -A)
# Both values can be overridden from the command line (see usage).
metric_interval=60
count_threshold=1000

## function
###############

#######################################
# Print a message prefixed with the current local time.
# Arguments: the message words (joined with single spaces)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout
#######################################
echo_proxy() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
}

#######################################
# Print the effective settings of this run.
# Globals: count_threshold, metric_interval, out_log_count (read)
#######################################
show_env() {
  echo_proxy "This script is inspired by #9835"
  echo_proxy "arg count_threshold : $count_threshold"
  echo_proxy "arg metric_interval : $metric_interval"
  echo_proxy "out_log_count file  : $out_log_count"
}

#######################################
# Try to run "select 1" against mo up to max_try_conn times, sleeping
# one second after each failed attempt.
# Globals: mod, max_try_conn (read)
# Exits:   0 with a warning when every attempt failed, i.e. the check
#          is skipped rather than failed
#######################################
check_mo_service_alive() {
  local ret=0
  local idx
  for ((idx = 1; idx <= max_try_conn; idx++)); do
    echo_proxy "Try to access mo $idx times."
    "${mod[@]}" -Nse "select 1;" >/dev/null 2>&1
    ret=$?
    if [[ $ret -eq 0 ]]; then
      break
    fi
    # sleep 1 for retry
    sleep 1
  done
  if [[ $ret -ne 0 ]]; then
    echo_proxy "warning: failed to access mo-servcie through port 6001."
    exit 0
  fi
  echo_proxy "seccess to access mo-servcie through port 6001."
}

#######################################
# Count log messages per second (level=debug records excluded) and
# store every row above the threshold in $out_log_count.
#
# table system_metrics.metric example:
#   metric_name           collecttime                 value  node                                  role  account  type
#   mo_log_message_count  2023-08-03 15:08:08.591955  77     7c4dccb4-4d3c-41f8-b482-5251dc7a41bf  ALL   sys      error
#   mo_log_message_count  2023-08-03 15:08:08.591955  37     7c4dccb4-4d3c-41f8-b482-5251dc7a41bf  ALL   sys      info
#
# calculation result example:
#   collecttime                 cnt_per_second  node                                  role  level
#   2023-08-03 14:37:24.977181  35.78           7c4dccb4-4d3c-41f8-b482-5251dc7a41bf  ALL   error
#   2023-08-03 14:37:24.977181  31.00           7c4dccb4-4d3c-41f8-b482-5251dc7a41bf  ALL   info
#   2023-08-03 14:38:24.987134  21.02           7c4dccb4-4d3c-41f8-b482-5251dc7a41bf  ALL   error
#
# Globals: mod, metric_interval, count_threshold, out_log_count (read)
# Returns: 0 on success, 2 when the query failed
#######################################
get_log_count() {
  # metric_interval and count_threshold are validated as integers in
  # main() before they are interpolated here.
  local sql="select * from
(select collecttime, cast( value / $metric_interval as DECIMAL(38,2)) as cnt_per_second, node, role, type as level from system_metrics.metric
 where metric_name = 'mo_log_message_count' and type not in ('debug')) a
 where a.cnt_per_second > $count_threshold
 order by collecttime"
  echo_proxy "Query: $sql"
  # A failed query leaves an empty result file, which would otherwise be
  # indistinguishable from "no row above the threshold".
  if ! "${mod[@]}" -e "$sql" > "$out_log_count"; then
    echo_proxy "error: failed to query system_metrics.metric, log count not checked." >&2
    return 2
  fi
}

#######################################
# Evaluate the query result stored in $out_log_count.
# Globals: out_log_count, count_threshold, metric_interval (read)
# Outputs: a summary and, if any, the offending rows on stdout
# Returns: 0 when the result is empty, 1 otherwise
#######################################
check_log_count() {
  local rows
  rows=$(wc -l < "$out_log_count")
  # Arithmetic comparison tolerates the padding some wc builds emit.
  if ((rows == 0)); then
    echo_proxy "log messages spitting out per second < threshold(val: $count_threshold): OK!"
    return 0
  fi

  echo_proxy "log messages spitting out per second threshold(val: $count_threshold)"
  echo_proxy "each rows show last $metric_interval secs status"
  echo_proxy
  cat "$out_log_count"
  echo
  return 1
}

#######################################
# Print the command line help on stdout.
#######################################
usage() {
  cat << EOF
Usage: $0 [cnt_threshold [metric_interval]]
like:  $0
  or   $0 1000
  or   $0 1000 60

options
    cnt_threshold   - int, log messages per second threshold
                      default: $count_threshold
    metric_interval - int, metric collected interval
                      default: $metric_interval
EOF
}

#######################################
# Check that both tunables are integers that are safe to place in SQL.
# Globals: count_threshold, metric_interval (read)
# Returns: 0 when valid, 1 otherwise (reason printed on stderr)
#######################################
validate_args() {
  local -r uint_re='^[0-9]+$'
  # The interval is a divisor in the query, so it must be > 0.
  local -r positive_re='^0*[1-9][0-9]*$'
  if ! [[ "$count_threshold" =~ $uint_re ]]; then
    echo_proxy "error: cnt_threshold must be a non-negative integer: '$count_threshold'" >&2
    return 1
  fi
  if ! [[ "$metric_interval" =~ $positive_re ]]; then
    echo_proxy "error: metric_interval must be a positive integer: '$metric_interval'" >&2
    return 1
  fi
  return 0
}

## main
################

main() {
  if [[ $# -eq 1 ]]; then
    if [[ "$1" == "-h" || "$1" == "--help" ]]; then
      usage
      exit 1
    fi
    count_threshold=$1
  elif [[ $# -eq 2 ]]; then
    count_threshold=$1
    metric_interval=$2
  fi

  if ! validate_args; then
    usage >&2
    exit 1
  fi

  # mktemp creates the file atomically with an unpredictable suffix. The
  # file is left in place on purpose: its path is printed by show_env so
  # the raw result can be inspected after the run.
  if ! out_log_count=$(mktemp "/tmp/log_count.pid${pid}.ts${ts}.XXXXXX"); then
    echo_proxy "error: failed to create the result file under /tmp." >&2
    exit 2
  fi

  show_env
  check_mo_service_alive
  get_log_count || exit 2
  check_log_count
  local ret=$?
  if [[ $ret -ne 0 ]]; then
    echo_proxy "log messages spitting out per second > threshold(val: $count_threshold): NOT ok!!!"
    exit 1
  fi
}

main "$@"