github.com/matrixorigin/matrixone@v1.2.0/optools/check_log_count.sh (about)

     1  #!/bin/bash
     2  
     3  # Copyright 2023 Matrix Origin
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #      http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  set -o nounset
    18  
    19  ## const
    20  ###############
    21  pid=$$
    22  ts=`date +%s`
    23  out_log_count="/tmp/log_count.pid${pid}.ts${ts}"
    24  
    25  max_try_conn=3
    26  mod='mysql -h 127.0.0.1 -P 6001 -udump -p111 system -A'
    27  metric_interval=60
    28  count_threshold=1000
    29  
    30  ## function
    31  ###############
    32  
    33  echo_proxy() {
    34      echo "[`date '+%F %T'`] $@"
    35  }
    36  
    37  show_env() {
    38      echo_proxy "This script is inspired by #9835"
    39      echo_proxy "arg count_threshold : $count_threshold"
    40      echo_proxy "arg metric_interval : $metric_interval"
    41      echo_proxy "out_log_count file  : $out_log_count"
    42  }
    43  
    44  check_mo_service_alive() {
    45      ## Action: try access mo {max_try_conn} times.
    46      ##         if failed, exit 0 with note: "failed to access mo-servcie ..."
    47      local ret=0
    48      for idx in `seq 1 $max_try_conn`;
    49      do
    50          echo_proxy "Try to access mo $idx times."
    51          $mod -Nse "select 1;" 1>/dev/null 2>&1
    52          ret=$?
    53          if [ $ret -eq 0 ]; then
    54              break
    55          fi
    56          # sleep 1 for retry
    57          sleep 1
    58      done
    59      if [ $ret -ne 0 ]; then
    60          echo_proxy "warning: failed to access mo-servcie through port 6001."
    61          exit 0
    62      fi
    63      echo_proxy "seccess to access mo-servcie through port 6001."
    64  }
    65  
    66  get_log_count() {
    67      # count log message per second (exclude level=debug record)
    68      #
    69      ### table system_metrics.metric example:
    70      # metric_name collecttime value   node    role    account type
    71      # mo_log_message_count    2023-08-03 15:08:08.591955  77  7c4dccb4-4d3c-41f8-b482-5251dc7a41bf    ALL sys error
    72      # mo_log_message_count    2023-08-03 15:08:08.591955  37  7c4dccb4-4d3c-41f8-b482-5251dc7a41bf    ALL sys info
    73      #
    74      ### calculation result example
    75      # collecttime	cnt_per_second	node	role	level
    76      # 2023-08-03 14:37:24.977181	35.78	7c4dccb4-4d3c-41f8-b482-5251dc7a41bf	ALL	error
    77      # 2023-08-03 14:37:24.977181	31.00	7c4dccb4-4d3c-41f8-b482-5251dc7a41bf	ALL	info
    78      # 2023-08-03 14:38:24.987134	21.02	7c4dccb4-4d3c-41f8-b482-5251dc7a41bf	ALL	error
    79      #
    80      local sql=`cat << EOF
    81  select * from
    82  (select collecttime, cast( value / $metric_interval as DECIMAL(38,2)) as cnt_per_second, node, role, type as level from system_metrics.metric
    83   where metric_name = 'mo_log_message_count' and type not in ('debug')) a
    84   where a.cnt_per_second > $count_threshold
    85   order by collecttime
    86  EOF`
    87      echo_proxy "Query: $sql"
    88      $mod -e "$sql" > $out_log_count
    89  }
    90  
    91  check_log_count() {
    92      local rows=`wc -l $out_log_count | awk '{print $1}'`
    93      if [ "$rows" == "0" ]; then
    94          echo_proxy "log messages spitting out per second < threshold(val: $count_threshold): OK!"
    95          return 0
    96      fi
    97  
    98      echo_proxy "log messages spitting out per second threshold(val: $count_threshold)"
    99      echo_proxy "each rows show last $metric_interval secs status"
   100      echo_proxy
   101      cat $out_log_count
   102      echo
   103      return 1
   104  }
   105  
   106  usage() {
   107      cat << EOF
   108  Usage: $0 [cnt_threshold [metric_interval]]
   109  like:  $0
   110    or   $0 1000
   111    or   $0 1000 60
   112  
   113  options
   114      cnt_threshold   - int, log messages per second threshold
   115                        default: $count_threshold
   116      metric_interval - int, metric collected interval
   117                        default: $metric_interval
   118  EOF
   119  }
   120  
   121  ## main
   122  ################
   123  
   124  if [ $# -eq 1 ]; then
   125      arg=$1
   126      if [ "$arg" == "-h" -o "$arg" == "--help" ]; then
   127          usage
   128          exit 1
   129      fi
   130      count_threshold=$arg
   131  elif [ $# -eq 2 ]; then
   132      count_threshold=$1
   133      metric_interval=$2
   134  fi
   135  
   136  show_env
   137  check_mo_service_alive
   138  get_log_count
   139  check_log_count
   140  ret=$?
   141  if [ "$ret" != "0" ]; then
   142      echo_proxy "log messages spitting out per second > threshold(val: $count_threshold): NOT ok!!!"
   143      exit 1
   144  fi