vitess.io/vitess@v0.16.2/go/vt/vtorc/process/health_dao.go (about)

     1  /*
     2     Copyright 2015 Shlomi Noach, courtesy Booking.com
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package process
    18  
    19  import (
    20  	"time"
    21  
    22  	"vitess.io/vitess/go/vt/external/golib/sqlutils"
    23  	"vitess.io/vitess/go/vt/log"
    24  	"vitess.io/vitess/go/vt/vtorc/config"
    25  	"vitess.io/vitess/go/vt/vtorc/db"
    26  )
    27  
    28  // WriteRegisterNode writes down this node in the node_health table
    29  func WriteRegisterNode(nodeHealth *NodeHealth) (healthy bool, err error) {
    30  	timeNow := time.Now()
    31  	reportedAgo := timeNow.Sub(nodeHealth.LastReported)
    32  	reportedSecondsAgo := int64(reportedAgo.Seconds())
    33  	if reportedSecondsAgo > config.HealthPollSeconds*2 {
    34  		// This entry is too old. No reason to persist it; already expired.
    35  		return false, nil
    36  	}
    37  
    38  	nodeHealth.onceHistory.Do(func() {
    39  		_, _ = db.ExecVTOrc(`
    40  			insert ignore into node_health_history
    41  				(hostname, token, first_seen_active, extra_info, command, app_version)
    42  			values
    43  				(?, ?, NOW(), ?, ?, ?)
    44  			`,
    45  			nodeHealth.Hostname, nodeHealth.Token, nodeHealth.ExtraInfo, nodeHealth.Command,
    46  			nodeHealth.AppVersion,
    47  		)
    48  	})
    49  	{
    50  		sqlResult, err := db.ExecVTOrc(`
    51  			update node_health set
    52  				last_seen_active = now() - interval ? second,
    53  				extra_info = case when ? != '' then ? else extra_info end,
    54  				app_version = ?,
    55  				incrementing_indicator = incrementing_indicator + 1
    56  			where
    57  				hostname = ?
    58  				and token = ?
    59  			`,
    60  			reportedSecondsAgo,
    61  			nodeHealth.ExtraInfo, nodeHealth.ExtraInfo,
    62  			nodeHealth.AppVersion,
    63  			nodeHealth.Hostname, nodeHealth.Token,
    64  		)
    65  		if err != nil {
    66  			log.Error(err)
    67  			return false, err
    68  		}
    69  		rows, err := sqlResult.RowsAffected()
    70  		if err != nil {
    71  			log.Error(err)
    72  			return false, err
    73  		}
    74  		if rows > 0 {
    75  			return true, nil
    76  		}
    77  	}
    78  	// Got here? The UPDATE didn't work. Row isn't there.
    79  	{
    80  		dbBackend := config.Config.SQLite3DataFile
    81  		sqlResult, err := db.ExecVTOrc(`
    82  			insert ignore into node_health
    83  				(hostname, token, first_seen_active, last_seen_active, extra_info, command, app_version, db_backend)
    84  			values (
    85  				?, ?,
    86  				now() - interval ? second, now() - interval ? second,
    87  				?, ?, ?, ?)
    88  			`,
    89  			nodeHealth.Hostname, nodeHealth.Token,
    90  			reportedSecondsAgo, reportedSecondsAgo,
    91  			nodeHealth.ExtraInfo, nodeHealth.Command,
    92  			nodeHealth.AppVersion, dbBackend,
    93  		)
    94  		if err != nil {
    95  			log.Error(err)
    96  			return false, err
    97  		}
    98  		rows, err := sqlResult.RowsAffected()
    99  		if err != nil {
   100  			log.Error(err)
   101  			return false, err
   102  		}
   103  		if rows > 0 {
   104  			return true, nil
   105  		}
   106  	}
   107  	return false, nil
   108  }
   109  
   110  // ExpireAvailableNodes is an aggressive purging method to remove
   111  // node entries who have skipped their keepalive for two times.
   112  func ExpireAvailableNodes() {
   113  	_, err := db.ExecVTOrc(`
   114  			delete
   115  				from node_health
   116  			where
   117  				last_seen_active < now() - interval ? second
   118  			`,
   119  		config.HealthPollSeconds*5,
   120  	)
   121  	if err != nil {
   122  		log.Errorf("ExpireAvailableNodes: failed to remove old entries: %+v", err)
   123  	}
   124  }
   125  
   126  // ExpireNodesHistory cleans up the nodes history and is run by
   127  // the vtorc active node.
   128  func ExpireNodesHistory() error {
   129  	_, err := db.ExecVTOrc(`
   130  			delete
   131  				from node_health_history
   132  			where
   133  				first_seen_active < now() - interval ? hour
   134  			`,
   135  		config.UnseenInstanceForgetHours,
   136  	)
   137  	if err != nil {
   138  		log.Error(err)
   139  	}
   140  	return err
   141  }
   142  
   143  func ReadAvailableNodes(onlyHTTPNodes bool) (nodes [](*NodeHealth), err error) {
   144  	extraInfo := ""
   145  	if onlyHTTPNodes {
   146  		extraInfo = string(VTOrcExecutionHTTPMode)
   147  	}
   148  	query := `
   149  		select
   150  			hostname, token, app_version, first_seen_active, last_seen_active, db_backend
   151  		from
   152  			node_health
   153  		where
   154  			last_seen_active > now() - interval ? second
   155  			and ? in (extra_info, '')
   156  		order by
   157  			hostname
   158  		`
   159  
   160  	err = db.QueryVTOrc(query, sqlutils.Args(config.HealthPollSeconds*2, extraInfo), func(m sqlutils.RowMap) error {
   161  		nodeHealth := &NodeHealth{
   162  			Hostname:        m.GetString("hostname"),
   163  			Token:           m.GetString("token"),
   164  			AppVersion:      m.GetString("app_version"),
   165  			FirstSeenActive: m.GetString("first_seen_active"),
   166  			LastSeenActive:  m.GetString("last_seen_active"),
   167  			DBBackend:       m.GetString("db_backend"),
   168  		}
   169  		nodes = append(nodes, nodeHealth)
   170  		return nil
   171  	})
   172  	if err != nil {
   173  		log.Error(err)
   174  	}
   175  	return nodes, err
   176  }