go.etcd.io/etcd@v3.3.27+incompatible/rafthttp/stream.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rafthttp
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"net/http"
    23  	"path"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"golang.org/x/time/rate"
    29  
    30  	"github.com/coreos/etcd/etcdserver/stats"
    31  	"github.com/coreos/etcd/pkg/httputil"
    32  	"github.com/coreos/etcd/pkg/transport"
    33  	"github.com/coreos/etcd/pkg/types"
    34  	"github.com/coreos/etcd/raft/raftpb"
    35  	"github.com/coreos/etcd/version"
    36  	"github.com/coreos/go-semver/semver"
    37  )
    38  
    39  const (
    40  	streamTypeMessage  streamType = "message"
    41  	streamTypeMsgAppV2 streamType = "msgappv2"
    42  
    43  	streamBufSize = 4096
    44  )
    45  
    46  var (
    47  	errUnsupportedStreamType = fmt.Errorf("unsupported stream type")
    48  
    49  	// the key is in string format "major.minor.patch"
    50  	supportedStream = map[string][]streamType{
    51  		"2.0.0": {},
    52  		"2.1.0": {streamTypeMsgAppV2, streamTypeMessage},
    53  		"2.2.0": {streamTypeMsgAppV2, streamTypeMessage},
    54  		"2.3.0": {streamTypeMsgAppV2, streamTypeMessage},
    55  		"3.0.0": {streamTypeMsgAppV2, streamTypeMessage},
    56  		"3.1.0": {streamTypeMsgAppV2, streamTypeMessage},
    57  		"3.2.0": {streamTypeMsgAppV2, streamTypeMessage},
    58  		"3.3.0": {streamTypeMsgAppV2, streamTypeMessage},
    59  	}
    60  )
    61  
    62  type streamType string
    63  
    64  func (t streamType) endpoint() string {
    65  	switch t {
    66  	case streamTypeMsgAppV2:
    67  		return path.Join(RaftStreamPrefix, "msgapp")
    68  	case streamTypeMessage:
    69  		return path.Join(RaftStreamPrefix, "message")
    70  	default:
    71  		plog.Panicf("unhandled stream type %v", t)
    72  		return ""
    73  	}
    74  }
    75  
    76  func (t streamType) String() string {
    77  	switch t {
    78  	case streamTypeMsgAppV2:
    79  		return "stream MsgApp v2"
    80  	case streamTypeMessage:
    81  		return "stream Message"
    82  	default:
    83  		return "unknown stream"
    84  	}
    85  }
    86  
    87  var (
    88  	// linkHeartbeatMessage is a special message used as heartbeat message in
    89  	// link layer. It never conflicts with messages from raft because raft
    90  	// doesn't send out messages without From and To fields.
    91  	linkHeartbeatMessage = raftpb.Message{Type: raftpb.MsgHeartbeat}
    92  )
    93  
    94  func isLinkHeartbeatMessage(m *raftpb.Message) bool {
    95  	return m.Type == raftpb.MsgHeartbeat && m.From == 0 && m.To == 0
    96  }
    97  
    98  type outgoingConn struct {
    99  	t streamType
   100  	io.Writer
   101  	http.Flusher
   102  	io.Closer
   103  }
   104  
   105  // streamWriter writes messages to the attached outgoingConn.
   106  type streamWriter struct {
   107  	peerID types.ID
   108  	status *peerStatus
   109  	fs     *stats.FollowerStats
   110  	r      Raft
   111  
   112  	mu      sync.Mutex // guard field working and closer
   113  	closer  io.Closer
   114  	working bool
   115  
   116  	msgc  chan raftpb.Message
   117  	connc chan *outgoingConn
   118  	stopc chan struct{}
   119  	done  chan struct{}
   120  }
   121  
   122  // startStreamWriter creates a streamWrite and starts a long running go-routine that accepts
   123  // messages and writes to the attached outgoing connection.
   124  func startStreamWriter(id types.ID, status *peerStatus, fs *stats.FollowerStats, r Raft) *streamWriter {
   125  	w := &streamWriter{
   126  		peerID: id,
   127  		status: status,
   128  		fs:     fs,
   129  		r:      r,
   130  		msgc:   make(chan raftpb.Message, streamBufSize),
   131  		connc:  make(chan *outgoingConn),
   132  		stopc:  make(chan struct{}),
   133  		done:   make(chan struct{}),
   134  	}
   135  	go w.run()
   136  	return w
   137  }
   138  
   139  func (cw *streamWriter) run() {
   140  	var (
   141  		msgc       chan raftpb.Message
   142  		heartbeatc <-chan time.Time
   143  		t          streamType
   144  		enc        encoder
   145  		flusher    http.Flusher
   146  		batched    int
   147  	)
   148  	tickc := time.NewTicker(ConnReadTimeout / 3)
   149  	defer tickc.Stop()
   150  	unflushed := 0
   151  
   152  	plog.Infof("started streaming with peer %s (writer)", cw.peerID)
   153  
   154  	for {
   155  		select {
   156  		case <-heartbeatc:
   157  			err := enc.encode(&linkHeartbeatMessage)
   158  			unflushed += linkHeartbeatMessage.Size()
   159  			if err == nil {
   160  				flusher.Flush()
   161  				batched = 0
   162  				sentBytes.WithLabelValues(cw.peerID.String()).Add(float64(unflushed))
   163  				unflushed = 0
   164  				continue
   165  			}
   166  
   167  			cw.status.deactivate(failureType{source: t.String(), action: "heartbeat"}, err.Error())
   168  
   169  			sentFailures.WithLabelValues(cw.peerID.String()).Inc()
   170  			cw.close()
   171  			plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
   172  			heartbeatc, msgc = nil, nil
   173  
   174  		case m := <-msgc:
   175  			err := enc.encode(&m)
   176  			if err == nil {
   177  				unflushed += m.Size()
   178  
   179  				if len(msgc) == 0 || batched > streamBufSize/2 {
   180  					flusher.Flush()
   181  					sentBytes.WithLabelValues(cw.peerID.String()).Add(float64(unflushed))
   182  					unflushed = 0
   183  					batched = 0
   184  				} else {
   185  					batched++
   186  				}
   187  
   188  				continue
   189  			}
   190  
   191  			cw.status.deactivate(failureType{source: t.String(), action: "write"}, err.Error())
   192  			cw.close()
   193  			plog.Warningf("lost the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
   194  			heartbeatc, msgc = nil, nil
   195  			cw.r.ReportUnreachable(m.To)
   196  			sentFailures.WithLabelValues(cw.peerID.String()).Inc()
   197  
   198  		case conn := <-cw.connc:
   199  			cw.mu.Lock()
   200  			closed := cw.closeUnlocked()
   201  			t = conn.t
   202  			switch conn.t {
   203  			case streamTypeMsgAppV2:
   204  				enc = newMsgAppV2Encoder(conn.Writer, cw.fs)
   205  			case streamTypeMessage:
   206  				enc = &messageEncoder{w: conn.Writer}
   207  			default:
   208  				plog.Panicf("unhandled stream type %s", conn.t)
   209  			}
   210  			flusher = conn.Flusher
   211  			unflushed = 0
   212  			cw.status.activate()
   213  			cw.closer = conn.Closer
   214  			cw.working = true
   215  			cw.mu.Unlock()
   216  
   217  			if closed {
   218  				plog.Warningf("closed an existing TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
   219  			}
   220  			plog.Infof("established a TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
   221  			heartbeatc, msgc = tickc.C, cw.msgc
   222  		case <-cw.stopc:
   223  			if cw.close() {
   224  				plog.Infof("closed the TCP streaming connection with peer %s (%s writer)", cw.peerID, t)
   225  			}
   226  			plog.Infof("stopped streaming with peer %s (writer)", cw.peerID)
   227  			close(cw.done)
   228  			return
   229  		}
   230  	}
   231  }
   232  
   233  func (cw *streamWriter) writec() (chan<- raftpb.Message, bool) {
   234  	cw.mu.Lock()
   235  	defer cw.mu.Unlock()
   236  	return cw.msgc, cw.working
   237  }
   238  
   239  func (cw *streamWriter) close() bool {
   240  	cw.mu.Lock()
   241  	defer cw.mu.Unlock()
   242  	return cw.closeUnlocked()
   243  }
   244  
   245  func (cw *streamWriter) closeUnlocked() bool {
   246  	if !cw.working {
   247  		return false
   248  	}
   249  	if err := cw.closer.Close(); err != nil {
   250  		plog.Errorf("peer %s (writer) connection close error: %v", cw.peerID, err)
   251  	}
   252  	if len(cw.msgc) > 0 {
   253  		cw.r.ReportUnreachable(uint64(cw.peerID))
   254  	}
   255  	cw.msgc = make(chan raftpb.Message, streamBufSize)
   256  	cw.working = false
   257  	return true
   258  }
   259  
   260  func (cw *streamWriter) attach(conn *outgoingConn) bool {
   261  	select {
   262  	case cw.connc <- conn:
   263  		return true
   264  	case <-cw.done:
   265  		return false
   266  	}
   267  }
   268  
   269  func (cw *streamWriter) stop() {
   270  	close(cw.stopc)
   271  	<-cw.done
   272  }
   273  
   274  // streamReader is a long-running go-routine that dials to the remote stream
   275  // endpoint and reads messages from the response body returned.
   276  type streamReader struct {
   277  	peerID types.ID
   278  	typ    streamType
   279  
   280  	tr     *Transport
   281  	picker *urlPicker
   282  	status *peerStatus
   283  	recvc  chan<- raftpb.Message
   284  	propc  chan<- raftpb.Message
   285  
   286  	rl *rate.Limiter // alters the frequency of dial retrial attempts
   287  
   288  	errorc chan<- error
   289  
   290  	mu     sync.Mutex
   291  	paused bool
   292  	closer io.Closer
   293  
   294  	ctx    context.Context
   295  	cancel context.CancelFunc
   296  	done   chan struct{}
   297  }
   298  
   299  func (cr *streamReader) start() {
   300  	cr.done = make(chan struct{})
   301  	if cr.errorc == nil {
   302  		cr.errorc = cr.tr.ErrorC
   303  	}
   304  	if cr.ctx == nil {
   305  		cr.ctx, cr.cancel = context.WithCancel(context.Background())
   306  	}
   307  	go cr.run()
   308  }
   309  
   310  func (cr *streamReader) run() {
   311  	t := cr.typ
   312  	plog.Infof("started streaming with peer %s (%s reader)", cr.peerID, t)
   313  	for {
   314  		rc, err := cr.dial(t)
   315  		if err != nil {
   316  			if err != errUnsupportedStreamType {
   317  				cr.status.deactivate(failureType{source: t.String(), action: "dial"}, err.Error())
   318  			}
   319  		} else {
   320  			cr.status.activate()
   321  			plog.Infof("established a TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ)
   322  			err = cr.decodeLoop(rc, t)
   323  			plog.Warningf("lost the TCP streaming connection with peer %s (%s reader)", cr.peerID, cr.typ)
   324  			switch {
   325  			// all data is read out
   326  			case err == io.EOF:
   327  			// connection is closed by the remote
   328  			case transport.IsClosedConnError(err):
   329  			default:
   330  				cr.status.deactivate(failureType{source: t.String(), action: "read"}, err.Error())
   331  			}
   332  		}
   333  		// Wait for a while before new dial attempt
   334  		err = cr.rl.Wait(cr.ctx)
   335  		if cr.ctx.Err() != nil {
   336  			plog.Infof("stopped streaming with peer %s (%s reader)", cr.peerID, t)
   337  			close(cr.done)
   338  			return
   339  		}
   340  		if err != nil {
   341  			plog.Errorf("streaming with peer %s (%s reader) rate limiter error: %v", cr.peerID, t, err)
   342  		}
   343  	}
   344  }
   345  
   346  func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
   347  	var dec decoder
   348  	cr.mu.Lock()
   349  	switch t {
   350  	case streamTypeMsgAppV2:
   351  		dec = newMsgAppV2Decoder(rc, cr.tr.ID, cr.peerID)
   352  	case streamTypeMessage:
   353  		dec = &messageDecoder{r: rc}
   354  	default:
   355  		plog.Panicf("unhandled stream type %s", t)
   356  	}
   357  	select {
   358  	case <-cr.ctx.Done():
   359  		cr.mu.Unlock()
   360  		if err := rc.Close(); err != nil {
   361  			return err
   362  		}
   363  		return io.EOF
   364  	default:
   365  		cr.closer = rc
   366  	}
   367  	cr.mu.Unlock()
   368  
   369  	for {
   370  		m, err := dec.decode()
   371  		if err != nil {
   372  			cr.mu.Lock()
   373  			cr.close()
   374  			cr.mu.Unlock()
   375  			return err
   376  		}
   377  
   378  		receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(m.Size()))
   379  
   380  		cr.mu.Lock()
   381  		paused := cr.paused
   382  		cr.mu.Unlock()
   383  
   384  		if paused {
   385  			continue
   386  		}
   387  
   388  		if isLinkHeartbeatMessage(&m) {
   389  			// raft is not interested in link layer
   390  			// heartbeat message, so we should ignore
   391  			// it.
   392  			continue
   393  		}
   394  
   395  		recvc := cr.recvc
   396  		if m.Type == raftpb.MsgProp {
   397  			recvc = cr.propc
   398  		}
   399  
   400  		select {
   401  		case recvc <- m:
   402  		default:
   403  			if cr.status.isActive() {
   404  				plog.MergeWarningf("dropped internal raft message from %s since receiving buffer is full (overloaded network)", types.ID(m.From))
   405  			}
   406  			plog.Debugf("dropped %s from %s since receiving buffer is full", m.Type, types.ID(m.From))
   407  			recvFailures.WithLabelValues(types.ID(m.From).String()).Inc()
   408  		}
   409  	}
   410  }
   411  
   412  func (cr *streamReader) stop() {
   413  	cr.mu.Lock()
   414  	cr.cancel()
   415  	cr.close()
   416  	cr.mu.Unlock()
   417  	<-cr.done
   418  }
   419  
   420  func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) {
   421  	u := cr.picker.pick()
   422  	uu := u
   423  	uu.Path = path.Join(t.endpoint(), cr.tr.ID.String())
   424  
   425  	req, err := http.NewRequest("GET", uu.String(), nil)
   426  	if err != nil {
   427  		cr.picker.unreachable(u)
   428  		return nil, fmt.Errorf("failed to make http request to %v (%v)", u, err)
   429  	}
   430  	req.Header.Set("X-Server-From", cr.tr.ID.String())
   431  	req.Header.Set("X-Server-Version", version.Version)
   432  	req.Header.Set("X-Min-Cluster-Version", version.MinClusterVersion)
   433  	req.Header.Set("X-Etcd-Cluster-ID", cr.tr.ClusterID.String())
   434  	req.Header.Set("X-Raft-To", cr.peerID.String())
   435  
   436  	setPeerURLsHeader(req, cr.tr.URLs)
   437  
   438  	req = req.WithContext(cr.ctx)
   439  
   440  	cr.mu.Lock()
   441  	select {
   442  	case <-cr.ctx.Done():
   443  		cr.mu.Unlock()
   444  		return nil, fmt.Errorf("stream reader is stopped")
   445  	default:
   446  	}
   447  	cr.mu.Unlock()
   448  
   449  	resp, err := cr.tr.streamRt.RoundTrip(req)
   450  	if err != nil {
   451  		cr.picker.unreachable(u)
   452  		return nil, err
   453  	}
   454  
   455  	rv := serverVersion(resp.Header)
   456  	lv := semver.Must(semver.NewVersion(version.Version))
   457  	if compareMajorMinorVersion(rv, lv) == -1 && !checkStreamSupport(rv, t) {
   458  		httputil.GracefulClose(resp)
   459  		cr.picker.unreachable(u)
   460  		return nil, errUnsupportedStreamType
   461  	}
   462  
   463  	switch resp.StatusCode {
   464  	case http.StatusGone:
   465  		httputil.GracefulClose(resp)
   466  		cr.picker.unreachable(u)
   467  		reportCriticalError(errMemberRemoved, cr.errorc)
   468  		return nil, errMemberRemoved
   469  	case http.StatusOK:
   470  		return resp.Body, nil
   471  	case http.StatusNotFound:
   472  		httputil.GracefulClose(resp)
   473  		cr.picker.unreachable(u)
   474  		return nil, fmt.Errorf("peer %s failed to find local node %s", cr.peerID, cr.tr.ID)
   475  	case http.StatusPreconditionFailed:
   476  		b, err := ioutil.ReadAll(resp.Body)
   477  		if err != nil {
   478  			cr.picker.unreachable(u)
   479  			return nil, err
   480  		}
   481  		httputil.GracefulClose(resp)
   482  		cr.picker.unreachable(u)
   483  
   484  		switch strings.TrimSuffix(string(b), "\n") {
   485  		case errIncompatibleVersion.Error():
   486  			plog.Errorf("request sent was ignored by peer %s (server version incompatible)", cr.peerID)
   487  			return nil, errIncompatibleVersion
   488  		case errClusterIDMismatch.Error():
   489  			plog.Errorf("request sent was ignored (cluster ID mismatch: peer[%s]=%s, local=%s)",
   490  				cr.peerID, resp.Header.Get("X-Etcd-Cluster-ID"), cr.tr.ClusterID)
   491  			return nil, errClusterIDMismatch
   492  		default:
   493  			return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b))
   494  		}
   495  	default:
   496  		httputil.GracefulClose(resp)
   497  		cr.picker.unreachable(u)
   498  		return nil, fmt.Errorf("unhandled http status %d", resp.StatusCode)
   499  	}
   500  }
   501  
   502  func (cr *streamReader) close() {
   503  	if cr.closer != nil {
   504  		if err := cr.closer.Close(); err != nil {
   505  			plog.Errorf("peer %s (reader) connection close error: %v", cr.peerID, err)
   506  		}
   507  	}
   508  	cr.closer = nil
   509  }
   510  
   511  func (cr *streamReader) pause() {
   512  	cr.mu.Lock()
   513  	defer cr.mu.Unlock()
   514  	cr.paused = true
   515  }
   516  
   517  func (cr *streamReader) resume() {
   518  	cr.mu.Lock()
   519  	defer cr.mu.Unlock()
   520  	cr.paused = false
   521  }
   522  
   523  // checkStreamSupport checks whether the stream type is supported in the
   524  // given version.
   525  func checkStreamSupport(v *semver.Version, t streamType) bool {
   526  	nv := &semver.Version{Major: v.Major, Minor: v.Minor}
   527  	for _, s := range supportedStream[nv.String()] {
   528  		if s == t {
   529  			return true
   530  		}
   531  	}
   532  	return false
   533  }