go.etcd.io/etcd@v3.3.27+incompatible/rafthttp/msgappv2_codec.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rafthttp
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"io"
    21  	"time"
    22  
    23  	"github.com/coreos/etcd/etcdserver/stats"
    24  	"github.com/coreos/etcd/pkg/pbutil"
    25  	"github.com/coreos/etcd/pkg/types"
    26  	"github.com/coreos/etcd/raft/raftpb"
    27  )
    28  
    29  const (
    30  	msgTypeLinkHeartbeat uint8 = 0
    31  	msgTypeAppEntries    uint8 = 1
    32  	msgTypeApp           uint8 = 2
    33  
    34  	msgAppV2BufSize = 1024 * 1024
    35  )
    36  
    37  // msgappv2 stream sends three types of message: linkHeartbeatMessage,
    38  // AppEntries and MsgApp. AppEntries is the MsgApp that is sent in
    39  // replicate state in raft, whose index and term are fully predictable.
    40  //
    41  // Data format of linkHeartbeatMessage:
    42  // | offset | bytes | description |
    43  // +--------+-------+-------------+
    44  // | 0      | 1     | \x00        |
    45  //
    46  // Data format of AppEntries:
    47  // | offset | bytes | description |
    48  // +--------+-------+-------------+
    49  // | 0      | 1     | \x01        |
    50  // | 1      | 8     | length of entries |
    51  // | 9      | 8     | length of first entry |
    52  // | 17     | n1    | first entry |
    53  // ...
    54  // | x      | 8     | length of k-th entry data |
    55  // | x+8    | nk    | k-th entry data |
    56  // | x+8+nk | 8     | commit index |
    57  //
    58  // Data format of MsgApp:
    59  // | offset | bytes | description |
    60  // +--------+-------+-------------+
    61  // | 0      | 1     | \x02        |
    62  // | 1      | 8     | length of encoded message |
    63  // | 9      | n     | encoded message |
    64  type msgAppV2Encoder struct {
    65  	w  io.Writer
    66  	fs *stats.FollowerStats
    67  
    68  	term      uint64
    69  	index     uint64
    70  	buf       []byte
    71  	uint64buf []byte
    72  	uint8buf  []byte
    73  }
    74  
    75  func newMsgAppV2Encoder(w io.Writer, fs *stats.FollowerStats) *msgAppV2Encoder {
    76  	return &msgAppV2Encoder{
    77  		w:         w,
    78  		fs:        fs,
    79  		buf:       make([]byte, msgAppV2BufSize),
    80  		uint64buf: make([]byte, 8),
    81  		uint8buf:  make([]byte, 1),
    82  	}
    83  }
    84  
    85  func (enc *msgAppV2Encoder) encode(m *raftpb.Message) error {
    86  	start := time.Now()
    87  	switch {
    88  	case isLinkHeartbeatMessage(m):
    89  		enc.uint8buf[0] = byte(msgTypeLinkHeartbeat)
    90  		if _, err := enc.w.Write(enc.uint8buf); err != nil {
    91  			return err
    92  		}
    93  	case enc.index == m.Index && enc.term == m.LogTerm && m.LogTerm == m.Term:
    94  		enc.uint8buf[0] = byte(msgTypeAppEntries)
    95  		if _, err := enc.w.Write(enc.uint8buf); err != nil {
    96  			return err
    97  		}
    98  		// write length of entries
    99  		binary.BigEndian.PutUint64(enc.uint64buf, uint64(len(m.Entries)))
   100  		if _, err := enc.w.Write(enc.uint64buf); err != nil {
   101  			return err
   102  		}
   103  		for i := 0; i < len(m.Entries); i++ {
   104  			// write length of entry
   105  			binary.BigEndian.PutUint64(enc.uint64buf, uint64(m.Entries[i].Size()))
   106  			if _, err := enc.w.Write(enc.uint64buf); err != nil {
   107  				return err
   108  			}
   109  			if n := m.Entries[i].Size(); n < msgAppV2BufSize {
   110  				if _, err := m.Entries[i].MarshalTo(enc.buf); err != nil {
   111  					return err
   112  				}
   113  				if _, err := enc.w.Write(enc.buf[:n]); err != nil {
   114  					return err
   115  				}
   116  			} else {
   117  				if _, err := enc.w.Write(pbutil.MustMarshal(&m.Entries[i])); err != nil {
   118  					return err
   119  				}
   120  			}
   121  			enc.index++
   122  		}
   123  		// write commit index
   124  		binary.BigEndian.PutUint64(enc.uint64buf, m.Commit)
   125  		if _, err := enc.w.Write(enc.uint64buf); err != nil {
   126  			return err
   127  		}
   128  		enc.fs.Succ(time.Since(start))
   129  	default:
   130  		if err := binary.Write(enc.w, binary.BigEndian, msgTypeApp); err != nil {
   131  			return err
   132  		}
   133  		// write size of message
   134  		if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil {
   135  			return err
   136  		}
   137  		// write message
   138  		if _, err := enc.w.Write(pbutil.MustMarshal(m)); err != nil {
   139  			return err
   140  		}
   141  
   142  		enc.term = m.Term
   143  		enc.index = m.Index
   144  		if l := len(m.Entries); l > 0 {
   145  			enc.index = m.Entries[l-1].Index
   146  		}
   147  		enc.fs.Succ(time.Since(start))
   148  	}
   149  	return nil
   150  }
   151  
   152  type msgAppV2Decoder struct {
   153  	r             io.Reader
   154  	local, remote types.ID
   155  
   156  	term      uint64
   157  	index     uint64
   158  	buf       []byte
   159  	uint64buf []byte
   160  	uint8buf  []byte
   161  }
   162  
   163  func newMsgAppV2Decoder(r io.Reader, local, remote types.ID) *msgAppV2Decoder {
   164  	return &msgAppV2Decoder{
   165  		r:         r,
   166  		local:     local,
   167  		remote:    remote,
   168  		buf:       make([]byte, msgAppV2BufSize),
   169  		uint64buf: make([]byte, 8),
   170  		uint8buf:  make([]byte, 1),
   171  	}
   172  }
   173  
   174  func (dec *msgAppV2Decoder) decode() (raftpb.Message, error) {
   175  	var (
   176  		m   raftpb.Message
   177  		typ uint8
   178  	)
   179  	if _, err := io.ReadFull(dec.r, dec.uint8buf); err != nil {
   180  		return m, err
   181  	}
   182  	typ = uint8(dec.uint8buf[0])
   183  	switch typ {
   184  	case msgTypeLinkHeartbeat:
   185  		return linkHeartbeatMessage, nil
   186  	case msgTypeAppEntries:
   187  		m = raftpb.Message{
   188  			Type:    raftpb.MsgApp,
   189  			From:    uint64(dec.remote),
   190  			To:      uint64(dec.local),
   191  			Term:    dec.term,
   192  			LogTerm: dec.term,
   193  			Index:   dec.index,
   194  		}
   195  
   196  		// decode entries
   197  		if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
   198  			return m, err
   199  		}
   200  		l := binary.BigEndian.Uint64(dec.uint64buf)
   201  		m.Entries = make([]raftpb.Entry, int(l))
   202  		for i := 0; i < int(l); i++ {
   203  			if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
   204  				return m, err
   205  			}
   206  			size := binary.BigEndian.Uint64(dec.uint64buf)
   207  			var buf []byte
   208  			if size < msgAppV2BufSize {
   209  				buf = dec.buf[:size]
   210  				if _, err := io.ReadFull(dec.r, buf); err != nil {
   211  					return m, err
   212  				}
   213  			} else {
   214  				buf = make([]byte, int(size))
   215  				if _, err := io.ReadFull(dec.r, buf); err != nil {
   216  					return m, err
   217  				}
   218  			}
   219  			dec.index++
   220  			// 1 alloc
   221  			pbutil.MustUnmarshal(&m.Entries[i], buf)
   222  		}
   223  		// decode commit index
   224  		if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
   225  			return m, err
   226  		}
   227  		m.Commit = binary.BigEndian.Uint64(dec.uint64buf)
   228  	case msgTypeApp:
   229  		var size uint64
   230  		if err := binary.Read(dec.r, binary.BigEndian, &size); err != nil {
   231  			return m, err
   232  		}
   233  		buf := make([]byte, int(size))
   234  		if _, err := io.ReadFull(dec.r, buf); err != nil {
   235  			return m, err
   236  		}
   237  		pbutil.MustUnmarshal(&m, buf)
   238  
   239  		dec.term = m.Term
   240  		dec.index = m.Index
   241  		if l := len(m.Entries); l > 0 {
   242  			dec.index = m.Entries[l-1].Index
   243  		}
   244  	default:
   245  		return m, fmt.Errorf("failed to parse type %d in msgappv2 stream", typ)
   246  	}
   247  	return m, nil
   248  }