github.com/cloudwego/kitex@v0.9.0/pkg/remote/trans/nphttp2/grpc/http2_server.go (about)

     1  /*
     2   *
     3   * Copyright 2014 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   * This file may have been modified by CloudWeGo authors. All CloudWeGo
    18   * Modifications are Copyright 2021 CloudWeGo Authors.
    19   */
    20  
    21  package grpc
    22  
    23  import (
    24  	"bytes"
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"math"
    30  	"math/rand"
    31  	"net"
    32  	"strconv"
    33  	"sync"
    34  	"sync/atomic"
    35  	"time"
    36  
    37  	"github.com/cloudwego/kitex/pkg/remote/codec/protobuf/encoding"
    38  
    39  	"github.com/cloudwego/netpoll"
    40  	"golang.org/x/net/http2"
    41  	"golang.org/x/net/http2/hpack"
    42  	"google.golang.org/protobuf/proto"
    43  
    44  	"github.com/cloudwego/kitex/pkg/gofunc"
    45  	"github.com/cloudwego/kitex/pkg/klog"
    46  	"github.com/cloudwego/kitex/pkg/remote/trans/nphttp2/grpc/grpcframe"
    47  	"github.com/cloudwego/kitex/pkg/remote/trans/nphttp2/metadata"
    48  	"github.com/cloudwego/kitex/pkg/remote/trans/nphttp2/status"
    49  	"github.com/cloudwego/kitex/pkg/utils"
    50  )
    51  
    52  var (
    53  	// ErrIllegalHeaderWrite indicates that setting header is illegal because of
    54  	// the stream's state.
    55  	ErrIllegalHeaderWrite = errors.New("transport: the stream is done or WriteHeader was already called")
    56  	// ErrHeaderListSizeLimitViolation indicates that the header list size is larger
    57  	// than the limit set by peer.
    58  	ErrHeaderListSizeLimitViolation = errors.New("transport: trying to send header list size larger than the limit set by peer")
    59  )
    60  
    61  func init() {
    62  	rand.Seed(time.Now().UnixNano())
    63  }
    64  
    65  // http2Server implements the ServerTransport interface with HTTP2.
    66  type http2Server struct {
    67  	lastRead    int64
    68  	ctx         context.Context
    69  	done        chan struct{}
    70  	conn        net.Conn
    71  	loopy       *loopyWriter
    72  	readerDone  chan struct{} // sync point to enable testing.
    73  	writerDone  chan struct{} // sync point to enable testing.
    74  	remoteAddr  net.Addr
    75  	localAddr   net.Addr
    76  	maxStreamID uint32 // max stream ID ever seen
    77  	framer      *framer
    78  	// The max number of concurrent streams.
    79  	maxStreams uint32
    80  	// controlBuf delivers all the control related tasks (e.g., window
    81  	// updates, reset streams, and various settings) to the controller.
    82  	controlBuf *controlBuffer
    83  	fc         *trInFlow
    84  	// Keepalive and max-age parameters for the server.
    85  	kp ServerKeepalive
    86  	// Keepalive enforcement policy.
    87  	kep EnforcementPolicy
    88  	// The time instance last ping was received.
    89  	lastPingAt time.Time
    90  	// Number of times the client has violated keepalive ping policy so far.
    91  	pingStrikes uint8
    92  	// Flag to signify that number of ping strikes should be reset to 0.
    93  	// This is set whenever data or header frames are sent.
    94  	// 1 means yes.
    95  	resetPingStrikes      uint32 // Accessed atomically.
    96  	initialWindowSize     int32
    97  	bdpEst                *bdpEstimator
    98  	maxSendHeaderListSize *uint32
    99  
   100  	mu sync.Mutex // guard the following
   101  	// drainChan is initialized when drain(...) is called the first time.
   102  	// After which the server writes out the first GoAway(with ID 2^31-1) frame.
   103  	// Then an independent goroutine will be launched to later send the second GoAway.
   104  	// During this time we don't want to write another first GoAway(with ID 2^31 -1) frame.
   105  	// Thus call to drain(...) will be a no-op if drainChan is already initialized since draining is
   106  	// already underway.
   107  	drainChan     chan struct{}
   108  	state         transportState
   109  	activeStreams map[uint32]*Stream
   110  	// idle is the time instant when the connection went idle.
   111  	// This is either the beginning of the connection or when the number of
   112  	// RPCs go down to 0.
   113  	// When the connection is busy, this value is set to 0.
   114  	idle time.Time
   115  
   116  	bufferPool *bufferPool
   117  }
   118  
   119  // newHTTP2Server constructs a ServerTransport based on HTTP2. ConnectionError is
   120  // returned if something goes wrong.
   121  func newHTTP2Server(ctx context.Context, conn net.Conn, config *ServerConfig) (_ ServerTransport, err error) {
   122  	maxHeaderListSize := defaultServerMaxHeaderListSize
   123  	if config.MaxHeaderListSize != nil {
   124  		maxHeaderListSize = *config.MaxHeaderListSize
   125  	}
   126  
   127  	framer := newFramer(conn, config.WriteBufferSize, config.ReadBufferSize, maxHeaderListSize)
   128  	// Send initial settings as connection preface to client.
   129  	isettings := []http2.Setting{{
   130  		ID:  http2.SettingMaxFrameSize,
   131  		Val: http2MaxFrameLen,
   132  	}}
   133  
   134  	// 0 is permitted in the HTTP2 spec.
   135  	maxStreams := config.MaxStreams
   136  	if maxStreams == 0 {
   137  		maxStreams = math.MaxUint32
   138  	} else {
   139  		isettings = append(isettings, http2.Setting{
   140  			ID:  http2.SettingMaxConcurrentStreams,
   141  			Val: maxStreams,
   142  		})
   143  	}
   144  
   145  	dynamicWindow := true
   146  	iwz := initialWindowSize
   147  	if config.InitialWindowSize >= defaultWindowSize {
   148  		iwz = config.InitialWindowSize
   149  		dynamicWindow = false
   150  
   151  		isettings = append(isettings, http2.Setting{
   152  			ID:  http2.SettingInitialWindowSize,
   153  			Val: iwz,
   154  		})
   155  	}
   156  	icwz := initialWindowSize
   157  	if config.InitialConnWindowSize >= defaultWindowSize {
   158  		icwz = config.InitialConnWindowSize
   159  		dynamicWindow = false
   160  	}
   161  	if config.MaxHeaderListSize != nil {
   162  		isettings = append(isettings, http2.Setting{
   163  			ID:  http2.SettingMaxHeaderListSize,
   164  			Val: *config.MaxHeaderListSize,
   165  		})
   166  	}
   167  
   168  	if err := framer.WriteSettings(isettings...); err != nil {
   169  		return nil, connectionErrorf(false, err, "transport: %v", err)
   170  	}
   171  
   172  	// Adjust the connection flow control window if needed.
   173  	if icwz > defaultWindowSize {
   174  		if delta := icwz - defaultWindowSize; delta > 0 {
   175  			if err := framer.WriteWindowUpdate(0, delta); err != nil {
   176  				return nil, connectionErrorf(false, err, "transport: %v", err)
   177  			}
   178  		}
   179  	}
   180  	kp := config.KeepaliveParams
   181  	if kp.MaxConnectionIdle == 0 {
   182  		kp.MaxConnectionIdle = defaultMaxConnectionIdle
   183  	}
   184  	if kp.MaxConnectionAge == 0 {
   185  		kp.MaxConnectionAge = defaultMaxConnectionAge
   186  	}
   187  	if kp.MaxConnectionAgeGrace == 0 {
   188  		kp.MaxConnectionAgeGrace = defaultMaxConnectionAgeGrace
   189  	}
   190  	if kp.Time == 0 {
   191  		kp.Time = defaultServerKeepaliveTime
   192  	}
   193  	if kp.Timeout == 0 {
   194  		kp.Timeout = defaultServerKeepaliveTimeout
   195  	}
   196  	kep := config.KeepaliveEnforcementPolicy
   197  	if kep.MinTime == 0 {
   198  		kep.MinTime = defaultKeepalivePolicyMinTime
   199  	}
   200  
   201  	done := make(chan struct{})
   202  	t := &http2Server{
   203  		ctx:               ctx,
   204  		done:              done,
   205  		conn:              conn,
   206  		remoteAddr:        conn.RemoteAddr(),
   207  		localAddr:         conn.LocalAddr(),
   208  		framer:            framer,
   209  		readerDone:        make(chan struct{}),
   210  		writerDone:        make(chan struct{}),
   211  		maxStreams:        math.MaxUint32,
   212  		fc:                &trInFlow{limit: icwz},
   213  		state:             reachable,
   214  		activeStreams:     make(map[uint32]*Stream),
   215  		kp:                kp,
   216  		kep:               kep,
   217  		idle:              time.Now(),
   218  		initialWindowSize: int32(iwz),
   219  		bufferPool:        newBufferPool(),
   220  	}
   221  	t.controlBuf = newControlBuffer(t.done)
   222  	if dynamicWindow {
   223  		t.bdpEst = &bdpEstimator{
   224  			bdp:               initialWindowSize,
   225  			updateFlowControl: t.updateFlowControl,
   226  		}
   227  	}
   228  
   229  	t.framer.writer.Flush()
   230  
   231  	defer func() {
   232  		if err != nil {
   233  			t.Close()
   234  		}
   235  	}()
   236  
   237  	// Check the validity of client preface.
   238  	preface := make([]byte, len(ClientPreface))
   239  	if _, err := io.ReadFull(t.conn, preface); err != nil {
   240  		// In deployments where a gRPC server runs behind a cloud load balancer
   241  		// which performs regular TCP level health checks, the connection is
   242  		// closed immediately by the latter.  Returning io.EOF here allows the
   243  		// grpc server implementation to recognize this scenario and suppress
   244  		// logging to reduce spam.
   245  		if err == io.EOF {
   246  			return nil, io.EOF
   247  		}
   248  		return nil, connectionErrorf(false, err, "transport: http2Server.HandleStreams failed to receive the preface from client: %v", err)
   249  	}
   250  	if !bytes.Equal(preface, ClientPreface) {
   251  		return nil, connectionErrorf(false, nil, "transport: http2Server.HandleStreams received bogus greeting from client: %q", preface)
   252  	}
   253  
   254  	frame, err := t.framer.ReadFrame()
   255  	if err == io.EOF || err == io.ErrUnexpectedEOF {
   256  		return nil, err
   257  	}
   258  	if err != nil {
   259  		return nil, connectionErrorf(false, err, "transport: http2Server.HandleStreams failed to read initial settings frame: %v", err)
   260  	}
   261  	atomic.StoreInt64(&t.lastRead, time.Now().UnixNano())
   262  	sf, ok := frame.(*grpcframe.SettingsFrame)
   263  	if !ok {
   264  		return nil, connectionErrorf(false, nil, "transport: http2Server.HandleStreams saw invalid preface type %T from client", frame)
   265  	}
   266  	t.handleSettings(sf)
   267  
   268  	gofunc.RecoverGoFuncWithInfo(ctx, func() {
   269  		t.loopy = newLoopyWriter(serverSide, t.framer, t.controlBuf, t.bdpEst)
   270  		t.loopy.ssGoAwayHandler = t.outgoingGoAwayHandler
   271  		if err := t.loopy.run(conn.RemoteAddr().String()); err != nil {
   272  			klog.CtxErrorf(ctx, "KITEX: grpc server loopyWriter.run returning, error=%v", err)
   273  		}
   274  		t.conn.Close()
   275  		close(t.writerDone)
   276  	}, gofunc.NewBasicInfo("", conn.RemoteAddr().String()))
   277  
   278  	gofunc.RecoverGoFuncWithInfo(ctx, t.keepalive, gofunc.NewBasicInfo("", conn.RemoteAddr().String()))
   279  	return t, nil
   280  }
   281  
   282  // operateHeader takes action on the decoded headers.
   283  func (t *http2Server) operateHeaders(frame *grpcframe.MetaHeadersFrame, handle func(*Stream), traceCtx func(context.Context, string) context.Context) (fatal bool) {
   284  	streamID := frame.Header().StreamID
   285  	state := &decodeState{
   286  		serverSide: true,
   287  	}
   288  	if err := state.decodeHeader(frame); err != nil {
   289  		if se, ok := status.FromError(err); ok {
   290  			t.controlBuf.put(&cleanupStream{
   291  				streamID: streamID,
   292  				rst:      true,
   293  				rstCode:  statusCodeConvTab[se.Code()],
   294  				onWrite:  func() {},
   295  			})
   296  		}
   297  		return false
   298  	}
   299  
   300  	buf := newRecvBuffer()
   301  	s := &Stream{
   302  		id:             streamID,
   303  		st:             t,
   304  		buf:            buf,
   305  		fc:             &inFlow{limit: uint32(t.initialWindowSize)},
   306  		recvCompress:   state.data.encoding,
   307  		sendCompress:   state.data.acceptEncoding,
   308  		method:         state.data.method,
   309  		contentSubtype: state.data.contentSubtype,
   310  	}
   311  	if frame.StreamEnded() {
   312  		// s is just created by the caller. No lock needed.
   313  		s.state = streamReadDone
   314  	}
   315  	if state.data.timeoutSet {
   316  		s.ctx, s.cancel = context.WithTimeout(t.ctx, state.data.timeout)
   317  	} else {
   318  		s.ctx, s.cancel = context.WithCancel(t.ctx)
   319  	}
   320  	// Attach the received metadata to the context.
   321  	if len(state.data.mdata) > 0 {
   322  		s.ctx = metadata.NewIncomingContext(s.ctx, state.data.mdata)
   323  	}
   324  
   325  	t.mu.Lock()
   326  	if t.state != reachable {
   327  		t.mu.Unlock()
   328  		s.cancel()
   329  		return false
   330  	}
   331  	if uint32(len(t.activeStreams)) >= t.maxStreams {
   332  		t.mu.Unlock()
   333  		t.controlBuf.put(&cleanupStream{
   334  			streamID: streamID,
   335  			rst:      true,
   336  			rstCode:  http2.ErrCodeRefusedStream,
   337  			onWrite:  func() {},
   338  		})
   339  		s.cancel()
   340  		return false
   341  	}
   342  	if streamID%2 != 1 || streamID <= t.maxStreamID {
   343  		t.mu.Unlock()
   344  		// illegal gRPC stream id.
   345  		klog.CtxErrorf(s.ctx, "transport: http2Server.HandleStreams received an illegal stream id: %v", streamID)
   346  		s.cancel()
   347  		return true
   348  	}
   349  	t.maxStreamID = streamID
   350  	t.activeStreams[streamID] = s
   351  	if len(t.activeStreams) == 1 {
   352  		t.idle = time.Time{}
   353  	}
   354  	t.mu.Unlock()
   355  	s.requestRead = func(n int) {
   356  		t.adjustWindow(s, uint32(n))
   357  	}
   358  	s.ctx = traceCtx(s.ctx, s.method)
   359  	s.ctxDone = s.ctx.Done()
   360  	s.wq = newWriteQuota(defaultWriteQuota, s.ctxDone)
   361  	s.trReader = &transportReader{
   362  		reader: &recvBufferReader{
   363  			ctx:        s.ctx,
   364  			ctxDone:    s.ctxDone,
   365  			recv:       s.buf,
   366  			freeBuffer: t.bufferPool.put,
   367  		},
   368  		windowHandler: func(n int) {
   369  			t.updateWindow(s, uint32(n))
   370  		},
   371  	}
   372  	// Register the stream with loopy.
   373  	t.controlBuf.put(&registerStream{
   374  		streamID: s.id,
   375  		wq:       s.wq,
   376  	})
   377  	handle(s)
   378  	return false
   379  }
   380  
   381  // HandleStreams receives incoming streams using the given handler. This is
   382  // typically run in a separate goroutine.
   383  // traceCtx attaches trace to ctx and returns the new context.
   384  func (t *http2Server) HandleStreams(handle func(*Stream), traceCtx func(context.Context, string) context.Context) {
   385  	defer close(t.readerDone)
   386  	for {
   387  		t.controlBuf.throttle()
   388  		frame, err := t.framer.ReadFrame()
   389  		atomic.StoreInt64(&t.lastRead, time.Now().UnixNano())
   390  		if err != nil {
   391  			if se, ok := err.(http2.StreamError); ok {
   392  				klog.CtxWarnf(t.ctx, "transport: http2Server.HandleStreams encountered http2.StreamError: %v", se)
   393  				t.mu.Lock()
   394  				s := t.activeStreams[se.StreamID]
   395  				t.mu.Unlock()
   396  				if s != nil {
   397  					t.closeStream(s, true, se.Code, false)
   398  				} else {
   399  					t.controlBuf.put(&cleanupStream{
   400  						streamID: se.StreamID,
   401  						rst:      true,
   402  						rstCode:  se.Code,
   403  						onWrite:  func() {},
   404  					})
   405  				}
   406  				continue
   407  			}
   408  			if err == io.EOF || err == io.ErrUnexpectedEOF || errors.Is(err, netpoll.ErrEOF) {
   409  				t.Close()
   410  				return
   411  			}
   412  			klog.CtxWarnf(t.ctx, "transport: http2Server.HandleStreams failed to read frame: %v", err)
   413  			t.Close()
   414  			return
   415  		}
   416  		switch frame := frame.(type) {
   417  		case *grpcframe.MetaHeadersFrame:
   418  			if t.operateHeaders(frame, handle, traceCtx) {
   419  				t.Close()
   420  				break
   421  			}
   422  		case *grpcframe.DataFrame:
   423  			t.handleData(frame)
   424  		case *http2.RSTStreamFrame:
   425  			t.handleRSTStream(frame)
   426  		case *grpcframe.SettingsFrame:
   427  			t.handleSettings(frame)
   428  		case *http2.PingFrame:
   429  			t.handlePing(frame)
   430  		case *http2.WindowUpdateFrame:
   431  			t.handleWindowUpdate(frame)
   432  		case *grpcframe.GoAwayFrame:
   433  			// TODO: Handle GoAway from the client appropriately.
   434  		default:
   435  			klog.CtxErrorf(t.ctx, "transport: http2Server.HandleStreams found unhandled frame type %v.", frame)
   436  		}
   437  		t.framer.reader.Release()
   438  	}
   439  }
   440  
   441  func (t *http2Server) getStream(f http2.Frame) (*Stream, bool) {
   442  	t.mu.Lock()
   443  	defer t.mu.Unlock()
   444  	if t.activeStreams == nil {
   445  		// The transport is closing.
   446  		return nil, false
   447  	}
   448  	s, ok := t.activeStreams[f.Header().StreamID]
   449  	if !ok {
   450  		// The stream is already done.
   451  		return nil, false
   452  	}
   453  	return s, true
   454  }
   455  
   456  // adjustWindow sends out extra window update over the initial window size
   457  // of stream if the application is requesting data larger in size than
   458  // the window.
   459  func (t *http2Server) adjustWindow(s *Stream, n uint32) {
   460  	if w := s.fc.maybeAdjust(n); w > 0 {
   461  		t.controlBuf.put(&outgoingWindowUpdate{streamID: s.id, increment: w})
   462  	}
   463  }
   464  
   465  // updateFlowControl updates the incoming flow control windows
   466  // for the transport and the stream based on the current bdp
   467  // estimation.
   468  func (t *http2Server) updateFlowControl(n uint32) {
   469  	t.mu.Lock()
   470  	for _, s := range t.activeStreams {
   471  		s.fc.newLimit(n)
   472  	}
   473  	t.initialWindowSize = int32(n)
   474  	t.mu.Unlock()
   475  	t.controlBuf.put(&outgoingWindowUpdate{
   476  		streamID:  0,
   477  		increment: t.fc.newLimit(n),
   478  	})
   479  	t.controlBuf.put(&outgoingSettings{
   480  		ss: []http2.Setting{
   481  			{
   482  				ID:  http2.SettingInitialWindowSize,
   483  				Val: n,
   484  			},
   485  		},
   486  	})
   487  }
   488  
   489  // updateWindow adjusts the inbound quota for the stream and the transport.
   490  // Window updates will deliver to the controller for sending when
   491  // the cumulative quota exceeds the corresponding threshold.
   492  func (t *http2Server) updateWindow(s *Stream, n uint32) {
   493  	if w := s.fc.onRead(n); w > 0 {
   494  		t.controlBuf.put(&outgoingWindowUpdate{
   495  			streamID:  s.id,
   496  			increment: w,
   497  		})
   498  	}
   499  }
   500  
   501  func (t *http2Server) handleData(f *grpcframe.DataFrame) {
   502  	size := f.Header().Length
   503  	var sendBDPPing bool
   504  	if t.bdpEst != nil {
   505  		sendBDPPing = t.bdpEst.add(size)
   506  	}
   507  	// Decouple connection's flow control from application's read.
   508  	// An update on connection's flow control should not depend on
   509  	// whether user application has read the data or not. Such a
   510  	// restriction is already imposed on the stream's flow control,
   511  	// and therefore the sender will be blocked anyways.
   512  	// Decoupling the connection flow control will prevent other
   513  	// active(fast) streams from starving in presence of slow or
   514  	// inactive streams.
   515  	if w := t.fc.onData(size); w > 0 {
   516  		t.controlBuf.put(&outgoingWindowUpdate{
   517  			streamID:  0,
   518  			increment: w,
   519  		})
   520  	}
   521  	if sendBDPPing {
   522  		// Avoid excessive ping detection (e.g. in an L7 proxy)
   523  		// by sending a window update prior to the BDP ping.
   524  		if w := t.fc.reset(); w > 0 {
   525  			t.controlBuf.put(&outgoingWindowUpdate{
   526  				streamID:  0,
   527  				increment: w,
   528  			})
   529  		}
   530  		t.controlBuf.put(bdpPing)
   531  	}
   532  	// Select the right stream to dispatch.
   533  	s, ok := t.getStream(f)
   534  	if !ok {
   535  		return
   536  	}
   537  	if size > 0 {
   538  		if err := s.fc.onData(size); err != nil {
   539  			t.closeStream(s, true, http2.ErrCodeFlowControl, false)
   540  			return
   541  		}
   542  		if f.Header().Flags.Has(http2.FlagDataPadded) {
   543  			if w := s.fc.onRead(size - uint32(len(f.Data()))); w > 0 {
   544  				t.controlBuf.put(&outgoingWindowUpdate{s.id, w})
   545  			}
   546  		}
   547  		// TODO(bradfitz, zhaoq): A copy is required here because there is no
   548  		// guarantee f.Data() is consumed before the arrival of next frame.
   549  		// Can this copy be eliminated?
   550  		if len(f.Data()) > 0 {
   551  			buffer := t.bufferPool.get()
   552  			buffer.Reset()
   553  			buffer.Write(f.Data())
   554  			s.write(recvMsg{buffer: buffer})
   555  		}
   556  	}
   557  	if f.Header().Flags.Has(http2.FlagDataEndStream) {
   558  		// Received the end of stream from the client.
   559  		s.compareAndSwapState(streamActive, streamReadDone)
   560  		s.write(recvMsg{err: io.EOF})
   561  	}
   562  }
   563  
   564  func (t *http2Server) handleRSTStream(f *http2.RSTStreamFrame) {
   565  	// If the stream is not deleted from the transport's active streams map, then do a regular close stream.
   566  	if s, ok := t.getStream(f); ok {
   567  		t.closeStream(s, false, 0, false)
   568  		return
   569  	}
   570  	// If the stream is already deleted from the active streams map, then put a cleanupStream item into controlbuf to delete the stream from loopy writer's established streams map.
   571  	t.controlBuf.put(&cleanupStream{
   572  		streamID: f.Header().StreamID,
   573  		rst:      false,
   574  		rstCode:  0,
   575  		onWrite:  func() {},
   576  	})
   577  }
   578  
   579  func (t *http2Server) handleSettings(f *grpcframe.SettingsFrame) {
   580  	if f.IsAck() {
   581  		return
   582  	}
   583  	var ss []http2.Setting
   584  	var updateFuncs []func()
   585  	f.ForeachSetting(func(s http2.Setting) error {
   586  		switch s.ID {
   587  		case http2.SettingMaxHeaderListSize:
   588  			updateFuncs = append(updateFuncs, func() {
   589  				t.maxSendHeaderListSize = new(uint32)
   590  				*t.maxSendHeaderListSize = s.Val
   591  			})
   592  		default:
   593  			ss = append(ss, s)
   594  		}
   595  		return nil
   596  	})
   597  	t.controlBuf.executeAndPut(func(interface{}) bool {
   598  		for _, f := range updateFuncs {
   599  			f()
   600  		}
   601  		return true
   602  	}, &incomingSettings{
   603  		ss: ss,
   604  	})
   605  }
   606  
   607  const (
   608  	maxPingStrikes     = 2
   609  	defaultPingTimeout = 2 * time.Hour
   610  )
   611  
   612  func (t *http2Server) handlePing(f *http2.PingFrame) {
   613  	if f.IsAck() {
   614  		if f.Data == goAwayPing.data && t.drainChan != nil {
   615  			close(t.drainChan)
   616  			return
   617  		}
   618  		// Maybe it's a BDP ping.
   619  		if t.bdpEst != nil {
   620  			t.bdpEst.calculate(f.Data)
   621  		}
   622  		return
   623  	}
   624  	pingAck := &ping{ack: true}
   625  	copy(pingAck.data[:], f.Data[:])
   626  	t.controlBuf.put(pingAck)
   627  
   628  	now := time.Now()
   629  	defer func() {
   630  		t.lastPingAt = now
   631  	}()
   632  	// A reset ping strikes means that we don't need to check for policy
   633  	// violation for this ping and the pingStrikes counter should be set
   634  	// to 0.
   635  	if atomic.CompareAndSwapUint32(&t.resetPingStrikes, 1, 0) {
   636  		t.pingStrikes = 0
   637  		return
   638  	}
   639  	t.mu.Lock()
   640  	ns := len(t.activeStreams)
   641  	t.mu.Unlock()
   642  	if ns < 1 && !t.kep.PermitWithoutStream {
   643  		// Keepalive shouldn't be active thus, this new ping should
   644  		// have come after at least defaultPingTimeout.
   645  		if t.lastPingAt.Add(defaultPingTimeout).After(now) {
   646  			t.pingStrikes++
   647  		}
   648  	} else {
   649  		// Check if keepalive policy is respected.
   650  		if t.lastPingAt.Add(t.kep.MinTime).After(now) {
   651  			t.pingStrikes++
   652  		}
   653  	}
   654  
   655  	if t.pingStrikes > maxPingStrikes {
   656  		// Send goaway and close the connection.
   657  		klog.CtxErrorf(t.ctx, "transport: Got too many pings from the client, closing the connection.")
   658  		t.controlBuf.put(&goAway{code: http2.ErrCodeEnhanceYourCalm, debugData: []byte("too_many_pings"), closeConn: true})
   659  	}
   660  }
   661  
   662  func (t *http2Server) handleWindowUpdate(f *http2.WindowUpdateFrame) {
   663  	t.controlBuf.put(&incomingWindowUpdate{
   664  		streamID:  f.Header().StreamID,
   665  		increment: f.Increment,
   666  	})
   667  }
   668  
   669  func appendHeaderFieldsFromMD(headerFields []hpack.HeaderField, md metadata.MD) []hpack.HeaderField {
   670  	for k, vv := range md {
   671  		if isReservedHeader(k) {
   672  			// Clients don't tolerate reading restricted headers after some non restricted ones were sent.
   673  			continue
   674  		}
   675  		for _, v := range vv {
   676  			headerFields = append(headerFields, hpack.HeaderField{Name: k, Value: encodeMetadataHeader(k, v)})
   677  		}
   678  	}
   679  	return headerFields
   680  }
   681  
   682  func (t *http2Server) checkForHeaderListSize(it interface{}) bool {
   683  	if t.maxSendHeaderListSize == nil {
   684  		return true
   685  	}
   686  	hdrFrame := it.(*headerFrame)
   687  	var sz int64
   688  	for _, f := range hdrFrame.hf {
   689  		if sz += int64(f.Size()); sz > int64(*t.maxSendHeaderListSize) {
   690  			klog.CtxErrorf(t.ctx, "header list size to send violates the maximum size (%d bytes) set by client", *t.maxSendHeaderListSize)
   691  			return false
   692  		}
   693  	}
   694  	return true
   695  }
   696  
   697  // WriteHeader sends the header metadata md back to the client.
   698  func (t *http2Server) WriteHeader(s *Stream, md metadata.MD) error {
   699  	if s.updateHeaderSent() || s.getState() == streamDone {
   700  		return ErrIllegalHeaderWrite
   701  	}
   702  	s.hdrMu.Lock()
   703  	if md.Len() > 0 {
   704  		if s.header.Len() > 0 {
   705  			s.header = metadata.AppendMD(s.header, md)
   706  		} else {
   707  			s.header = md
   708  		}
   709  	}
   710  	if err := t.writeHeaderLocked(s); err != nil {
   711  		s.hdrMu.Unlock()
   712  		return err
   713  	}
   714  	s.hdrMu.Unlock()
   715  	return nil
   716  }
   717  
   718  func (t *http2Server) setResetPingStrikes() {
   719  	atomic.StoreUint32(&t.resetPingStrikes, 1)
   720  }
   721  
   722  func (t *http2Server) writeHeaderLocked(s *Stream) error {
   723  	// first and create a slice of that exact size.
   724  	headerFields := make([]hpack.HeaderField, 0, 3+s.header.Len()) // at least :status, content-type will be there if none else.
   725  	headerFields = append(headerFields, hpack.HeaderField{Name: ":status", Value: "200"})
   726  	headerFields = append(headerFields, hpack.HeaderField{Name: "content-type", Value: contentType(s.contentSubtype)})
   727  	sendCompress := encoding.FindCompressorName(s.sendCompress)
   728  	if sendCompress != "" {
   729  		headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-encoding", Value: sendCompress})
   730  	}
   731  	headerFields = appendHeaderFieldsFromMD(headerFields, s.header)
   732  	success, err := t.controlBuf.executeAndPut(t.checkForHeaderListSize, &headerFrame{
   733  		streamID:  s.id,
   734  		hf:        headerFields,
   735  		endStream: false,
   736  		onWrite:   t.setResetPingStrikes,
   737  	})
   738  	if !success {
   739  		if err != nil {
   740  			return err
   741  		}
   742  		t.closeStream(s, true, http2.ErrCodeInternal, false)
   743  		return ErrHeaderListSizeLimitViolation
   744  	}
   745  	return nil
   746  }
   747  
   748  // WriteStatus sends stream status to the client and terminates the stream.
   749  // There is no further I/O operations being able to perform on this stream.
   750  // TODO(zhaoq): Now it indicates the end of entire stream. Revisit if early
   751  // OK is adopted.
   752  func (t *http2Server) WriteStatus(s *Stream, st *status.Status) error {
   753  	if s.getState() == streamDone {
   754  		return nil
   755  	}
   756  	s.hdrMu.Lock()
   757  	// TODO(mmukhi): Benchmark if the performance gets better if count the metadata and other header fields
   758  	// first and create a slice of that exact size.
   759  	headerFields := make([]hpack.HeaderField, 0, 2) // grpc-status and grpc-message will be there if none else.
   760  	if !s.updateHeaderSent() {                      // No headers have been sent.
   761  		if len(s.header) > 0 { // Send a separate header frame.
   762  			if err := t.writeHeaderLocked(s); err != nil {
   763  				s.hdrMu.Unlock()
   764  				return err
   765  			}
   766  		} else { // Send a trailer only response.
   767  			headerFields = append(headerFields, hpack.HeaderField{Name: ":status", Value: "200"})
   768  			headerFields = append(headerFields, hpack.HeaderField{Name: "content-type", Value: contentType(s.contentSubtype)})
   769  		}
   770  	}
   771  	headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-status", Value: strconv.Itoa(int(st.Code()))})
   772  	headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-message", Value: encodeGrpcMessage(st.Message())})
   773  	if bizStatusErr := s.BizStatusErr(); bizStatusErr != nil {
   774  		headerFields = append(headerFields, hpack.HeaderField{Name: "biz-status", Value: strconv.Itoa(int(bizStatusErr.BizStatusCode()))})
   775  		if len(bizStatusErr.BizExtra()) != 0 {
   776  			value, _ := utils.Map2JSONStr(bizStatusErr.BizExtra())
   777  			headerFields = append(headerFields, hpack.HeaderField{Name: "biz-extra", Value: value})
   778  		}
   779  	}
   780  
   781  	if p := st.Proto(); p != nil && len(p.Details) > 0 {
   782  		stBytes, err := proto.Marshal(p)
   783  		if err != nil {
   784  			// TODO: return error instead, when callers are able to handle it.
   785  			klog.CtxErrorf(t.ctx, "transport: failed to marshal rpc status: %v, error: %v", p, err)
   786  		} else {
   787  			headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-status-details-bin", Value: encodeBinHeader(stBytes)})
   788  		}
   789  	}
   790  
   791  	// Attach the trailer metadata.
   792  	headerFields = appendHeaderFieldsFromMD(headerFields, s.trailer)
   793  	trailingHeader := &headerFrame{
   794  		streamID:  s.id,
   795  		hf:        headerFields,
   796  		endStream: true,
   797  		onWrite:   t.setResetPingStrikes,
   798  	}
   799  	s.hdrMu.Unlock()
   800  	success, err := t.controlBuf.execute(t.checkForHeaderListSize, trailingHeader)
   801  	if !success {
   802  		if err != nil {
   803  			return err
   804  		}
   805  		t.closeStream(s, true, http2.ErrCodeInternal, false)
   806  		return ErrHeaderListSizeLimitViolation
   807  	}
   808  	// Send a RST_STREAM after the trailers if the client has not already half-closed.
   809  	rst := s.getState() == streamActive
   810  	t.finishStream(s, rst, http2.ErrCodeNo, trailingHeader, true)
   811  	return nil
   812  }
   813  
   814  // Write converts the data into HTTP2 data frame and sends it out. Non-nil error
   815  // is returns if it fails (e.g., framing error, transport error).
   816  func (t *http2Server) Write(s *Stream, hdr, data []byte, opts *Options) error {
   817  	if !s.isHeaderSent() { // Headers haven't been written yet.
   818  		if err := t.WriteHeader(s, nil); err != nil {
   819  			return err
   820  		}
   821  	} else {
   822  		// Writing headers checks for this condition.
   823  		if s.getState() == streamDone {
   824  			// TODO(mmukhi, dfawley): Should the server write also return io.EOF?
   825  			s.cancel()
   826  			select {
   827  			case <-t.done:
   828  				return ErrConnClosing
   829  			default:
   830  			}
   831  			return ContextErr(s.ctx.Err())
   832  		}
   833  	}
   834  	df := &dataFrame{
   835  		streamID:    s.id,
   836  		h:           hdr,
   837  		d:           data,
   838  		onEachWrite: t.setResetPingStrikes,
   839  	}
   840  	if len(hdr) == 0 && len(data) != 0 {
   841  		df.dcache = data
   842  	}
   843  	if err := s.wq.get(int32(len(hdr) + len(data))); err != nil {
   844  		select {
   845  		case <-t.done:
   846  			return ErrConnClosing
   847  		default:
   848  		}
   849  		return ContextErr(s.ctx.Err())
   850  	}
   851  	return t.controlBuf.put(df)
   852  }
   853  
   854  // keepalive running in a separate goroutine does the following:
   855  // 1. Gracefully closes an idle connection after a duration of keepalive.MaxConnectionIdle.
   856  // 2. Gracefully closes any connection after a duration of keepalive.MaxConnectionAge.
   857  // 3. Forcibly closes a connection after an additive period of keepalive.MaxConnectionAgeGrace over keepalive.MaxConnectionAge.
   858  // 4. Makes sure a connection is alive by sending pings with a frequency of keepalive.Time and closes a non-responsive connection
   859  // after an additional duration of keepalive.Timeout.
   860  func (t *http2Server) keepalive() {
   861  	p := &ping{}
   862  	// True iff a ping has been sent, and no data has been received since then.
   863  	outstandingPing := false
   864  	// Amount of time remaining before which we should receive an ACK for the
   865  	// last sent ping.
   866  	kpTimeoutLeft := time.Duration(0)
   867  	// Records the last value of t.lastRead before we go block on the timer.
   868  	// This is required to check for read activity since then.
   869  	prevNano := time.Now().UnixNano()
   870  	// Initialize the different timers to their default values.
   871  	idleTimer := time.NewTimer(t.kp.MaxConnectionIdle)
   872  	ageTimer := time.NewTimer(t.kp.MaxConnectionAge)
   873  	kpTimer := time.NewTimer(t.kp.Time)
   874  	defer func() {
   875  		// We need to drain the underlying channel in these timers after a call
   876  		// to Stop(), only if we are interested in resetting them. Clearly we
   877  		// are not interested in resetting them here.
   878  		idleTimer.Stop()
   879  		ageTimer.Stop()
   880  		kpTimer.Stop()
   881  	}()
   882  
   883  	for {
   884  		select {
   885  		case <-idleTimer.C:
   886  			t.mu.Lock()
   887  			idle := t.idle
   888  			if idle.IsZero() { // The connection is non-idle.
   889  				t.mu.Unlock()
   890  				idleTimer.Reset(t.kp.MaxConnectionIdle)
   891  				continue
   892  			}
   893  			val := t.kp.MaxConnectionIdle - time.Since(idle)
   894  			t.mu.Unlock()
   895  			if val <= 0 {
   896  				// The connection has been idle for a duration of keepalive.MaxConnectionIdle or more.
   897  				// Gracefully close the connection.
   898  				t.drain(http2.ErrCodeNo, []byte{})
   899  				return
   900  			}
   901  			idleTimer.Reset(val)
   902  		case <-ageTimer.C:
   903  			t.drain(http2.ErrCodeNo, []byte{})
   904  			ageTimer.Reset(t.kp.MaxConnectionAgeGrace)
   905  			select {
   906  			case <-ageTimer.C:
   907  				// Close the connection after grace period.
   908  				klog.Infof("transport: closing server transport due to maximum connection age.")
   909  				t.Close()
   910  			case <-t.done:
   911  			}
   912  			return
   913  		case <-kpTimer.C:
   914  			lastRead := atomic.LoadInt64(&t.lastRead)
   915  			if lastRead > prevNano {
   916  				// There has been read activity since the last time we were
   917  				// here. Setup the timer to fire at kp.Time seconds from
   918  				// lastRead time and continue.
   919  				outstandingPing = false
   920  				kpTimer.Reset(time.Duration(lastRead) + t.kp.Time - time.Duration(time.Now().UnixNano()))
   921  				prevNano = lastRead
   922  				continue
   923  			}
   924  			if outstandingPing && kpTimeoutLeft <= 0 {
   925  				klog.Infof("transport: closing server transport due to idleness.")
   926  				t.Close()
   927  				return
   928  			}
   929  			if !outstandingPing {
   930  				t.controlBuf.put(p)
   931  				kpTimeoutLeft = t.kp.Timeout
   932  				outstandingPing = true
   933  			}
   934  			// The amount of time to sleep here is the minimum of kp.Time and
   935  			// timeoutLeft. This will ensure that we wait only for kp.Time
   936  			// before sending out the next ping (for cases where the ping is
   937  			// acked).
   938  			sleepDuration := minTime(t.kp.Time, kpTimeoutLeft)
   939  			kpTimeoutLeft -= sleepDuration
   940  			kpTimer.Reset(sleepDuration)
   941  		case <-t.done:
   942  			return
   943  		}
   944  	}
   945  }
   946  
   947  // Close starts shutting down the http2Server transport.
   948  // TODO(zhaoq): Now the destruction is not blocked on any pending streams. This
   949  // could cause some resource issue. Revisit this later.
   950  func (t *http2Server) Close() error {
   951  	t.mu.Lock()
   952  	if t.state == closing {
   953  		t.mu.Unlock()
   954  		return errors.New("transport: Close() was already called")
   955  	}
   956  	t.state = closing
   957  	streams := t.activeStreams
   958  	t.activeStreams = nil
   959  	t.mu.Unlock()
   960  	t.controlBuf.finish()
   961  	close(t.done)
   962  	err := t.conn.Close()
   963  	// Cancel all active streams.
   964  	for _, s := range streams {
   965  		s.cancel()
   966  	}
   967  	return err
   968  }
   969  
   970  // deleteStream deletes the stream s from transport's active streams.
   971  func (t *http2Server) deleteStream(s *Stream, eosReceived bool) {
   972  	// In case stream sending and receiving are invoked in separate
   973  	// goroutines (e.g., bi-directional streaming), cancel needs to be
   974  	// called to interrupt the potential blocking on other goroutines.
   975  	s.cancel()
   976  
   977  	t.mu.Lock()
   978  	if _, ok := t.activeStreams[s.id]; ok {
   979  		delete(t.activeStreams, s.id)
   980  		if len(t.activeStreams) == 0 {
   981  			t.idle = time.Now()
   982  		}
   983  	}
   984  	t.mu.Unlock()
   985  }
   986  
   987  // finishStream closes the stream and puts the trailing headerFrame into controlbuf.
   988  func (t *http2Server) finishStream(s *Stream, rst bool, rstCode http2.ErrCode, hdr *headerFrame, eosReceived bool) {
   989  	oldState := s.swapState(streamDone)
   990  	if oldState == streamDone {
   991  		// If the stream was already done, return.
   992  		return
   993  	}
   994  
   995  	hdr.cleanup = &cleanupStream{
   996  		streamID: s.id,
   997  		rst:      rst,
   998  		rstCode:  rstCode,
   999  		onWrite: func() {
  1000  			t.deleteStream(s, eosReceived)
  1001  		},
  1002  	}
  1003  	t.controlBuf.put(hdr)
  1004  }
  1005  
  1006  // closeStream clears the footprint of a stream when the stream is not needed any more.
  1007  func (t *http2Server) closeStream(s *Stream, rst bool, rstCode http2.ErrCode, eosReceived bool) {
  1008  	s.swapState(streamDone)
  1009  	t.deleteStream(s, eosReceived)
  1010  
  1011  	t.controlBuf.put(&cleanupStream{
  1012  		streamID: s.id,
  1013  		rst:      rst,
  1014  		rstCode:  rstCode,
  1015  		onWrite:  func() {},
  1016  	})
  1017  }
  1018  
  1019  func (t *http2Server) RemoteAddr() net.Addr {
  1020  	return t.remoteAddr
  1021  }
  1022  
  1023  func (t *http2Server) LocalAddr() net.Addr {
  1024  	return t.localAddr
  1025  }
  1026  
  1027  func (t *http2Server) Drain() {
  1028  	t.drain(http2.ErrCodeNo, []byte{})
  1029  }
  1030  
  1031  func (t *http2Server) drain(code http2.ErrCode, debugData []byte) {
  1032  	t.mu.Lock()
  1033  	defer t.mu.Unlock()
  1034  	if t.drainChan != nil {
  1035  		return
  1036  	}
  1037  	t.drainChan = make(chan struct{})
  1038  	t.controlBuf.put(&goAway{code: code, debugData: debugData, headsUp: true})
  1039  }
  1040  
  1041  var goAwayPing = &ping{data: [8]byte{1, 6, 1, 8, 0, 3, 3, 9}}
  1042  
  1043  // Handles outgoing GoAway and returns true if loopy needs to put itself
  1044  // in draining mode.
  1045  func (t *http2Server) outgoingGoAwayHandler(g *goAway) (bool, error) {
  1046  	t.mu.Lock()
  1047  	if t.state == closing { // TODO(mmukhi): This seems unnecessary.
  1048  		t.mu.Unlock()
  1049  		// The transport is closing.
  1050  		return false, ErrConnClosing
  1051  	}
  1052  	sid := t.maxStreamID
  1053  	if !g.headsUp {
  1054  		// Stop accepting more streams now.
  1055  		t.state = draining
  1056  		if len(t.activeStreams) == 0 {
  1057  			g.closeConn = true
  1058  		}
  1059  		t.mu.Unlock()
  1060  		if err := t.framer.WriteGoAway(sid, g.code, g.debugData); err != nil {
  1061  			return false, err
  1062  		}
  1063  		if g.closeConn {
  1064  			// Abruptly close the connection following the GoAway (via
  1065  			// loopywriter).  But flush out what's inside the buffer first.
  1066  			t.framer.writer.Flush()
  1067  			return false, fmt.Errorf("transport: Connection closing")
  1068  		}
  1069  		return true, nil
  1070  	}
  1071  	t.mu.Unlock()
  1072  	// For a graceful close, send out a GoAway with stream ID of MaxUInt32,
  1073  	// Follow that with a ping and wait for the ack to come back or a timer
  1074  	// to expire. During this time accept new streams since they might have
  1075  	// originated before the GoAway reaches the client.
  1076  	// After getting the ack or timer expiration send out another GoAway this
  1077  	// time with an ID of the max stream server intends to process.
  1078  	if err := t.framer.WriteGoAway(math.MaxUint32, http2.ErrCodeNo, []byte{}); err != nil {
  1079  		return false, err
  1080  	}
  1081  	if err := t.framer.WritePing(false, goAwayPing.data); err != nil {
  1082  		return false, err
  1083  	}
  1084  
  1085  	gofunc.RecoverGoFuncWithInfo(context.Background(), func() {
  1086  		timer := time.NewTimer(time.Minute)
  1087  		defer timer.Stop()
  1088  		select {
  1089  		case <-t.drainChan:
  1090  		case <-timer.C:
  1091  		case <-t.done:
  1092  			return
  1093  		}
  1094  		t.controlBuf.put(&goAway{code: g.code, debugData: g.debugData})
  1095  	}, gofunc.EmptyInfo)
  1096  	return false, nil
  1097  }