github.com/onflow/flow-go@v0.33.17/engine/common/grpc/forwarder/forwarder.go

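// Package forwarder provides a simple client-side load balancer that forwards
// Access API requests to a set of upstream access nodes or observers.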
package forwarder

import (
	"fmt"
	"io"
	"sync"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"github.com/onflow/flow-go/engine/access/rpc/connection"
	"github.com/onflow/flow-go/model/flow"

	"github.com/onflow/flow/protobuf/go/flow/access"
)

// Upstream is a container for an individual upstream node, holding its identity,
// its Access API client, and the closer for the underlying connection.
type Upstream struct {
	id     *flow.Identity         // the public identity of one network participant (node)
	client access.AccessAPIClient // client with gRPC connection
	closer io.Closer              // closer for the client connection; use it to close the connection when done
}

// Forwarder forwards all requests to a set of upstream access nodes or observers
type Forwarder struct {
	lock        sync.Mutex
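	// roundRobin is the index of the upstream used for the most recent attempt;
	// FaultTolerantClient advances it before each new connection attempt.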
	roundRobin  int
	upstream    []Upstream
	connFactory connection.ConnectionFactory
}

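// NewForwarder returns a Forwarder that distributes requests across the given
// upstream identities, using connectionFactory to establish the gRPC connections.
// It returns an error if the connection to any upstream node cannot be established.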
func NewForwarder(identities flow.IdentityList, connectionFactory connection.ConnectionFactory) (*Forwarder, error) {
	forwarder := &Forwarder{connFactory: connectionFactory}
	err := forwarder.setFlowAccessAPI(identities)
	return forwarder, err
}

// setFlowAccessAPI sets a backend access API that forwards some requests to an upstream node.
// It is used by Observer services, Blockchain Data Service, etc.
// Make sure that this is used only for observation and not by a staked participant in the flow network.
// This means that observers see a copy of the data, but there is no interaction to ensure integrity from the root block.
func (f *Forwarder) setFlowAccessAPI(accessNodeAddressAndPort flow.IdentityList) error {
	f.upstream = make([]Upstream, accessNodeAddressAndPort.Count())
	for i, identity := range accessNodeAddressAndPort {
		// Store the faultTolerantClient setup parameters, such as address, public key and timeout, so that
		// we can refresh the API on connection loss
		f.upstream[i].id = identity

		// We fail on any single error on startup, so that
		// we identify bootstrapping errors early
		err := f.reconnectingClient(i)
		if err != nil {
			return err
		}
	}

	f.roundRobin = 0
	return nil
}

// reconnectingClient (re)establishes the connection to the upstream at index i
// and stores the resulting client and closer on the Forwarder.
func (f *Forwarder) reconnectingClient(i int) error {
	identity := f.upstream[i].id

	accessApiClient, closer, err := f.connFactory.GetAccessAPIClientWithPort(identity.Address, identity.NetworkPubKey)
	if err != nil {
		return fmt.Errorf("failed to connect to access node at %s: %w", identity.Address, err)
	}
	// closer is non-nil iff err is nil; use it to close the connection when done
	f.upstream[i].closer = closer
	f.upstream[i].client = accessApiClient
	return nil
}

// FaultTolerantClient returns an upstream client, reconnecting on errors for a
// bounded number of attempts before giving up.
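//
// A minimal usage sketch (the surrounding caller, ctx, and the Ping request are
// illustrative assumptions, not part of this package):
//
//	client, closer, err := f.FaultTolerantClient()
//	if err != nil {
//		return err
//	}
//	defer closer.Close()
//	_, err = client.Ping(ctx, &access.PingRequest{})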
func (f *Forwarder) FaultTolerantClient() (access.AccessAPIClient, io.Closer, error) {
	if len(f.upstream) == 0 {
		return nil, nil, status.Errorf(codes.Unimplemented, "method not implemented")
	}

	// Reasoning: a retry count of three brings the failure ratio down from an assumed 37% per-attempt
	// failure rate to an acceptable ~5% (0.37^3 ≈ 0.05, treating attempts as independent).
	// A bigger number is problematic due to the DNS resolution and connection times,
	// plus the need to log and debug each individual connection failure.
	//
	// This reasoning eliminates the need to make this parameter configurable.
	// The logic also works when rolling over a single connection, which keeps the code clean.
	const retryMax = 3

	f.lock.Lock()
	defer f.lock.Unlock()

	var err error
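	// Round-robin over the upstream nodes: advance the index, try to (re)establish the
	// connection, and fall through to the next upstream on failure, up to retryMax attempts.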
	for i := 0; i < retryMax; i++ {
		f.roundRobin++
		f.roundRobin = f.roundRobin % len(f.upstream)
		err = f.reconnectingClient(f.roundRobin)
		if err != nil {
			continue
		}
		return f.upstream[f.roundRobin].client, f.upstream[f.roundRobin].closer, nil
	}

	return nil, nil, status.Error(codes.Unavailable, err.Error())
}