github.com/polarismesh/polaris@v1.17.8/apiserver/xdsserverv3/server.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package xdsserverv3
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"net"
    24  	"strconv"
    25  	"time"
    26  
    27  	clusterservice "github.com/envoyproxy/go-control-plane/envoy/service/cluster/v3"
    28  	discoverygrpc "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    29  	endpointservice "github.com/envoyproxy/go-control-plane/envoy/service/endpoint/v3"
    30  	listenerservice "github.com/envoyproxy/go-control-plane/envoy/service/listener/v3"
    31  	routeservice "github.com/envoyproxy/go-control-plane/envoy/service/route/v3"
    32  	runtimeservice "github.com/envoyproxy/go-control-plane/envoy/service/runtime/v3"
    33  	secretservice "github.com/envoyproxy/go-control-plane/envoy/service/secret/v3"
    34  	cachev3 "github.com/envoyproxy/go-control-plane/pkg/cache/v3"
    35  	serverv3 "github.com/envoyproxy/go-control-plane/pkg/server/v3"
    36  	apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage"
    37  	"go.uber.org/atomic"
    38  	"go.uber.org/zap"
    39  	"golang.org/x/sync/singleflight"
    40  	"google.golang.org/grpc"
    41  
    42  	"github.com/polarismesh/polaris/apiserver"
    43  	"github.com/polarismesh/polaris/apiserver/xdsserverv3/resource"
    44  	"github.com/polarismesh/polaris/cache"
    45  	api "github.com/polarismesh/polaris/common/api/v1"
    46  	connlimit "github.com/polarismesh/polaris/common/conn/limit"
    47  	commonlog "github.com/polarismesh/polaris/common/log"
    48  	"github.com/polarismesh/polaris/common/model"
    49  	"github.com/polarismesh/polaris/common/utils"
    50  	"github.com/polarismesh/polaris/service"
    51  )
    52  
    53  type ResourceServer interface {
    54  	Generate(versionLocal string, registryInfo map[string]map[model.ServiceKey]*resource.ServiceInfo)
    55  }
    56  
    57  // XDSServer is the xDS server
    58  type XDSServer struct {
    59  	ctx             context.Context
    60  	listenIP        string
    61  	listenPort      uint32
    62  	start           bool
    63  	restart         bool
    64  	exitCh          chan struct{}
    65  	namingServer    service.DiscoverServer
    66  	cache           cachev3.SnapshotCache
    67  	versionNum      *atomic.Uint64
    68  	server          *grpc.Server
    69  	connLimitConfig *connlimit.Config
    70  
    71  	nodeMgr           *resource.XDSNodeManager
    72  	registryInfo      map[string]map[model.ServiceKey]*resource.ServiceInfo
    73  	resourceGenerator *XdsResourceGenerator
    74  
    75  	active       *atomic.Bool
    76  	finishCtx    context.Context
    77  	singleFlight singleflight.Group
    78  }
    79  
    80  // Initialize 初始化
    81  func (x *XDSServer) Initialize(ctx context.Context, option map[string]interface{},
    82  	apiConf map[string]apiserver.APIConfig) error {
    83  	x.registryInfo = make(map[string]map[model.ServiceKey]*resource.ServiceInfo)
    84  	x.listenPort = uint32(option["listenPort"].(int))
    85  	x.listenIP = option["listenIP"].(string)
    86  	x.nodeMgr = resource.NewXDSNodeManager()
    87  	x.cache = NewSnapshotCache(cachev3.NewSnapshotCache(false, resource.PolarisNodeHash{
    88  		NodeMgr: x.nodeMgr,
    89  	}, commonlog.GetScopeOrDefaultByName(commonlog.XDSLoggerName)), x)
    90  	x.active = atomic.NewBool(false)
    91  	x.versionNum = atomic.NewUint64(0)
    92  	x.ctx = ctx
    93  
    94  	var err error
    95  
    96  	x.namingServer, err = service.GetOriginServer()
    97  	if err != nil {
    98  		log.Errorf("%v", err)
    99  		return err
   100  	}
   101  
   102  	if raw, _ := option["connLimit"].(map[interface{}]interface{}); raw != nil {
   103  		connConfig, err := connlimit.ParseConnLimitConfig(raw)
   104  		if err != nil {
   105  			return err
   106  		}
   107  		x.connLimitConfig = connConfig
   108  	}
   109  	x.resourceGenerator = &XdsResourceGenerator{
   110  		namingServer: x.namingServer,
   111  		cache:        x.cache,
   112  		versionNum:   x.versionNum,
   113  		xdsNodesMgr:  x.nodeMgr,
   114  	}
   115  	return nil
   116  }
   117  
   118  // Run 启动运行
   119  func (x *XDSServer) Run(errCh chan error) {
   120  	// 启动 grpc server
   121  	ctx := context.Background()
   122  	cb := resource.NewCallback(commonlog.GetScopeOrDefaultByName(commonlog.XDSLoggerName), x.nodeMgr)
   123  	srv := serverv3.NewServer(ctx, x.cache, cb)
   124  	var grpcOptions []grpc.ServerOption
   125  	grpcOptions = append(grpcOptions, grpc.MaxConcurrentStreams(1000))
   126  	grpcServer := grpc.NewServer(grpcOptions...)
   127  	x.server = grpcServer
   128  	address := fmt.Sprintf("%v:%v", x.listenIP, x.listenPort)
   129  	listener, err := net.Listen("tcp", address)
   130  	if err != nil {
   131  		log.Errorf("%v", err)
   132  		errCh <- err
   133  		return
   134  	}
   135  
   136  	if x.connLimitConfig != nil && x.connLimitConfig.OpenConnLimit {
   137  		log.Infof("grpc server use max connection limit: %d, grpc max limit: %d",
   138  			x.connLimitConfig.MaxConnPerHost, x.connLimitConfig.MaxConnLimit)
   139  		listener, err = connlimit.NewListener(listener, x.GetProtocol(), x.connLimitConfig)
   140  		if err != nil {
   141  			log.Errorf("conn limit init err: %s", err.Error())
   142  			errCh <- err
   143  			return
   144  		}
   145  	}
   146  
   147  	registerServer(grpcServer, srv)
   148  	log.Infof("management server listening on %d\n", x.listenPort)
   149  	if err = grpcServer.Serve(listener); err != nil {
   150  		log.Errorf("%v", err)
   151  		errCh <- err
   152  		return
   153  	}
   154  	log.Info("xds server stop")
   155  }
   156  
   157  func registerServer(grpcServer *grpc.Server, server serverv3.Server) {
   158  	// register services
   159  	discoverygrpc.RegisterAggregatedDiscoveryServiceServer(grpcServer, server)
   160  	endpointservice.RegisterEndpointDiscoveryServiceServer(grpcServer, server)
   161  	clusterservice.RegisterClusterDiscoveryServiceServer(grpcServer, server)
   162  	routeservice.RegisterRouteDiscoveryServiceServer(grpcServer, server)
   163  	listenerservice.RegisterListenerDiscoveryServiceServer(grpcServer, server)
   164  	secretservice.RegisterSecretDiscoveryServiceServer(grpcServer, server)
   165  	runtimeservice.RegisterRuntimeDiscoveryServiceServer(grpcServer, server)
   166  }
   167  
   168  // Stop 停止服务
   169  func (x *XDSServer) Stop() {
   170  	connlimit.RemoveLimitListener(x.GetProtocol())
   171  	if x.server != nil {
   172  		x.server.Stop()
   173  	}
   174  }
   175  
   176  // Restart 重启服务
   177  func (x *XDSServer) Restart(option map[string]interface{}, apiConf map[string]apiserver.APIConfig,
   178  	errCh chan error) error {
   179  
   180  	log.Infof("restart xds server with new config: +%v", option)
   181  
   182  	x.restart = true
   183  	x.Stop()
   184  	if x.start {
   185  		<-x.exitCh
   186  	}
   187  
   188  	log.Info("old xds server has stopped, begin restarting it")
   189  	if err := x.Initialize(context.Background(), option, apiConf); err != nil {
   190  		log.Errorf("restart grpc server err: %s", err.Error())
   191  		return err
   192  	}
   193  
   194  	log.Info("init grpc server successfully, restart it")
   195  	x.restart = false
   196  	go x.Run(errCh)
   197  	return nil
   198  }
   199  
   200  // GetProtocol 服务注册到北极星中的协议
   201  func (x *XDSServer) GetProtocol() string {
   202  	return "xdsv3"
   203  }
   204  
   205  // GetPort 服务注册到北极星中的端口
   206  func (x *XDSServer) GetPort() uint32 {
   207  	return x.listenPort
   208  }
   209  
   210  func (x *XDSServer) activeUpdateTask() {
   211  	if !x.active.CompareAndSwap(false, true) {
   212  		return
   213  	}
   214  	log.Info("active update xds resource snapshot task")
   215  
   216  	if err := x.initRegistryInfo(); err != nil {
   217  		log.Errorf("initRegistryInfo %v", err)
   218  		return
   219  	}
   220  
   221  	if err := x.getRegistryInfoWithCache(x.ctx, x.registryInfo); err != nil {
   222  		log.Errorf("getRegistryInfoWithCache %v", err)
   223  		return
   224  	}
   225  	x.Generate(x.registryInfo)
   226  	go x.startSynTask(x.ctx)
   227  }
   228  
   229  func (x *XDSServer) startSynTask(ctx context.Context) {
   230  	// 读取 polaris 缓存数据
   231  	synXdsConfFunc := func() {
   232  		registryInfo := make(map[string]map[model.ServiceKey]*resource.ServiceInfo)
   233  
   234  		err := x.getRegistryInfoWithCache(ctx, registryInfo)
   235  		if err != nil {
   236  			log.Error("get registry info from cache", zap.Error(err))
   237  			return
   238  		}
   239  
   240  		needPush := make(map[string]map[model.ServiceKey]*resource.ServiceInfo)
   241  
   242  		// 处理删除 ns 中最后一个 service
   243  		for ns, infos := range x.registryInfo {
   244  			_, ok := registryInfo[ns]
   245  			if !ok && len(infos) > 0 {
   246  				// 这一次轮询时,该命名空间下的最后一个服务已经被删除了,此时,当前的命名空间需要处理
   247  				needPush[ns] = map[model.ServiceKey]*resource.ServiceInfo{}
   248  				x.registryInfo[ns] = map[model.ServiceKey]*resource.ServiceInfo{}
   249  			}
   250  		}
   251  
   252  		// 与本地缓存对比,是否发生了变化,对发生变化的命名空间,推送配置
   253  		for ns, infos := range registryInfo {
   254  			cacheServiceInfos, ok := x.registryInfo[ns]
   255  			if !ok {
   256  				// 新命名空间,需要处理
   257  				needPush[ns] = infos
   258  				x.registryInfo[ns] = infos
   259  				continue
   260  			}
   261  
   262  			// todo 不考虑命名空间删除的情况
   263  			// 判断当前这个空间,是否需要更新配置
   264  			if x.checkUpdate(infos, cacheServiceInfos) {
   265  				needPush[ns] = infos
   266  				x.registryInfo[ns] = infos
   267  			}
   268  		}
   269  
   270  		if len(needPush) > 0 {
   271  			log.Info("start update xds resource snapshot ticker task", zap.Int("need-push", len(needPush)))
   272  			x.Generate(needPush)
   273  		}
   274  	}
   275  
   276  	ticker := time.NewTicker(5 * cache.UpdateCacheInterval)
   277  	for {
   278  		select {
   279  		case <-ticker.C:
   280  			synXdsConfFunc()
   281  		case <-ctx.Done():
   282  			ticker.Stop()
   283  			log.Info("stop update xds resource snapshot ticker task")
   284  			return
   285  		}
   286  	}
   287  }
   288  
   289  func (x *XDSServer) initRegistryInfo() error {
   290  	namespaces := x.namingServer.Cache().Namespace().GetNamespaceList()
   291  	// 启动时,获取全量的 namespace 信息,用来推送空配置
   292  	for _, n := range namespaces {
   293  		x.registryInfo[n.Name] = map[model.ServiceKey]*resource.ServiceInfo{}
   294  	}
   295  	return nil
   296  }
   297  
   298  // syncPolarisServiceInfo 初始化本地 cache,初始化 xds cache
   299  func (x *XDSServer) getRegistryInfoWithCache(ctx context.Context,
   300  	registryInfo map[string]map[model.ServiceKey]*resource.ServiceInfo) error {
   301  
   302  	// 从 cache 中获取全量的服务信息
   303  	serviceIterProc := func(key string, value *model.Service) (bool, error) {
   304  		if _, ok := registryInfo[value.Namespace]; !ok {
   305  			registryInfo[value.Namespace] = map[model.ServiceKey]*resource.ServiceInfo{}
   306  		}
   307  
   308  		svcKey := model.ServiceKey{
   309  			Namespace: value.Namespace,
   310  			Name:      value.Name,
   311  		}
   312  
   313  		info := &resource.ServiceInfo{
   314  			ID:         value.ID,
   315  			Name:       value.Name,
   316  			Namespace:  value.Namespace,
   317  			ServiceKey: svcKey,
   318  			Instances:  []*apiservice.Instance{},
   319  			Ports:      value.ServicePorts,
   320  		}
   321  		registryInfo[value.Namespace][svcKey] = info
   322  		return true, nil
   323  	}
   324  
   325  	if err := x.namingServer.Cache().Service().IteratorServices(serviceIterProc); err != nil {
   326  		log.Errorf("syn polaris services error %v", err)
   327  		return err
   328  	}
   329  
   330  	// 遍历每一个服务,获取路由、熔断策略和全量的服务实例信息
   331  	for _, v := range registryInfo {
   332  		for _, svc := range v {
   333  			s := &apiservice.Service{
   334  				Name:      utils.NewStringValue(svc.Name),
   335  				Namespace: utils.NewStringValue(svc.Namespace),
   336  				Revision:  utils.NewStringValue("-1"),
   337  			}
   338  
   339  			// 获取routing配置
   340  			routerRule, err := x.namingServer.Cache().RoutingConfig().GetRouterConfig("", svc.Name, svc.Namespace)
   341  			if err != nil {
   342  				log.Errorf("error sync routing for namespace(%s) service(%s), info : %s", svc.Namespace,
   343  					svc.Name, err.Error())
   344  				return fmt.Errorf("[XDSV3] error sync routing for %s", svc.Name)
   345  			}
   346  
   347  			svc.SvcRoutingRevision = routerRule.GetRevision().GetValue()
   348  			svc.Routing = routerRule
   349  
   350  			// 获取instance配置
   351  			resp := x.namingServer.ServiceInstancesCache(ctx, s)
   352  			if resp.GetCode().Value != api.ExecuteSuccess {
   353  				log.Errorf("[XDSV3] error sync instances for namespace(%s) service(%s), info : %s",
   354  					svc.Namespace, svc.Name, resp.Info.GetValue())
   355  				return fmt.Errorf("error sync instances for %s", svc.Name)
   356  			}
   357  
   358  			svc.AliasFor = x.namingServer.Cache().Service().GetAliasFor(svc.Name, svc.Namespace)
   359  			svc.SvcInsRevision = resp.Service.Revision.Value
   360  			svc.Instances = resp.Instances
   361  			ports := x.namingServer.Cache().Instance().GetServicePorts(svc.ID)
   362  			if svc.AliasFor != nil {
   363  				ports = x.namingServer.Cache().Instance().GetServicePorts(svc.AliasFor.ID)
   364  			}
   365  			svc.Ports = ports
   366  
   367  			// 获取ratelimit配置
   368  			ratelimitResp := x.namingServer.GetRateLimitWithCache(ctx, s)
   369  			if ratelimitResp.GetCode().Value != api.ExecuteSuccess {
   370  				log.Errorf("[XDSV3] error sync ratelimit for %s, info : %s", svc.Name,
   371  					ratelimitResp.Info.GetValue())
   372  				return fmt.Errorf("error sync ratelimit for %s", svc.Name)
   373  			}
   374  			if ratelimitResp.RateLimit != nil {
   375  				svc.SvcRateLimitRevision = ratelimitResp.RateLimit.Revision.Value
   376  				svc.RateLimit = ratelimitResp.RateLimit
   377  			}
   378  			// 获取circuitBreaker配置
   379  			circuitBreakerResp := x.namingServer.GetCircuitBreakerWithCache(ctx, s)
   380  			if circuitBreakerResp.GetCode().Value != api.ExecuteSuccess {
   381  				log.Errorf("[XDSV3] error sync circuitBreaker for %s, info : %s",
   382  					svc.Name, circuitBreakerResp.Info.GetValue())
   383  				return fmt.Errorf("error sync circuitBreaker for %s", svc.Name)
   384  			}
   385  			if circuitBreakerResp.CircuitBreaker != nil {
   386  				svc.CircuitBreakerRevision = circuitBreakerResp.CircuitBreaker.Revision.Value
   387  				svc.CircuitBreaker = circuitBreakerResp.CircuitBreaker
   388  			}
   389  
   390  			// 获取faultDetect配置
   391  			faultDetectResp := x.namingServer.GetFaultDetectWithCache(ctx, s)
   392  			if faultDetectResp.GetCode().Value != api.ExecuteSuccess {
   393  				log.Errorf("[XDSV3] error sync faultDetect for %s, info : %s",
   394  					svc.Name, faultDetectResp.Info.GetValue())
   395  				return fmt.Errorf("error sync faultDetect for %s", svc.Name)
   396  			}
   397  			if faultDetectResp.FaultDetector != nil {
   398  				svc.FaultDetectRevision = faultDetectResp.FaultDetector.Revision
   399  				svc.FaultDetect = faultDetectResp.FaultDetector
   400  			}
   401  		}
   402  	}
   403  	return nil
   404  }
   405  
   406  func (x *XDSServer) Generate(needPush map[string]map[model.ServiceKey]*resource.ServiceInfo) {
   407  	versionLocal := time.Now().Format(time.RFC3339) + "/" + strconv.FormatUint(x.versionNum.Inc(), 10)
   408  	x.resourceGenerator.Generate(versionLocal, needPush)
   409  }
   410  
   411  func (x *XDSServer) checkUpdate(curServiceInfo, cacheServiceInfo map[model.ServiceKey]*resource.ServiceInfo) bool {
   412  	if len(curServiceInfo) != len(cacheServiceInfo) {
   413  		return true
   414  	}
   415  	for _, info := range curServiceInfo {
   416  		find := false
   417  		for _, serviceInfo := range cacheServiceInfo {
   418  			if info.Name == serviceInfo.Name {
   419  				// 通过 revision 判断
   420  				if info.SvcInsRevision != serviceInfo.SvcInsRevision {
   421  					return true
   422  				}
   423  				if info.SvcRoutingRevision != serviceInfo.SvcRoutingRevision {
   424  					return true
   425  				}
   426  				if info.SvcRateLimitRevision != serviceInfo.SvcRateLimitRevision {
   427  					return true
   428  				}
   429  				if info.CircuitBreakerRevision != serviceInfo.CircuitBreakerRevision {
   430  					return true
   431  				}
   432  				if info.FaultDetectRevision != serviceInfo.FaultDetectRevision {
   433  					return true
   434  				}
   435  				find = true
   436  			}
   437  		}
   438  		if !find {
   439  			return true
   440  		}
   441  	}
   442  	return false
   443  }