github.com/polarismesh/polaris@v1.17.8/service/l5_service.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package service 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "strconv" 25 "strings" 26 "sync/atomic" 27 28 "github.com/golang/protobuf/proto" 29 30 "github.com/polarismesh/polaris/common/api/l5" 31 "github.com/polarismesh/polaris/common/model" 32 ) 33 34 var ( 35 // Namespace2SidLayoutID namespace to sid layout id 36 Namespace2SidLayoutID = map[string]uint32{ 37 "Production": 1, 38 "Development": 2, 39 "Pre-release": 3, 40 "Test": 4, 41 "Polaris": 5, 42 "default": 6, 43 } 44 45 // SidLayoutID2Namespace sid layout id to namespace 46 SidLayoutID2Namespace = map[uint32]string{ 47 1: "Production", 48 2: "Development", 49 3: "Pre-release", 50 4: "Test", 51 5: "Polaris", 52 6: "default", 53 } 54 ) 55 56 // 记录l5service发现中的一些状态 57 type l5service struct { 58 discoverRevision string 59 discoverClusterCount uint32 60 } 61 62 // SyncByAgentCmd 根据sid获取路由信息 63 // 老函数: 64 // Stat::instance()->inc_sync_req_cnt(); 65 // 保存client的IP,该函数只是存储到本地的缓存中 66 // Stat::instance()->add_agent(sbac.agent_ip()); 67 func (s *Server) SyncByAgentCmd(ctx context.Context, sbac *l5.Cl5SyncByAgentCmd) ( 68 *l5.Cl5SyncByAgentAckCmd, error) { 69 clientIP := sbac.GetAgentIp() 70 optList := sbac.GetOptList().GetOpt() 71 72 routes := s.getRoutes(clientIP, optList) 73 modIDList, callees, sidConfigs := s.getCallees(routes) 74 policys, sections := s.getPolicysAndSections(modIDList) 75 76 sbaac := &l5.Cl5SyncByAgentAckCmd{ 77 AgentIp: proto.Int32(sbac.GetAgentIp()), 78 SyncFlow: proto.Int32(sbac.GetSyncFlow() + 1), 79 } 80 ipConfigs := make(map[uint32]*model.Location) // 所有的被调IP+主调IP 81 if len(callees) != 0 { 82 serverList := &l5.Cl5ServList{ 83 Serv: make([]*l5.Cl5ServObj, 0, len(callees)), 84 } 85 for _, entry := range callees { 86 server := &l5.Cl5ServObj{ 87 ModId: proto.Int32(int32(entry.ModID)), 88 CmdId: proto.Int32(int32(entry.CmdID)), 89 Ip: proto.Int32(int32(entry.IP)), 90 Port: proto.Int32(int32(entry.Port)), 91 Weight: proto.Int32(int32(entry.Weight)), 92 } 93 serverList.Serv = append(serverList.Serv, server) 94 ipConfigs[entry.IP] = entry.Location // 填充ipConfigs信息 95 } 96 sbaac.ServList = serverList 97 } 98 99 if len(policys) != 0 { 100 routeList := &l5.Cl5RuleList{ 101 Poly: make([]*l5.Cl5PolyObj, 0, len(policys)), 102 Sect: make([]*l5.Cl5SectObj, 0, len(sections)), 103 } 104 for _, entry := range policys { 105 obj := &l5.Cl5PolyObj{ 106 ModId: proto.Int32(int32(entry.ModID)), 107 Div: proto.Int32(int32(entry.Div)), 108 Mod: proto.Int32(int32(entry.Mod)), 109 } 110 routeList.Poly = append(routeList.Poly, obj) 111 } 112 for _, entry := range sections { 113 obj := &l5.Cl5SectObj{ 114 ModId: proto.Int32(int32(entry.ModID)), 115 From: proto.Int32(int32(entry.From)), 116 To: proto.Int32(int32(entry.To)), 117 CmdId: proto.Int32(int32(entry.Xid)), 118 } 119 routeList.Sect = append(routeList.Sect, obj) 120 } 121 sbaac.RuleList = routeList 122 } 123 124 // 保持和cl5源码一致,agent的地域信息,如果找不到,则不加入到ipConfigs中 125 if loc := s.getLocation(ParseIPInt2Str(uint32(sbac.GetAgentIp()))); loc != nil { 126 ipConfigs[uint32(sbac.GetAgentIp())] = loc 127 } 128 if len(ipConfigs) != 0 { 129 ipConfigList := &l5.Cl5IpcList{ 130 Ipc: make([]*l5.Cl5IpcObj, 0, len(ipConfigs)), 131 } 132 for key, entry := range ipConfigs { 133 obj := &l5.Cl5IpcObj{ 134 Ip: proto.Int32(int32(key)), 135 AreaId: proto.Int32(int32(entry.RegionID)), 136 CityId: proto.Int32(int32(entry.ZoneID)), 137 IdcId: proto.Int32(int32(entry.CampusID)), 138 } 139 ipConfigList.Ipc = append(ipConfigList.Ipc, obj) 140 } 141 sbaac.IpcList = ipConfigList 142 } 143 144 sbaac.SidList = CreateCl5SidList(sidConfigs) 145 sbaac.L5SvrList = s.getCl5DiscoverList(ctx, uint32(clientIP)) 146 return sbaac, nil 147 } 148 149 // get routes 150 func (s *Server) getRoutes(clientIP int32, optList []*l5.Cl5OptObj) []*model.Route { 151 cl5Cache := s.caches.CL5() 152 routes := cl5Cache.GetRouteByIP(uint32(clientIP)) 153 if routes == nil { 154 routes = make([]*model.Route, 0) 155 } 156 for _, entry := range optList { 157 modID := entry.GetModId() 158 cmdID := entry.GetCmdId() 159 if ok := cl5Cache.CheckRouteExisted(uint32(clientIP), uint32(modID), uint32(cmdID)); !ok { 160 route := &model.Route{ 161 IP: uint32(clientIP), 162 ModID: uint32(entry.GetModId()), 163 CmdID: uint32(entry.GetCmdId()), 164 SetID: "NOSET", 165 } 166 routes = append(routes, route) 167 // Stat::instance()->add_route(route.ip,route.modId,route.cmdId); TODO 168 } 169 } 170 171 return routes 172 } 173 174 // get callee 175 func (s *Server) getCallees(routes []*model.Route) (map[uint32]bool, []*model.Callee, []*model.SidConfig) { 176 modIDList := make(map[uint32]bool) 177 var callees []*model.Callee 178 var sidConfigs []*model.SidConfig 179 for _, entry := range routes { 180 servers := s.getCalleeByRoute(entry) // 返回nil代表没有找到任何实例 181 if servers == nil { 182 log.Warnf("[Cl5] can not found the instances for sid(%d:%d)", entry.ModID, entry.CmdID) 183 // Stat::instance()->add_lost_route(sbac.agent_ip(),vt_route[i].modId,vt_route[i].cmdId); TODO 184 continue 185 } 186 if len(servers) != 0 { // 不为nil,但是数组长度为0,意味着实例的权重不符合规则 187 callees = append(callees, servers...) 188 } 189 190 modIDList[entry.ModID] = true 191 if sidConfig := s.getSidConfig(entry.ModID, entry.CmdID); sidConfig != nil { 192 sidConfigs = append(sidConfigs, sidConfig) 193 } 194 } 195 196 return modIDList, callees, sidConfigs 197 } 198 199 // get policy and section 200 func (s *Server) getPolicysAndSections(modIDList map[uint32]bool) ([]*model.Policy, []*model.Section) { 201 cl5Cache := s.caches.CL5() 202 var policys []*model.Policy 203 var sections []*model.Section 204 for modID := range modIDList { 205 if policy := cl5Cache.GetPolicy(modID); policy != nil { 206 policys = append(policys, policy) 207 } 208 if secs := cl5Cache.GetSection(modID); len(secs) != 0 { 209 sections = append(sections, secs...) 210 } 211 } 212 213 return policys, sections 214 } 215 216 // RegisterByNameCmd 根据名字获取sid信息 217 func (s *Server) RegisterByNameCmd(rbnc *l5.Cl5RegisterByNameCmd) (*l5.Cl5RegisterByNameAckCmd, error) { 218 // Stat::instance()->inc_register_req_cnt(); TODO 219 220 nameList := rbnc.GetNameList() 221 sidConfigs := make([]*model.SidConfig, 0) 222 for _, name := range nameList.GetName() { 223 if sidConfig := s.getSidConfigByName(name); sidConfig != nil { 224 sidConfigs = append(sidConfigs, sidConfig) 225 } 226 } 227 228 cl5RegisterAckCmd := &l5.Cl5RegisterByNameAckCmd{ 229 CallerIp: proto.Int32(rbnc.GetCallerIp()), 230 } 231 232 cl5RegisterAckCmd.SidList = CreateCl5SidList(sidConfigs) 233 return cl5RegisterAckCmd, nil 234 } 235 236 func (s *Server) computeService(modID uint32, cmdID uint32) *model.Service { 237 sidStr := model.MarshalModCmd(modID, cmdID) 238 // 根据sid找到所述命名空间 239 namespaces := ComputeNamespace(modID, cmdID) 240 for _, namespace := range namespaces { 241 // 根据sid找到polaris服务,这里是源服务 242 service := s.getServiceCache(sidStr, namespace) 243 if service != nil { 244 return service 245 } 246 } 247 return nil 248 } 249 250 // 根据访问关系获取所有符合的被调信息 251 func (s *Server) getCalleeByRoute(route *model.Route) []*model.Callee { 252 out := make([]*model.Callee, 0) 253 if route == nil { 254 return nil 255 } 256 service := s.computeService(route.ModID, route.CmdID) 257 if service == nil { 258 return nil 259 } 260 s.RecordDiscoverStatis(service.Name, service.Namespace) 261 262 hasInstance := false 263 _ = s.caches.Instance().IteratorInstancesWithService(service.ID, 264 func(_ string, entry *model.Instance) (b bool, e error) { 265 // 过滤掉不健康或者隔离状态的server 266 if !entry.Healthy() || entry.Isolate() { 267 return true, nil 268 } 269 270 hasInstance = true 271 // 如果不存在internal-cl5-setId,则默认都是NOSET,适用于别名场景 272 setValue := "NOSET" 273 metadata := entry.Metadata() 274 if val, ok := metadata["internal-cl5-setId"]; ok { 275 setValue = val 276 } 277 278 // 与route的setID匹配,那么直接返回instance.weight 279 weight := entry.Weight() 280 found := false 281 if setValue == route.SetID { 282 found = true 283 } else if !strings.Contains(setValue, route.SetID) { 284 found = false 285 } else { 286 var weights []uint32 287 if val, ok := metadata["internal-cl5-weight"]; ok { 288 weights = ParseWeight(val) 289 } 290 setIDs := ParseSetID(setValue) 291 for i, setID := range setIDs { 292 if setID == route.SetID { 293 found = true 294 if weights != nil && i < len(weights) { 295 weight = weights[i] 296 } 297 break 298 } 299 } 300 } 301 302 // 该Set无被调或者被调的权重为0,则忽略 303 if !found || weight == 0 { 304 return true, nil 305 } 306 307 // 转换ipStr to int 308 ip := ParseIPStr2IntV2(entry.Host()) 309 callee := &model.Callee{ 310 ModID: route.ModID, 311 CmdID: route.CmdID, 312 IP: ip, 313 Port: entry.Port(), 314 Weight: weight, 315 // TODO 没有设置 setID,cl5源码也是没有设置的 316 } 317 // s.getLocation(entry.Host), // ip的地域信息,统一来源于cmdb插件的数据 318 if loc := s.getLocation(entry.Host()); loc != nil { 319 callee.Location = loc 320 } else { 321 // 如果cmdb中找不到数据,则默认地域ID都为0,即默认结构体 322 callee.Location = &model.Location{} 323 } 324 out = append(out, callee) 325 return true, nil 326 }) 327 328 if !hasInstance { 329 return nil 330 } 331 332 return out 333 } 334 335 // 根据sid读取sidConfig的配置信息 336 // 注意,sid--> reference,通过索引服务才能拿到真实的数据 337 func (s *Server) getSidConfig(modID uint32, cmdID uint32) *model.SidConfig { 338 sid := &model.Sid{ModID: modID, CmdID: cmdID} 339 sidStr := model.MarshalSid(sid) 340 341 // 先获取一下namespace 342 namespaces := ComputeNamespace(modID, cmdID) 343 344 var sidService *model.Service 345 for _, namespace := range namespaces { 346 sidService = s.caches.Service().GetServiceByName(sidStr, namespace) 347 if sidService != nil { 348 break 349 } 350 } 351 if sidService == nil { 352 return nil 353 } 354 sidConfig := s.getRealSidConfigMeta(sidService) 355 if sidConfig == nil { 356 return nil 357 } 358 359 sidConfig.ModID = modID 360 sidConfig.CmdID = cmdID 361 362 return sidConfig 363 } 364 365 // 根据名字找到sidConfig 366 // 注意:通过cache,根据cl5Name,找到对应的sid 367 func (s *Server) getSidConfigByName(name string) *model.SidConfig { 368 nameService := s.caches.Service().GetServiceByCl5Name(name) 369 if nameService == nil { 370 return nil 371 } 372 373 sidConfig := s.getRealSidConfigMeta(nameService) 374 if sidConfig == nil { 375 return nil 376 } 377 378 sidMeta, ok := nameService.Meta["internal-cl5-sid"] 379 if !ok { 380 log.Errorf("[Server] not found name(%s) sid", name) 381 return nil 382 } 383 384 sid, err := model.UnmarshalSid(sidMeta) 385 if err != nil { 386 log.Errorf("[Server] unmarshal sid(%s) err: %s", sidMeta, err.Error()) 387 return nil 388 } 389 390 sidConfig.ModID = sid.ModID 391 sidConfig.CmdID = sid.CmdID 392 return sidConfig 393 } 394 395 // 只返回服务名+policy属性 396 func (s *Server) getRealSidConfigMeta(service *model.Service) *model.SidConfig { 397 if service == nil { 398 return nil 399 } 400 401 realService := service 402 // 找一下,是否存在索引服务(别名服务) 403 // 如果存在索引服务,读取索引服务的属性 404 if service.IsAlias() { 405 if referService := s.caches.Service().GetServiceByID(service.Reference); referService != nil { 406 realService = referService 407 } 408 } 409 410 out := &model.SidConfig{ 411 Name: "", 412 Policy: 0, 413 } 414 if nameMeta, ok := realService.Meta["internal-cl5-name"]; ok { 415 out.Name = nameMeta 416 } 417 if policyMeta, ok := realService.Meta["internal-enable-nearby"]; ok { 418 if policyMeta == "true" { 419 out.Policy = 1 420 } 421 } 422 423 return out 424 } 425 426 // 获取cl5.discover 427 func (s *Server) getCl5DiscoverList(ctx context.Context, clientIP uint32) *l5.Cl5L5SvrList { 428 clusterName, _ := ctx.Value(model.Cl5ServerCluster{}).(string) 429 if clusterName == "" { 430 log.Warnf("[Cl5] get server cluster name is empty") 431 return nil 432 } 433 protocol, _ := ctx.Value(model.Cl5ServerProtocol{}).(string) 434 435 service := s.getCl5DiscoverService(clusterName, clientIP) 436 if service == nil { 437 log.Errorf("[Cl5] not found server cluster service(%s)", clusterName) 438 return nil 439 } 440 instances := s.caches.Instance().GetInstancesByServiceID(service.ID) 441 if len(instances) == 0 { 442 log.Errorf("[Cl5] not found any instances for the service(%s, %s)", 443 clusterName, "Polaris") 444 return nil 445 } 446 447 var out l5.Cl5L5SvrList 448 out.Ip = make([]int32, 0, len(instances)) 449 for _, entry := range instances { 450 // 获取同协议的数据 451 if entry.Protocol() != protocol { 452 continue 453 } 454 // 过滤掉不健康或者隔离状态的server 455 if !entry.Healthy() || entry.Isolate() { 456 continue 457 } 458 ip := ParseIPStr2IntV2(entry.Host()) 459 out.Ip = append(out.Ip, int32(ip)) 460 } 461 // 如果没有任何数据,那直接返回空,使用agent配置的IPlist 462 if len(out.GetIp()) == 0 { 463 log.Errorf("[Cl5] get cl5 cluster(%s) instances count 0", service.Name) 464 return nil 465 } 466 467 return &out 468 } 469 470 // 根据集群名获取对应的服务 471 func (s *Server) getCl5DiscoverService(clusterName string, clientIP uint32) *model.Service { 472 service := s.getServiceCache(clusterName, "Polaris") 473 if service == nil { 474 log.Errorf("[Cl5] not found server cluster service(%s)", clusterName) 475 return nil 476 } 477 478 // 根据service的metadata判断,有多少个子集群 479 clusterCount := uint32(0) 480 if service.Revision == s.l5service.discoverRevision { 481 clusterCount = atomic.LoadUint32(&s.l5service.discoverClusterCount) 482 } else { 483 if meta, ok := service.Meta["internal-cluster-count"]; ok { 484 count, err := strconv.Atoi(meta) 485 if err != nil { 486 log.Errorf("[Cl5] get service count , parse err: %s", err.Error()) 487 } else { 488 clusterCount = uint32(count) 489 s.l5service.discoverRevision = service.Revision 490 atomic.StoreUint32(&s.l5service.discoverClusterCount, clusterCount) 491 } 492 } 493 } 494 495 // 如果集群数为0,那么返回埋点的集群 496 if clusterCount == 0 { 497 return service 498 } 499 500 subIndex := clientIP%uint32(clusterCount) + 1 501 subClusterName := fmt.Sprintf("%s.%d", clusterName, subIndex) 502 // log.Infof("[Cl5] ip(%d), clusterCount(%d), name(%s)", clientIP, clusterCount, subClusterName) // TODO 503 subService := s.getServiceCache(subClusterName, "Polaris") 504 if subService == nil { 505 log.Errorf("[Cl5] not found server cluster for ip(%d), cluster count(%d), cluster name(%s)", 506 clientIP, clusterCount, subClusterName) 507 return service 508 } 509 510 return subService 511 } 512 513 // CreateCl5SidList 构造sidConfigs 514 func CreateCl5SidList(sidConfigs []*model.SidConfig) *l5.Cl5SidList { 515 if len(sidConfigs) == 0 { 516 return nil 517 } 518 519 sidList := &l5.Cl5SidList{ 520 Sid: make([]*l5.Cl5SidObj, 0, len(sidConfigs)), 521 } 522 for _, entry := range sidConfigs { 523 obj := &l5.Cl5SidObj{ 524 ModId: proto.Int32(int32(entry.ModID)), 525 CmdId: proto.Int32(int32(entry.CmdID)), 526 Name: proto.String(entry.Name), 527 Policy: proto.Int32(int32(entry.Policy)), 528 } 529 sidList.Sid = append(sidList.Sid, obj) 530 } 531 532 return sidList 533 } 534 535 // ParseSetID 解析metadata保存的setID字符串 536 func ParseSetID(str string) []string { 537 if str == "" { 538 return nil 539 } 540 541 return strings.Split(str, ",") 542 } 543 544 // ParseWeight 解析metadata保存的weight字符串 545 func ParseWeight(str string) []uint32 { 546 if str == "" { 547 return nil 548 } 549 550 items := strings.Split(str, ",") 551 if len(items) == 0 { 552 return nil 553 } 554 out := make([]uint32, 0, len(items)) 555 for _, item := range items { 556 data, err := strconv.ParseUint(item, 10, 32) 557 if err != nil { 558 log.Errorf("[L5Service] parse uint (%s) err: %s", item, err.Error()) 559 return nil 560 } 561 562 out = append(out, uint32(data)) 563 } 564 565 return out 566 } 567 568 // ParseIPStr2Int 字符串IP转为uint32 569 // 转换失败的,需要明确错误 570 func ParseIPStr2Int(ip string) (uint32, error) { 571 ips := strings.Split(ip, ".") 572 if len(ips) != 4 { 573 log.Errorf("[l5Service] ip str(%s) is invalid", ip) 574 return 0, errors.New("ip string is invalid") 575 } 576 577 out := uint32(0) 578 for i := 0; i < 4; i++ { 579 tmp, err := strconv.ParseUint(ips[i], 10, 64) 580 if err != nil { 581 log.Errorf("[L5Service] ip str(%s) to int is err: %s", ip, err.Error()) 582 return 0, err 583 } 584 585 out = out | (uint32(tmp) << uint(i*8)) 586 } 587 588 return out, nil 589 } 590 591 // ParseIPStr2IntV2 字符串IP转为Int,V2 592 func ParseIPStr2IntV2(ip string) uint32 { 593 item := 0 594 var sum uint32 595 var index uint 596 for i := 0; i < len(ip); i++ { 597 if ip[i] == '.' { 598 sum = sum | (uint32(item) << (index * 8)) 599 item = 0 600 index++ 601 } else { 602 item = item*10 + int(ip[i]) - int('0') 603 } 604 } 605 606 sum = sum | (uint32(item) << (index * 8)) 607 return sum 608 } 609 610 // ParseIPInt2Str uint32的IP转换为字符串型 611 func ParseIPInt2Str(ip uint32) string { 612 ipStr := make([]uint32, 4) 613 for i := 0; i < 4; i++ { 614 ipStr[i] = (ip >> uint(i*8)) & 255 615 } 616 str := fmt.Sprintf("%d.%d.%d.%d", ipStr[0], ipStr[1], ipStr[2], ipStr[3]) 617 return str 618 } 619 620 // ComputeNamespace 根据SID分析,返回其对应的namespace 621 func ComputeNamespace(modID uint32, cmdID uint32) []string { 622 // 为了兼容老的sid,只对新的别名sid才生效 623 // 老的sid都属于生产环境的 624 // 3000001是新的moduleID的开始值 625 if moduleID := modID >> 6; moduleID < 3000001 { 626 return []string{DefaultNamespace, ProductionNamespace} 627 } 628 629 layoutID := modID & 63 // 63 -> 111111 630 namespace, ok := SidLayoutID2Namespace[layoutID] 631 if !ok { 632 // 找不到命名空间的,全部返回默认的,也就是Production 633 log.Warnf("sid(%d:%d) found the layoutID is(%d), not match the namespace list", 634 modID, cmdID, layoutID) 635 return []string{DefaultNamespace} 636 } 637 638 log.Infof("Sid(%d:%d) layoutID(%d), the namespace is: %s", 639 modID, cmdID, layoutID, namespace) 640 return []string{namespace} 641 }