github.com/cilium/cilium@v1.16.2/pkg/endpoint/restore.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package endpoint 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "encoding/base64" 11 "encoding/json" 12 "errors" 13 "fmt" 14 "io" 15 "net/netip" 16 "os" 17 "path/filepath" 18 "strings" 19 20 "github.com/sirupsen/logrus" 21 "k8s.io/apimachinery/pkg/types" 22 23 "github.com/cilium/cilium/api/v1/models" 24 "github.com/cilium/cilium/pkg/common" 25 "github.com/cilium/cilium/pkg/controller" 26 dptypes "github.com/cilium/cilium/pkg/datapath/types" 27 "github.com/cilium/cilium/pkg/endpoint/regeneration" 28 "github.com/cilium/cilium/pkg/fqdn" 29 "github.com/cilium/cilium/pkg/fqdn/restore" 30 "github.com/cilium/cilium/pkg/identity" 31 "github.com/cilium/cilium/pkg/ipcache" 32 "github.com/cilium/cilium/pkg/labels" 33 "github.com/cilium/cilium/pkg/labelsfilter" 34 "github.com/cilium/cilium/pkg/logging/logfields" 35 "github.com/cilium/cilium/pkg/mac" 36 "github.com/cilium/cilium/pkg/node" 37 "github.com/cilium/cilium/pkg/option" 38 "github.com/cilium/cilium/pkg/time" 39 ) 40 41 var ( 42 restoreEndpointIdentityControllerGroup = controller.NewGroup("restore-endpoint-identity") 43 initialGlobalIdentitiesControllerGroup = controller.NewGroup("initial-global-identities") 44 ) 45 46 // ReadEPsFromDirNames returns a mapping of endpoint ID to endpoint of endpoints 47 // from a list of directory names that can possible contain an endpoint. 48 func ReadEPsFromDirNames(ctx context.Context, owner regeneration.Owner, policyGetter policyRepoGetter, 49 namedPortsGetter namedPortsGetter, basePath string, eptsDirNames []string) map[uint16]*Endpoint { 50 51 completeEPDirNames, incompleteEPDirNames := partitionEPDirNamesByRestoreStatus(eptsDirNames) 52 53 if len(incompleteEPDirNames) > 0 { 54 for _, epDirName := range incompleteEPDirNames { 55 scopedLog := log.WithFields(logrus.Fields{ 56 logfields.EndpointID: epDirName, 57 }) 58 fullDirName := filepath.Join(basePath, epDirName) 59 scopedLog.Info(fmt.Sprintf("Found incomplete restore directory %s. Removing it...", fullDirName)) 60 if err := os.RemoveAll(epDirName); err != nil { 61 scopedLog.WithError(err).Warn(fmt.Sprintf("Error while removing directory %s. Ignoring it...", fullDirName)) 62 } 63 } 64 } 65 66 possibleEPs := map[uint16]*Endpoint{} 67 for _, epDirName := range completeEPDirNames { 68 epDir := filepath.Join(basePath, epDirName) 69 70 scopedLog := log.WithFields(logrus.Fields{ 71 logfields.EndpointID: epDirName, 72 logfields.Path: epDir, 73 }) 74 75 state, err := findEndpointState(epDir, scopedLog) 76 if err != nil { 77 scopedLog.WithError(err).Warn("Couldn't find state, ignoring endpoint") 78 continue 79 } 80 81 ep, err := parseEndpoint(ctx, owner, policyGetter, namedPortsGetter, state) 82 if err != nil { 83 scopedLog.WithError(err).Warn("Unable to parse the C header file") 84 continue 85 } 86 if _, ok := possibleEPs[ep.ID]; ok { 87 // If the endpoint already exists then give priority to the directory 88 // that contains an endpoint that didn't fail to be build. 89 if strings.HasSuffix(ep.DirectoryPath(), epDirName) { 90 possibleEPs[ep.ID] = ep 91 } 92 } else { 93 possibleEPs[ep.ID] = ep 94 } 95 96 // We need to save the host endpoint ID as we'll need it to regenerate 97 // other endpoints. 98 if ep.IsHost() { 99 node.SetEndpointID(ep.GetID()) 100 } 101 } 102 return possibleEPs 103 } 104 105 // findEndpointState finds the JSON representation of an endpoint's state in 106 // a directory. 107 // 108 // It prefers reading from the endpoint state JSON file and falls back to 109 // reading from the header. 110 func findEndpointState(dir string, log *logrus.Entry) ([]byte, error) { 111 state, err := os.ReadFile(filepath.Join(dir, common.EndpointStateFileName)) 112 if err == nil { 113 log.Debug("Restore from JSON file") 114 return state, nil 115 } 116 if !errors.Is(err, os.ErrNotExist) { 117 return nil, err 118 } 119 120 // Fall back to reading state from the C header. 121 // Remove this at some point in the far future. 122 f, err := os.Open(filepath.Join(dir, common.CHeaderFileName)) 123 if err != nil { 124 return nil, err 125 } 126 defer f.Close() 127 128 log.Debug("Restore from C header file") 129 130 br := bufio.NewReader(f) 131 var line []byte 132 for { 133 b, err := br.ReadBytes('\n') 134 if errors.Is(err, io.EOF) { 135 return nil, os.ErrNotExist 136 } 137 if err != nil { 138 return nil, err 139 } 140 if bytes.Contains(b, []byte(ciliumCHeaderPrefix)) { 141 line = b 142 break 143 } 144 } 145 146 epSlice := bytes.Split(line, []byte{':'}) 147 if len(epSlice) != 2 { 148 return nil, fmt.Errorf("invalid format %q. Should contain a single ':'", line) 149 } 150 151 return base64.StdEncoding.AppendDecode(nil, epSlice[1]) 152 } 153 154 // partitionEPDirNamesByRestoreStatus partitions the provided list of directory 155 // names that can possibly contain an endpoint, into two lists, containing those 156 // names that represent an incomplete endpoint restore and those that do not. 157 func partitionEPDirNamesByRestoreStatus(eptsDirNames []string) (complete []string, incomplete []string) { 158 dirNames := make(map[string]struct{}, len(eptsDirNames)) 159 for _, epDirName := range eptsDirNames { 160 dirNames[epDirName] = struct{}{} 161 } 162 163 incompleteSuffixes := []string{nextDirectorySuffix, nextFailedDirectorySuffix} 164 incompleteSet := make(map[string]struct{}) 165 166 for _, epDirName := range eptsDirNames { 167 for _, suff := range incompleteSuffixes { 168 if strings.HasSuffix(epDirName, suff) { 169 if _, exists := dirNames[epDirName[:len(epDirName)-len(suff)]]; exists { 170 incompleteSet[epDirName] = struct{}{} 171 } 172 } 173 } 174 } 175 176 for epDirName := range dirNames { 177 if _, exists := incompleteSet[epDirName]; exists { 178 incomplete = append(incomplete, epDirName) 179 } else { 180 complete = append(complete, epDirName) 181 } 182 } 183 184 return 185 } 186 187 // RegenerateAfterRestore performs the following operations on the specified 188 // Endpoint: 189 // * allocates an identity for the Endpoint 190 // * fetches the latest labels from the pod. 191 // * regenerates the endpoint 192 // Returns an error if any operation fails while trying to perform the above 193 // operations. 194 func (e *Endpoint) RegenerateAfterRestore(regenerator *Regenerator, bwm dptypes.BandwidthManager, resolveMetadata MetadataResolverCB) error { 195 if err := e.restoreIdentity(regenerator); err != nil { 196 return err 197 } 198 199 // Now that we have restored the endpoints' identity, run the metadata 200 // resolver so that we can fetch the latest labels from the pod for this 201 // endpoint. 202 e.RunRestoredMetadataResolver(bwm, resolveMetadata) 203 204 scopedLog := log.WithField(logfields.EndpointID, e.ID) 205 206 regenerationMetadata := ®eneration.ExternalRegenerationMetadata{ 207 Reason: "syncing state to host", 208 RegenerationLevel: regeneration.RegenerateWithDatapath, 209 } 210 if buildSuccess := <-e.Regenerate(regenerationMetadata); !buildSuccess { 211 return fmt.Errorf("failed while regenerating endpoint") 212 } 213 214 scopedLog.WithField(logfields.IPAddr, []string{e.GetIPv4Address(), e.GetIPv6Address()}).Info("Restored endpoint") 215 return nil 216 } 217 218 func (e *Endpoint) restoreIdentity(regenerator *Regenerator) error { 219 if err := e.rlockAlive(); err != nil { 220 e.logDisconnectedMutexAction(err, "before filtering labels during regenerating restored endpoint") 221 return err 222 } 223 scopedLog := log.WithField(logfields.EndpointID, e.ID) 224 // Filter the restored labels with the new daemon's filter 225 l, _ := labelsfilter.Filter(e.OpLabels.AllLabels()) 226 e.runlock() 227 228 // Getting the ep's identity while we are restoring should block the 229 // restoring of the endpoint until we get its security identity ID. 230 // If the endpoint is removed, this controller will cancel the allocator 231 // requests. 232 controllerName := fmt.Sprintf("restoring-ep-identity (%v)", e.ID) 233 var ( 234 id *identity.Identity 235 allocatedIdentity = make(chan struct{}) 236 ) 237 e.UpdateController(controllerName, 238 controller.ControllerParams{ 239 Group: restoreEndpointIdentityControllerGroup, 240 DoFunc: func(ctx context.Context) (err error) { 241 allocateCtx, cancel := context.WithTimeout(ctx, option.Config.KVstoreConnectivityTimeout) 242 defer cancel() 243 id, _, err = e.allocator.AllocateIdentity(allocateCtx, l, true, identity.InvalidIdentity) 244 if err != nil { 245 return err 246 } 247 close(allocatedIdentity) 248 return nil 249 }, 250 }) 251 252 // Wait until we either get an identity or the endpoint is removed or 253 // deleted from the node. 254 select { 255 case <-e.aliveCtx.Done(): 256 return ErrNotAlive 257 case <-allocatedIdentity: 258 } 259 260 // Wait for initial identities and ipcache from the 261 // kvstore before doing any policy calculation for 262 // endpoints that don't have a fixed identity or are 263 // not well known. 264 if !id.IsFixed() && !id.IsWellKnown() { 265 // Getting the initial global identities while we are restoring should 266 // block the restoring of the endpoint. 267 // If the endpoint is removed, this controller will cancel the allocator 268 // WaitForInitialGlobalIdentities function. 269 controllerName := fmt.Sprintf("waiting-initial-global-identities-ep (%v)", e.ID) 270 var gotInitialGlobalIdentities = make(chan struct{}) 271 e.UpdateController(controllerName, 272 controller.ControllerParams{ 273 Group: initialGlobalIdentitiesControllerGroup, 274 DoFunc: func(ctx context.Context) (err error) { 275 identityCtx, cancel := context.WithTimeout(ctx, option.Config.KVstoreConnectivityTimeout) 276 defer cancel() 277 278 err = e.allocator.WaitForInitialGlobalIdentities(identityCtx) 279 if err != nil { 280 scopedLog.WithError(err).Warn("Failed while waiting for initial global identities") 281 return err 282 } 283 close(gotInitialGlobalIdentities) 284 return nil 285 }, 286 }) 287 288 // Wait until we either the initial global identities or the endpoint 289 // is deleted. 290 select { 291 case <-e.aliveCtx.Done(): 292 return ErrNotAlive 293 case <-gotInitialGlobalIdentities: 294 } 295 } 296 297 // Wait for ipcache sync before regeneration for endpoints including 298 // the ones with fixed identity (e.g. host endpoint), this ensures that 299 // the regenerated datapath always lookups from a ready ipcache map. 300 if option.Config.KVStore != "" { 301 ipcache.WaitForKVStoreSync() 302 } 303 304 // Wait for ipcache and identities synchronization from all remote clusters, 305 // to prevent disrupting cross-cluster connections on endpoint regeneration. 306 if err := regenerator.WaitForClusterMeshIPIdentitiesSync(e.aliveCtx); err != nil { 307 return err 308 } 309 310 if err := e.lockAlive(); err != nil { 311 scopedLog.Warn("Endpoint to restore has been deleted") 312 return err 313 } 314 315 e.setState(StateRestoring, "Synchronizing endpoint labels with KVStore") 316 317 if e.SecurityIdentity != nil { 318 if oldSecID := e.SecurityIdentity.ID; id.ID != oldSecID { 319 log.WithFields(logrus.Fields{ 320 logfields.EndpointID: e.ID, 321 logfields.IdentityLabels + ".old": oldSecID, 322 logfields.IdentityLabels + ".new": id.ID, 323 }).Info("Security identity for endpoint is different from the security identity restored for the endpoint") 324 325 // The identity of the endpoint being 326 // restored has changed. This can be 327 // caused by two main reasons: 328 // 329 // 1) Cilium has been upgraded, 330 // downgraded or the configuration has 331 // changed and the new version or 332 // configuration causes different 333 // labels to be considered security 334 // relevant for this endpoint. 335 // 336 // Immediately using the identity may 337 // cause connectivity problems if this 338 // is the first endpoint in the cluster 339 // to use the new identity. All other 340 // nodes will not have had a chance to 341 // adjust the security policies for 342 // their endpoints. Hence, apply a 343 // grace period to allow for the 344 // update. It is not required to check 345 // any local endpoints for potential 346 // outdated security rules, the 347 // notification of the new security 348 // identity will have been received and 349 // will trigger the necessary 350 // recalculation of all local 351 // endpoints. 352 // 353 // 2) The identity is outdated as the 354 // state in the kvstore has changed. 355 // This reason would justify an 356 // immediate use of the new identity 357 // but given the current identity is 358 // already in place, it is also correct 359 // to continue using it for the 360 // duration of a grace period. 361 time.Sleep(option.Config.IdentityChangeGracePeriod) 362 } 363 } 364 // The identity of a freshly restored endpoint is incomplete due to some 365 // parts of the identity not being marshaled to JSON. Hence we must set 366 // the identity even if has not changed. 367 e.SetIdentity(id, true) 368 e.unlock() 369 370 return nil 371 } 372 373 // toSerializedEndpoint converts the Endpoint to its corresponding 374 // serializableEndpoint, which contains all of the fields that are needed upon 375 // restoring an Endpoint after cilium-agent restarts. 376 func (e *Endpoint) toSerializedEndpoint() *serializableEndpoint { 377 378 return &serializableEndpoint{ 379 ID: e.ID, 380 ContainerName: e.GetContainerName(), 381 ContainerID: e.GetContainerID(), 382 DockerNetworkID: e.dockerNetworkID, 383 DockerEndpointID: e.dockerEndpointID, 384 IfName: e.ifName, 385 IfIndex: e.ifIndex, 386 ContainerIfName: e.containerIfName, 387 DisableLegacyIdentifiers: e.disableLegacyIdentifiers, 388 OpLabels: e.OpLabels, 389 LXCMAC: e.mac, 390 IPv6: e.IPv6, 391 IPv6IPAMPool: e.IPv6IPAMPool, 392 IPv4: e.IPv4, 393 IPv4IPAMPool: e.IPv4IPAMPool, 394 NodeMAC: e.nodeMAC, 395 SecurityIdentity: e.SecurityIdentity, 396 Options: e.Options, 397 DNSRules: e.DNSRules, 398 DNSRulesV2: e.DNSRulesV2, 399 DNSHistory: e.DNSHistory, 400 DNSZombies: e.DNSZombies, 401 K8sPodName: e.K8sPodName, 402 K8sNamespace: e.K8sNamespace, 403 K8sUID: e.K8sUID, 404 DatapathConfiguration: e.DatapathConfiguration, 405 CiliumEndpointUID: e.ciliumEndpointUID, 406 Properties: e.properties, 407 NetnsCookie: e.NetNsCookie, 408 } 409 } 410 411 // serializableEndpoint contains the fields from an Endpoint which are needed to be 412 // restored if cilium-agent restarts. 413 // 414 // WARNING - STABLE API 415 // This structure is written as JSON to StateDir/{ID}/ep_config.h to allow to 416 // restore endpoints when the agent is being restarted. The restore operation 417 // will read the file and re-create all endpoints with all fields which are not 418 // marked as private to JSON marshal. Do NOT modify this structure in ways which 419 // is not JSON forward compatible. 420 type serializableEndpoint struct { 421 // ID of the endpoint, unique in the scope of the node 422 ID uint16 423 424 // containerName is the name given to the endpoint by the container runtime 425 ContainerName string 426 427 // containerID is the container ID that docker has assigned to the endpoint 428 // Note: The JSON tag was kept for backward compatibility. 429 ContainerID string `json:"dockerID,omitempty"` 430 431 // dockerNetworkID is the network ID of the libnetwork network if the 432 // endpoint is a docker managed container which uses libnetwork 433 DockerNetworkID string 434 435 // dockerEndpointID is the Docker network endpoint ID if managed by 436 // libnetwork 437 DockerEndpointID string 438 439 // ifName is the name of the host facing interface (veth pair) which 440 // connects into the endpoint 441 IfName string 442 443 // ifIndex is the interface index of the host face interface (veth pair) 444 IfIndex int 445 446 // ContainerIfName is the name of the container facing interface (veth pair). 447 ContainerIfName string 448 449 // DisableLegacyIdentifiers disables lookup using legacy endpoint identifiers 450 // (container name, container id, pod name) for this endpoint. 451 DisableLegacyIdentifiers bool 452 453 // OpLabels is the endpoint's label configuration 454 // 455 // FIXME: Rename this field to Labels 456 OpLabels labels.OpLabels 457 458 // mac is the MAC address of the endpoint 459 // 460 // FIXME: Rename this field to MAC 461 LXCMAC mac.MAC // Container MAC address. 462 463 // IPv6 is the IPv6 address of the endpoint 464 IPv6 netip.Addr 465 466 // IPv6IPAMPool is the IPAM address pool from which the IPv6 address was allocated 467 IPv6IPAMPool string 468 469 // IPv4 is the IPv4 address of the endpoint 470 IPv4 netip.Addr 471 472 // IPv4IPAMPool is the IPAM address pool from which the IPv4 address was allocated 473 IPv4IPAMPool string 474 475 // nodeMAC is the MAC of the node (agent). The MAC is different for every endpoint. 476 NodeMAC mac.MAC 477 478 // SecurityIdentity is the security identity of this endpoint. This is computed from 479 // the endpoint's labels. 480 SecurityIdentity *identity.Identity `json:"SecLabel"` 481 482 // Options determine the datapath configuration of the endpoint. 483 Options *option.IntOptions 484 485 // DNSRules is the collection of current DNS rules for this endpoint. 486 DNSRules restore.DNSRules 487 488 // DNSRulesV2 is the collection of current DNS rules for this endpoint, 489 // that conform to using V2 of the PortProto key. 490 DNSRulesV2 restore.DNSRules 491 492 // DNSHistory is the collection of still-valid DNS responses intercepted for 493 // this endpoint. 494 DNSHistory *fqdn.DNSCache 495 496 // DNSZombies is the collection of DNS entries that have been expired or 497 // evicted from DNSHistory. 498 DNSZombies *fqdn.DNSZombieMappings 499 500 // K8sPodName is the Kubernetes pod name of the endpoint 501 K8sPodName string 502 503 // K8sNamespace is the Kubernetes namespace of the endpoint 504 K8sNamespace string 505 506 // K8sUID is the Kubernetes pod UID of the endpoint 507 K8sUID string 508 509 // DatapathConfiguration is the endpoint's datapath configuration as 510 // passed in via the plugin that created the endpoint, e.g. the CNI 511 // plugin which performed the plumbing will enable certain datapath 512 // features according to the mode selected. 513 DatapathConfiguration models.EndpointDatapathConfiguration 514 515 // CiliumEndpointUID contains the unique identifier ref for the CiliumEndpoint 516 // that this Endpoint was managing. 517 // This is used to avoid overwriting/deleting ciliumendpoints that are managed 518 // by other endpoints. 519 CiliumEndpointUID types.UID 520 521 // Properties are used to store some internal property about this Endpoint. 522 Properties map[string]interface{} 523 524 // NetnsCookie is the network namespace cookie of the Endpoint. 525 NetnsCookie uint64 526 } 527 528 // UnmarshalJSON expects that the contents of `raw` are a serializableEndpoint, 529 // which is then converted into an Endpoint. 530 func (ep *Endpoint) UnmarshalJSON(raw []byte) error { 531 // We may have to populate structures in the Endpoint manually to do the 532 // translation from serializableEndpoint --> Endpoint. 533 restoredEp := &serializableEndpoint{ 534 OpLabels: labels.NewOpLabels(), 535 DNSHistory: fqdn.NewDNSCacheWithLimit(option.Config.ToFQDNsMinTTL, option.Config.ToFQDNsMaxIPsPerHost), 536 DNSZombies: fqdn.NewDNSZombieMappings(option.Config.ToFQDNsMaxDeferredConnectionDeletes, option.Config.ToFQDNsMaxIPsPerHost), 537 } 538 if err := json.Unmarshal(raw, restoredEp); err != nil { 539 return fmt.Errorf("error unmarshaling serializableEndpoint from base64 representation: %w", err) 540 } 541 542 ep.fromSerializedEndpoint(restoredEp) 543 return nil 544 } 545 546 // MarshalJSON marshals the Endpoint as its serializableEndpoint representation. 547 func (ep *Endpoint) MarshalJSON() ([]byte, error) { 548 return json.Marshal(ep.toSerializedEndpoint()) 549 } 550 551 func (ep *Endpoint) fromSerializedEndpoint(r *serializableEndpoint) { 552 ep.ID = r.ID 553 ep.createdAt = time.Now() 554 ep.containerName.Store(&r.ContainerName) 555 ep.containerID.Store(&r.ContainerID) 556 ep.dockerNetworkID = r.DockerNetworkID 557 ep.dockerEndpointID = r.DockerEndpointID 558 ep.ifName = r.IfName 559 ep.ifIndex = r.IfIndex 560 ep.containerIfName = r.ContainerIfName 561 ep.disableLegacyIdentifiers = r.DisableLegacyIdentifiers 562 ep.OpLabels = r.OpLabels 563 ep.mac = r.LXCMAC 564 ep.IPv6 = r.IPv6 565 ep.IPv6IPAMPool = r.IPv6IPAMPool 566 ep.IPv4 = r.IPv4 567 ep.IPv4IPAMPool = r.IPv4IPAMPool 568 ep.nodeMAC = r.NodeMAC 569 ep.SecurityIdentity = r.SecurityIdentity 570 ep.DNSRules = r.DNSRules 571 ep.DNSRulesV2 = r.DNSRulesV2 572 ep.DNSHistory = r.DNSHistory 573 ep.DNSZombies = r.DNSZombies 574 ep.K8sPodName = r.K8sPodName 575 ep.K8sNamespace = r.K8sNamespace 576 ep.K8sUID = r.K8sUID 577 ep.DatapathConfiguration = r.DatapathConfiguration 578 ep.Options = r.Options 579 ep.ciliumEndpointUID = r.CiliumEndpointUID 580 if r.Properties != nil { 581 ep.properties = r.Properties 582 } else { 583 ep.properties = map[string]interface{}{} 584 } 585 ep.NetNsCookie = r.NetnsCookie 586 }