// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

// Origin: github.com/cilium/cilium@v1.16.2/pkg/fqdn/cache.go

package fqdn

import (
	"encoding/json"
	"net"
	"net/netip"
	"regexp"
	"sort"
	"unsafe"

	"golang.org/x/exp/maps"
	"k8s.io/apimachinery/pkg/util/sets"

	"github.com/cilium/cilium/pkg/fqdn/matchpattern"
	"github.com/cilium/cilium/pkg/fqdn/re"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/slices"
	"github.com/cilium/cilium/pkg/time"
)

// cacheEntry objects hold data passed in via DNSCache.Update, nominally
// equating to a DNS lookup. They are internal to DNSCache and should not be
// returned.
// cacheEntry objects are immutable once created; the address of an instance is
// a unique identifier.
// Note: the JSON names are intended to correlate to field names from
// api/v1/models.DNSLookup to allow dumping the json from
// `cilium fqdn cache list` to a file that can be unmarshalled via
// `--tofqdns-pre-cache`
type cacheEntry struct {
	// Name is a DNS name; it may be not fully qualified (e.g. myservice.namespace)
	Name string `json:"fqdn,omitempty"`

	// LookupTime is when the data begins being valid
	LookupTime time.Time `json:"lookup-time,omitempty"`

	// ExpirationTime is a calculated time when the DNS data stops being valid.
	// It is simply LookupTime + TTL
	ExpirationTime time.Time `json:"expiration-time,omitempty"`

	// TTL represents the number of seconds past LookupTime that this data is
	// valid.
	TTL int `json:"ttl,omitempty"`

	// IPs are the IPs associated with Name for this cacheEntry.
	IPs []netip.Addr `json:"ips,omitempty"`
}

// isExpiredBy returns true if entry is no longer valid at pointInTime.
// The comparison is strict: an entry is still valid at exactly its
// ExpirationTime and only expired at any instant after it.
func (entry *cacheEntry) isExpiredBy(pointInTime time.Time) bool {
	return pointInTime.After(entry.ExpirationTime)
}

// ipEntries maps a unique IP to the cacheEntry that provides it in .IPs.
// Multiple IPs may point to the same cacheEntry, or they may all be different.
// Crucially, an IP may be present in a cacheEntry but the IP in ipEntries
// points to another cacheEntry. This is because the second cacheEntry has a
// later expiration for this specific IP, and may not include the other IPs
// provided by the first entry.
// The DNS name in the entries is not checked, but is assumed to be the same
// for all entries.
// Note: They are guarded by the DNSCache mutex.
type ipEntries map[netip.Addr]*cacheEntry

// nameEntries maps a DNS name to the cache entry that inserted it into the
// cache. It is used in reverse DNS lookups. It is similar to ipEntries, above,
// but the key is a DNS name.
type nameEntries map[string]*cacheEntry

// getIPs returns an unsorted list with one address per map key whose entry
// is non-nil and not expired at now. Each address is passed through Unmap()
// before being returned.
// This needs a read-lock
func (s ipEntries) getIPs(now time.Time) []netip.Addr {
	ips := make([]netip.Addr, 0, len(s)) // worst case size
	for ip, entry := range s {
		if entry != nil && !entry.isExpiredBy(now) {
			ips = append(ips, ip.Unmap())
		}
	}

	return ips
}

// DNSCache manages DNS data that will expire after a certain TTL. Information
// is tracked per-IP address, retaining the latest-expiring DNS data for each
// address.
// For most real-world DNS data, the entry per name remains small because newer
// lookups replace older ones. Large TTLs may cause entries to grow if many
// unique IPs are returned in separate lookups.
// It is critical to run .GC periodically. This cleans up expired entries and
// steps forward the time used to determine that entries are expired. This
// means that the Lookup functions may return expired entries until GC is
// called.
// Redundant entries are removed on insert.
type DNSCache struct {
	lock.RWMutex

	// forward DNS lookups name -> ipEntries
	// ipEntries maps IP -> entry that provides it. An entry may provide multiple IPs.
	forward map[string]ipEntries

	// IP->dnsNames lookup
	// This map is subordinate to forward, above. An IP inserted into forward, or
	// expired in forward, should also be added/removed in reverse.
	reverse map[netip.Addr]nameEntries

	// lastCleanup is the latest time for which entries have been expired. It is
	// used as "now" when doing lookups and advanced by calls to .GC
	// When an entry is added with an expiration time before lastCleanup, it is
	// set to that value.
	lastCleanup time.Time

	// cleanup maps the TTL expiration times (in seconds since the epoch) to
	// DNS names that expire in that second. On every new insertion where the
	// new data is actually inserted into the cache (i.e. it expires later than
	// an existing entry) cleanup will be updated. cleanupExpiredEntries cleans
	// up these entries on demand.
	// Note: Lookup functions will not return expired entries, and this is used
	// to proactively enforce expirations.
	// Note: It is important to periodically call cleanupExpiredEntries (done
	// by GC), otherwise this map will grow forever.
	// A nil map disables this tracking (see DisableCleanupTrack).
	cleanup map[int64][]string

	// overLimit is a set of DNS names that were over the per-host configured
	// limit when they received an update. The excess IPs will be removed when
	// cleanupOverLimitEntries is called, but will continue to be returned by
	// Lookup until then.
	// Note: It is important to periodically call GC otherwise this map will
	// grow forever (it is very bounded, however).
	overLimit map[string]bool

	// perHostLimit is the maximum number of IPs per host. Zero means that no
	// limit is enforced.
	perHostLimit int

	// minTTL is the minimum TTL value that a cache entry can have. If the TTL
	// sent in the Update is lower, the TTL will be overwritten to this value.
	// Because it is only ever read after construction, it is not protected by
	// the mutex.
	minTTL int
}

// NewDNSCache returns an initialized DNSCache with no per-host IP limit
// (perHostLimit is 0). minTTL is the floor applied to TTLs passed to Update.
func NewDNSCache(minTTL int) *DNSCache {
	c := &DNSCache{
		forward: make(map[string]ipEntries),
		reverse: make(map[netip.Addr]nameEntries),
		// lastCleanup is populated on the first insert
		cleanup:      map[int64][]string{},
		overLimit:    map[string]bool{},
		perHostLimit: 0,
		minTTL:       minTTL,
	}
	return c
}

// NewDNSCacheWithLimit returns an initialized DNSCache and sets the maximum
// number of IPs per host to limit.
func NewDNSCacheWithLimit(minTTL int, limit int) *DNSCache {
	c := NewDNSCache(minTTL)
	c.perHostLimit = limit
	return c
}

// DisableCleanupTrack sets the cleanup map to nil while holding the write
// lock. addNameToCleanup returns immediately when the map is nil, so
// insertions made afterwards are no longer recorded for TTL-based cleanup.
func (c *DNSCache) DisableCleanupTrack() {
	c.Lock()
	defer c.Unlock()
	c.cleanup = nil
}

// Update inserts a new entry into the cache.
// For every IP in ips the per-name mapping is created, or replaced when the
// existing mapping expires before the new entry does. It returns true when
// name was not previously in the cache or when at least one IP mapping was
// inserted or replaced.
// lookupTime is the time the DNS information began being valid. It should be
// in the past.
// name is used as is and may be an unqualified name (e.g. myservice.namespace).
// ips may be IPv4 or IPv6 addresses. Repeated addresses collapse onto a single
// per-IP mapping.
// ttl is the DNS TTL for ips and is a seconds value. Values below the cache's
// minTTL are raised to minTTL.
181 func (c *DNSCache) Update(lookupTime time.Time, name string, ips []netip.Addr, ttl int) bool { 182 if c.minTTL > ttl { 183 ttl = c.minTTL 184 } 185 186 entry := &cacheEntry{ 187 Name: name, 188 LookupTime: lookupTime, 189 ExpirationTime: lookupTime.Add(time.Duration(ttl) * time.Second), 190 TTL: ttl, 191 IPs: ips, 192 } 193 194 c.Lock() 195 defer c.Unlock() 196 return c.updateWithEntry(entry) 197 } 198 199 // updateWithEntry implements the insertion of a cacheEntry. It is used by 200 // DNSCache.Update and DNSCache.UpdateWithEntry. 201 // This needs a write lock 202 func (c *DNSCache) updateWithEntry(entry *cacheEntry) bool { 203 changed := false 204 entries, exists := c.forward[entry.Name] 205 if !exists { 206 changed = true 207 entries = make(map[netip.Addr]*cacheEntry) 208 c.forward[entry.Name] = entries 209 } 210 211 if c.updateWithEntryIPs(entries, entry) { 212 changed = true 213 } 214 215 if c.perHostLimit > 0 && len(entries) > c.perHostLimit { 216 c.overLimit[entry.Name] = true 217 } 218 return changed 219 } 220 221 // AddNameToCleanup adds the IP with the given TTL to the cleanup map to 222 // delete the entry from the policy when it expires. 223 // Need to be called with a write lock 224 func (c *DNSCache) addNameToCleanup(entry *cacheEntry) { 225 if c.cleanup == nil { 226 return 227 } 228 if c.lastCleanup.IsZero() || entry.ExpirationTime.Before(c.lastCleanup) { 229 c.lastCleanup = entry.ExpirationTime 230 } 231 expiration := entry.ExpirationTime.Unix() 232 expiredEntries, exists := c.cleanup[expiration] 233 if !exists { 234 expiredEntries = []string{} 235 } 236 c.cleanup[expiration] = append(expiredEntries, entry.Name) 237 } 238 239 // cleanupExpiredEntries cleans all the expired entries since lastCleanup up to 240 // expires, but not including it. lastCleanup is set to expires and later 241 // cleanups begin from that time. 242 // It returns the list of names that have expired data and a map of removed DNS 243 // cache entries, keyed by IP. 
244 func (c *DNSCache) cleanupExpiredEntries(expires time.Time) (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) { 245 if c.lastCleanup.IsZero() { 246 return nil, nil 247 } 248 249 toCleanNames := sets.New[string]() 250 for c.lastCleanup.Before(expires) { 251 key := c.lastCleanup.Unix() 252 if entries, exists := c.cleanup[key]; exists { 253 toCleanNames.Insert(entries...) 254 delete(c.cleanup, key) 255 } 256 c.lastCleanup = c.lastCleanup.Add(time.Second).Truncate(time.Second) 257 } 258 259 affectedNames = sets.New[string]() 260 removed = make(map[netip.Addr][]*cacheEntry) 261 for name := range toCleanNames { 262 if entries, exists := c.forward[name]; exists { 263 affectedNames.Insert(name) 264 for ip, entry := range c.removeExpired(entries, c.lastCleanup, time.Time{}) { 265 removed[ip] = append(removed[ip], entry) 266 } 267 } 268 } 269 270 return affectedNames, removed 271 } 272 273 // cleanupOverLimitEntries returns the names that has reached the max number of 274 // IP per host. Internally the function sort the entries by the expiration 275 // time. 276 func (c *DNSCache) cleanupOverLimitEntries() (affectedNames sets.Set[string], removed map[netip.Addr][]*cacheEntry) { 277 type IPEntry struct { 278 ip netip.Addr 279 entry *cacheEntry 280 } 281 282 // For global cache the limit maybe is not used at all. 
283 if c.perHostLimit == 0 { 284 return nil, nil 285 } 286 287 affectedNames = sets.New[string]() 288 removed = make(map[netip.Addr][]*cacheEntry) 289 290 for dnsName := range c.overLimit { 291 entries, ok := c.forward[dnsName] 292 if !ok { 293 continue 294 } 295 overlimit := len(entries) - c.perHostLimit 296 if overlimit <= 0 { 297 continue 298 } 299 sortedEntries := make([]IPEntry, 0, len(entries)) 300 for ip, entry := range entries { 301 sortedEntries = append(sortedEntries, IPEntry{ip, entry}) 302 } 303 304 sort.Slice(sortedEntries, func(i, j int) bool { 305 return sortedEntries[i].entry.ExpirationTime.Before(sortedEntries[j].entry.ExpirationTime) 306 }) 307 308 for i := 0; i < overlimit; i++ { 309 key := sortedEntries[i] 310 delete(entries, key.ip) 311 c.remove(key.ip, key.entry) 312 removed[key.ip] = append(removed[key.ip], key.entry) 313 } 314 affectedNames.Insert(dnsName) 315 } 316 c.overLimit = map[string]bool{} 317 return affectedNames, removed 318 } 319 320 // GC cleans TTL expired entries up to now, and overlimit entries, returning 321 // both sets. 322 // If zombies is passed in, expired IPs are inserted into it. GC and 323 // other management of zombies is left to the caller. 324 // Note: zombies use the original lookup's ExpirationTime for DeletePendingAt, 325 // not the now parameter. This allows better ordering in zombie GC. 326 func (c *DNSCache) GC(now time.Time, zombies *DNSZombieMappings) (affectedNames sets.Set[string]) { 327 c.Lock() 328 expiredNames, expiredEntries := c.cleanupExpiredEntries(now) 329 overLimitNames, overLimitEntries := c.cleanupOverLimitEntries() 330 c.Unlock() 331 332 if zombies != nil { 333 // Iterate over 2 maps 334 for _, m := range []map[netip.Addr][]*cacheEntry{ 335 expiredEntries, 336 overLimitEntries, 337 } { 338 for ip, entries := range m { 339 for _, entry := range entries { 340 // Set the expiration time to either the GC or the expiration time 341 // of the DNS lookup if it is in the future. 
342 // This can be the case when entries are not expired, but they are 343 // over limit. We preserve this time so that, in the event that 344 // non-expired names are GC'd, they will be less preferentially reaped 345 // by zombies. 346 expireTime := now 347 if entry.ExpirationTime.After(expireTime) { 348 expireTime = entry.ExpirationTime 349 } 350 zombies.Upsert(expireTime, ip, entry.Name) 351 } 352 } 353 } 354 } 355 356 return expiredNames.Union(overLimitNames) 357 } 358 359 // UpdateFromCache is a utility function that allows updating a DNSCache 360 // instance with all the internal entries of another. Latest-Expiration still 361 // applies, thus the merged outcome is consistent with adding the entries 362 // individually. 363 // When namesToUpdate has non-zero length only those names are updated from 364 // update, otherwise all DNS names in update are used. 365 func (c *DNSCache) UpdateFromCache(update *DNSCache, namesToUpdate []string) { 366 if update == nil { 367 return 368 } 369 370 c.Lock() 371 defer c.Unlock() 372 c.updateFromCache(update, namesToUpdate) 373 } 374 375 func (c *DNSCache) updateFromCache(update *DNSCache, namesToUpdate []string) { 376 update.RLock() 377 defer update.RUnlock() 378 379 if len(namesToUpdate) == 0 { 380 for name := range update.forward { 381 namesToUpdate = append(namesToUpdate, name) 382 } 383 } 384 for _, name := range namesToUpdate { 385 newEntries, exists := update.forward[name] 386 if !exists { 387 continue 388 } 389 for _, newEntry := range newEntries { 390 c.updateWithEntry(newEntry) 391 } 392 } 393 } 394 395 // ReplaceFromCacheByNames operates as an atomic combination of ForceExpire and 396 // multiple UpdateFromCache invocations. The result is to collect all entries 397 // for DNS names in namesToUpdate from each DNSCache in updates, replacing the 398 // current entries for each of those names. 
399 func (c *DNSCache) ReplaceFromCacheByNames(namesToUpdate []string, updates ...*DNSCache) { 400 c.Lock() 401 defer c.Unlock() 402 403 // Remove any DNS name in namesToUpdate with a lookup before "now". This 404 // effectively deletes all lookups because we're holding the lock. 405 c.forceExpireByNames(time.Now(), namesToUpdate) 406 407 for _, update := range updates { 408 c.updateFromCache(update, namesToUpdate) 409 } 410 } 411 412 // Lookup returns a set of unique IPs that are currently unexpired for name, if 413 // any exist. An empty list indicates no valid records exist. The IPs are 414 // returned unsorted. 415 func (c *DNSCache) Lookup(name string) (ips []netip.Addr) { 416 c.RLock() 417 defer c.RUnlock() 418 419 return c.lookupByTime(c.lastCleanup, name) 420 } 421 422 // lookupByTime takes a timestamp for expiration comparisons, and is only 423 // intended for testing. 424 func (c *DNSCache) lookupByTime(now time.Time, name string) (ips []netip.Addr) { 425 entries, found := c.forward[name] 426 if !found { 427 return nil 428 } 429 430 return entries.getIPs(now) 431 } 432 433 // LookupByRegexp returns all non-expired cache entries that match re as a map 434 // of name -> IPs 435 func (c *DNSCache) LookupByRegexp(re *regexp.Regexp) (matches map[string][]netip.Addr) { 436 return c.lookupByRegexpByTime(c.lastCleanup, re) 437 } 438 439 // lookupByRegexpByTime takes a timestamp for expiration comparisons, and is 440 // only intended for testing. 441 func (c *DNSCache) lookupByRegexpByTime(now time.Time, re *regexp.Regexp) (matches map[string][]netip.Addr) { 442 matches = make(map[string][]netip.Addr) 443 444 c.RLock() 445 defer c.RUnlock() 446 447 for name, entry := range c.forward { 448 if re.MatchString(name) { 449 if ips := entry.getIPs(now); len(ips) > 0 { 450 matches[name] = ips 451 } 452 } 453 } 454 455 return matches 456 } 457 458 // LookupIP returns all DNS names in entries that include that IP. 
The cache 459 // maintains the latest-expiring entry per-name per-IP. This means that multiple 460 // names referrring to the same IP will expire from the cache at different times, 461 // and only 1 entry for each name-IP pair is internally retained. 462 func (c *DNSCache) LookupIP(ip netip.Addr) (names []string) { 463 c.RLock() 464 defer c.RUnlock() 465 466 return c.lookupIPByTime(c.lastCleanup, ip) 467 } 468 469 // lookupIPByTime takes a timestamp for expiration comparisons, and is 470 // only intended for testing. 471 func (c *DNSCache) lookupIPByTime(now time.Time, ip netip.Addr) (names []string) { 472 cacheEntries, found := c.reverse[ip] 473 if !found { 474 return nil 475 } 476 477 for name, entry := range cacheEntries { 478 if entry != nil && !entry.isExpiredBy(now) { 479 names = append(names, name) 480 } 481 } 482 483 sort.Strings(names) 484 return names 485 } 486 487 // entryExistsLocked returns true if this (name, IP) pair is known to the cache. 488 func (c *DNSCache) entryExistsLocked(name string, ip netip.Addr) bool { 489 names, exists := c.reverse[ip] 490 if !exists { 491 return false 492 } 493 494 _, exists = names[name] 495 return exists 496 } 497 498 // updateWithEntryIPs adds a mapping for every IP found in `entry` to `ipEntries` 499 // (which maps IP -> cacheEntry). It will replace existing IP->old mappings in 500 // `entries` if the current entry expires sooner (or has already expired). 501 // This needs a write lock 502 func (c *DNSCache) updateWithEntryIPs(entries ipEntries, entry *cacheEntry) bool { 503 added := false 504 for _, ip := range entry.IPs { 505 old, exists := entries[ip] 506 if old == nil || !exists || old.isExpiredBy(entry.ExpirationTime) { 507 entries[ip] = entry 508 c.upsertReverse(ip, entry) 509 c.addNameToCleanup(entry) 510 added = true 511 } 512 } 513 return added 514 515 } 516 517 // removeExpired removes expired (or nil) cacheEntry pointers from entries, an 518 // ipEntries instance for a specific name. 
It returns a boolean if any entry is 519 // removed. 520 // now is the "current time" and entries with ExpirationTime before then are 521 // removed. 522 // expireLookupsBefore is an optional parameter. It causes any entry with a 523 // LookupTime before it to be expired. It is intended for use with cache 524 // clearing functions like ForceExpire, and does not maintain the cache's 525 // guarantees. 526 // This needs a write lock 527 func (c *DNSCache) removeExpired(entries ipEntries, now time.Time, expireLookupsBefore time.Time) (removed ipEntries) { 528 removed = make(ipEntries) 529 for ip, entry := range entries { 530 if entry == nil || entry.isExpiredBy(now) || entry.LookupTime.Before(expireLookupsBefore) { 531 delete(entries, ip) 532 c.remove(ip, entry) 533 removed[ip] = entry 534 } 535 } 536 537 return removed 538 } 539 540 // upsertReverse updates the reverse DNS cache for ip with entry, if it expires 541 // later than the already-stored entry. 542 // It is assumed that entry includes ip. 543 // This needs a write lock 544 func (c *DNSCache) upsertReverse(ip netip.Addr, entry *cacheEntry) { 545 entries, exists := c.reverse[ip] 546 if entries == nil || !exists { 547 entries = make(map[string]*cacheEntry) 548 c.reverse[ip] = entries 549 } 550 entries[entry.Name] = entry 551 } 552 553 // remove removes the reference between ip and the name stored in entry from 554 // the DNS cache (both in forward and reverse maps). This assumes the write 555 // lock is taken. 556 func (c *DNSCache) remove(ip netip.Addr, entry *cacheEntry) { 557 c.removeForward(ip, entry) 558 c.removeReverse(ip, entry) 559 } 560 561 // removeForward removes the reference between ip and the name stored in entry. 562 // When no more references from ip to any name exist, the map entry is deleted 563 // outright. 564 // It is assumed that entry includes ip. 565 // This needs a write lock. 
566 func (c *DNSCache) removeForward(ip netip.Addr, entry *cacheEntry) { 567 entries, exists := c.forward[entry.Name] 568 if entries == nil || !exists { 569 return 570 } 571 delete(entries, ip) 572 if len(entries) == 0 { 573 delete(c.forward, entry.Name) 574 } 575 } 576 577 // removeReverse is the equivalent of removeForward() but for the reverse map. 578 func (c *DNSCache) removeReverse(ip netip.Addr, entry *cacheEntry) { 579 entries, exists := c.reverse[ip] 580 if entries == nil || !exists { 581 return 582 } 583 delete(entries, entry.Name) 584 if len(entries) == 0 { 585 delete(c.reverse, ip) 586 } 587 } 588 589 // GetIPs takes a snapshot of all IPs in the reverse cache. 590 func (c *DNSCache) GetIPs() map[netip.Addr][]string { 591 c.RWMutex.RLock() 592 defer c.RWMutex.RUnlock() 593 594 out := make(map[netip.Addr][]string, len(c.reverse)) 595 596 for ip, names := range c.reverse { 597 out[ip] = maps.Keys(names) 598 } 599 600 return out 601 } 602 603 // ForceExpire is used to clear entries from the cache before their TTL is 604 // over. This operation does not keep previous guarantees that, for each IP, 605 // the most recent lookup to provide that IP is used. 606 // Note that all parameters must match, if provided. `time.Time{}` is the 607 // match-all time parameter. 608 // For example: 609 // 610 // ForceExpire(time.Time{}, 'cilium.io') expires all entries for cilium.io. 611 // ForceExpire(time.Now(), 'cilium.io') expires all entries for cilium.io 612 // that expired before the current time. 613 // 614 // expireLookupsBefore requires a lookup to have a LookupTime before it in 615 // order to remove it. 616 // nameMatch will remove any DNS names that match. 617 func (c *DNSCache) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected sets.Set[string]) { 618 c.Lock() 619 defer c.Unlock() 620 621 namesAffected = sets.New[string]() 622 623 for name, entries := range c.forward { 624 // If nameMatch was passed in, we must match it. 
Otherwise, "match all". 625 if nameMatch != nil && !nameMatch.MatchString(name) { 626 continue 627 } 628 // We pass expireLookupsBefore as the `now` parameter but it is redundant 629 // because LookupTime must be before ExpirationTime. 630 // The second expireLookupsBefore actually matches lookup times, and will 631 // delete the entries completely. 632 for _, entry := range c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) { 633 namesAffected.Insert(entry.Name) 634 } 635 } 636 637 return namesAffected 638 } 639 640 func (c *DNSCache) forceExpireByNames(expireLookupsBefore time.Time, names []string) { 641 for _, name := range names { 642 entries, exists := c.forward[name] 643 if !exists { 644 continue 645 } 646 647 // We pass expireLookupsBefore as the `now` parameter but it is redundant 648 // because LookupTime must be before ExpirationTime. 649 // The second expireLookupsBefore actually matches lookup times, and will 650 // delete the entries completely. 651 c.removeExpired(entries, expireLookupsBefore, expireLookupsBefore) 652 } 653 } 654 655 // Dump returns unexpired cache entries in the cache. They are deduplicated, 656 // but not usefully sorted. These objects should not be modified. 657 func (c *DNSCache) Dump() (lookups []*cacheEntry) { 658 c.RLock() 659 defer c.RUnlock() 660 661 // Collect all the still-valid entries 662 lookups = make([]*cacheEntry, 0, len(c.forward)) 663 for _, entries := range c.forward { 664 for _, entry := range entries { 665 lookups = append(lookups, entry) 666 } 667 } 668 669 // Dedup the entries. They are created once and are immutable so the address 670 // is a unique identifier. 671 // We iterate through the list, keeping unique pointers. This is correct 672 // because the list is sorted and, if two consecutive entries are the same, 673 // it is safe to overwrite the second duplicate. 
674 sort.Slice(lookups, func(i, j int) bool { 675 return uintptr(unsafe.Pointer(lookups[i])) < uintptr(unsafe.Pointer(lookups[j])) 676 }) 677 678 deduped := lookups[:0] // len==0 but cap==cap(lookups) 679 for readIdx, lookup := range lookups { 680 if readIdx == 0 || deduped[len(deduped)-1] != lookups[readIdx] { 681 deduped = append(deduped, lookup) 682 } 683 } 684 685 return deduped 686 } 687 688 // Count returns two values, the count of still-valid FQDNs inside the DNS 689 // cache and the count of the still-valid entries (IPs) in the DNS cache. 690 // 691 // The FQDN count returns the length of the DNS cache size. 692 // 693 // The IP count is not deduplicated, see Dump(). In other words, this value 694 // represents an accurate tally of IPs associated with an FQDN in the DNS 695 // cache. 696 func (c *DNSCache) Count() (uint64, uint64) { 697 c.RLock() 698 defer c.RUnlock() 699 700 var ips uint64 701 for _, entries := range c.forward { 702 ips += uint64(len(entries)) 703 } 704 return uint64(len(c.forward)), ips 705 } 706 707 // MarshalJSON serialises the set of DNS lookup cacheEntries needed to 708 // reconstruct this cache instance. 709 // Note: Expiration times are honored and the reconstructed cache instance is 710 // expected to return the same values as the original at that point in time. 711 func (c *DNSCache) MarshalJSON() ([]byte, error) { 712 lookups := c.Dump() 713 714 // serialise into a JSON object array 715 return json.Marshal(lookups) 716 } 717 718 // UnmarshalJSON rebuilds a DNSCache from serialized JSON. 719 // Note: This is destructive to any currect data. Use UpdateFromCache for bulk 720 // updates. 
721 func (c *DNSCache) UnmarshalJSON(raw []byte) error { 722 lookups := make([]*cacheEntry, 0) 723 if err := json.Unmarshal(raw, &lookups); err != nil { 724 return err 725 } 726 727 c.Lock() 728 defer c.Unlock() 729 730 c.forward = make(map[string]ipEntries) 731 c.reverse = make(map[netip.Addr]nameEntries) 732 733 for _, newLookup := range lookups { 734 c.updateWithEntry(newLookup) 735 } 736 737 return nil 738 } 739 740 // DNSZombieMapping is an IP that has expired or been evicted from a DNS cache. 741 // It records the DNS name and IP, along with other bookkeeping timestamps that 742 // help determine when it can be finally deleted. Zombies are dead when 743 // they are not marked alive by CT GC. 744 // Special handling exists when the count of zombies is large. Overlimit 745 // zombies are deleted in GC with the following preferences (this is cumulative 746 // and in order of precedence): 747 // - Zombies with zero AliveAt are evicted before those with a non-zero value 748 // (i.e. known connections marked by CT GC are evicted last) 749 // - Zombies with an earlier DeletePendingAtTime are evicted first. 750 // Note: Upsert sets DeletePendingAt on every update, thus making GC prefer 751 // to evict IPs with less DNS churn on them. 752 // - Zombies with the lowest count of DNS names in them are evicted first 753 type DNSZombieMapping struct { 754 // Names is the list of names that had DNS lookups with this IP. These may 755 // derive from unrelated DNS lookups. The list is maintained de-duplicated. 756 Names []string `json:"names,omitempty"` 757 758 // IP is an address that is pending for delete but may be in-use by a 759 // connection. 760 IP netip.Addr `json:"ip,omitempty"` 761 762 // AliveAt is the last time this IP was marked alive via 763 // DNSZombieMappings.MarkAlive. 764 // When AliveAt is later than DNSZombieMappings.lastCTGCUpdate the zombie is 765 // considered alive. 
766 AliveAt time.Time `json:"alive-at,omitempty"` 767 768 // DeletePendingAt is the time at which this IP was most-recently scheduled 769 // for deletion. This can be updated if an IP expires from the DNS caches 770 // multiple times. 771 // When DNSZombieMappings.lastCTGCUpdate is earlier than DeletePendingAt a 772 // zombie is alive. 773 DeletePendingAt time.Time `json:"delete-pending-at,omitempty"` 774 775 // revisionAddedAt is the GCRevision at which this entry was added. 776 // garbage collection must run 2 times before the zombie is eligible for deletion 777 revisionAddedAt uint64 `json:"-"` 778 } 779 780 // DeepCopy returns a copy of zombie that does not share any internal pointers 781 // or fields 782 func (zombie *DNSZombieMapping) DeepCopy() *DNSZombieMapping { 783 return &DNSZombieMapping{ 784 Names: append([]string{}, zombie.Names...), 785 IP: zombie.IP, 786 DeletePendingAt: zombie.DeletePendingAt, 787 AliveAt: zombie.AliveAt, 788 } 789 } 790 791 // DNSZombieMappings collects DNS Name->IP mappings that may be inactive and 792 // evicted, and so may be deleted. They are periodically marked alive by the CT 793 // GC goroutine. When .GC is called, alive and dead zombies are returned, 794 // allowing us to skip deleting an IP from the global DNS cache to avoid 795 // breaking connections that outlast the DNS TTL. 796 type DNSZombieMappings struct { 797 lock.Mutex 798 deletes map[netip.Addr]*DNSZombieMapping 799 lastCTGCUpdate time.Time 800 nextCTGCUpdate time.Time // estimated 801 // ctGCRevision is a serial number tracking the number of conntrack 802 // garbage collection runs. It is used to ensure that entries 803 // are not reaped until CT GC has run at least twice. 804 ctGCRevision uint64 805 max int // max allowed zombies 806 807 // perHostLimit is the number of maximum number of IP per host. 
808 perHostLimit int 809 } 810 811 // NewDNSZombieMappings constructs a DNSZombieMappings that is read to use 812 func NewDNSZombieMappings(max, perHostLimit int) *DNSZombieMappings { 813 return &DNSZombieMappings{ 814 deletes: make(map[netip.Addr]*DNSZombieMapping), 815 max: max, 816 perHostLimit: perHostLimit, 817 } 818 } 819 820 // Upsert enqueues the ip -> qname as a possible deletion 821 // updatedExisting is true when an earlier enqueue existed and was updated 822 // If an existing entry is updated, the later expiryTime is applied to the existing entry. 823 func (zombies *DNSZombieMappings) Upsert(expiryTime time.Time, addr netip.Addr, qname ...string) (updatedExisting bool) { 824 zombies.Lock() 825 defer zombies.Unlock() 826 827 zombie, updatedExisting := zombies.deletes[addr] 828 if !updatedExisting { 829 zombie = &DNSZombieMapping{ 830 Names: slices.Unique(qname), 831 IP: addr, 832 DeletePendingAt: expiryTime, 833 revisionAddedAt: zombies.ctGCRevision, 834 } 835 zombies.deletes[addr] = zombie 836 } else { 837 zombie.Names = slices.Unique(append(zombie.Names, qname...)) 838 // Keep the latest expiry time 839 if expiryTime.After(zombie.DeletePendingAt) { 840 zombie.DeletePendingAt = expiryTime 841 } 842 } 843 return updatedExisting 844 } 845 846 // isConnectionAlive returns true if 'zombie' is considered alive. 847 // Zombie is considered dead if all of these conditions apply: 848 // 1. CT GC has run after the DNS Expiry time and grace period (lastCTGCUpdate > DeletePendingAt + GracePeriod), and 849 // 2. The CT GC run did not mark the Zombie alive (lastCTGCUpdate > AliveAt) 850 // 3. CT GC has run at least 2 times since Zombie was entered 851 // otherwise the Zombie is alive. 852 // 853 // We wait for 2 complete GC runs, because this entry may have been added in the middle of a GC run, 854 // in which case it may not have been marked alive. We need to wait for GC to finish at least 2 times 855 // before we can safely consider it dead. 
856 func (zombies *DNSZombieMappings) isConnectionAlive(zombie *DNSZombieMapping) bool { 857 if !zombies.lastCTGCUpdate.After(zombie.DeletePendingAt.Add(option.Config.ToFQDNsIdleConnectionGracePeriod)) { 858 return true 859 } 860 if !zombies.lastCTGCUpdate.After(zombie.AliveAt) { 861 return true 862 } 863 if zombies.ctGCRevision < (zombie.revisionAddedAt + 2) { 864 return true 865 } 866 return false 867 868 } 869 870 // getAliveNames returns all the names that are alive. 871 // A name is alive if at least one of the IPs that resolve to it is alive. 872 // The value of the map contains all IPs for the name (both alive and dead). 873 func (zombies *DNSZombieMappings) getAliveNames() map[string][]*DNSZombieMapping { 874 aliveNames := make(map[string][]*DNSZombieMapping) 875 876 for _, z := range zombies.deletes { 877 if zombies.isConnectionAlive(z) { 878 for _, name := range z.Names { 879 if _, ok := aliveNames[name]; !ok { 880 aliveNames[name] = make([]*DNSZombieMapping, 0, 5) 881 } 882 aliveNames[name] = append(aliveNames[name], z) 883 } 884 } 885 } 886 887 // Add all of the "dead" IPs for live names into the result 888 for _, z := range zombies.deletes { 889 if !zombies.isConnectionAlive(z) { 890 for _, name := range z.Names { 891 if _, ok := aliveNames[name]; ok { 892 aliveNames[name] = append(aliveNames[name], z) 893 } 894 } 895 } 896 } 897 898 return aliveNames 899 } 900 901 // isZombieAlive returns true if zombie is alive 902 // 903 // A zombie is alive if its connection is alive or if one of its names is 904 // alive. 
The function takes an argument that contains the aliveNames (can be 905 // obtained via getAliveNames()) 906 func (zombies *DNSZombieMappings) isZombieAlive(zombie *DNSZombieMapping, aliveNames map[string][]*DNSZombieMapping) (alive, overLimit bool) { 907 if zombies.isConnectionAlive(zombie) { 908 alive = true 909 if zombies.perHostLimit == 0 { 910 return alive, overLimit 911 } 912 } 913 914 for _, name := range zombie.Names { 915 if z, ok := aliveNames[name]; ok { 916 alive = true 917 if zombies.perHostLimit == 0 { 918 return alive, overLimit 919 } else if len(z) > zombies.perHostLimit { 920 overLimit = true 921 return alive, overLimit 922 } 923 } 924 } 925 926 return alive, overLimit 927 } 928 929 // sortZombieMappingSlice sorts the provided slice so that less important 930 // zombies shuffle to the front of the slice (from where they are eliminated). 931 // To achieve this, it sorts by three criteria, in order of priority: 932 // 933 // 1. when the connection was last marked alive (earlier == less important) 934 // 2. when this ip was last scheduled for deletion (earlier == less important) 935 // 3. tie-break by number of DNS names for that IP 936 func sortZombieMappingSlice(alive []*DNSZombieMapping) { 937 sort.Slice(alive, func(i, j int) bool { 938 switch { 939 case alive[i].AliveAt.Before(alive[j].AliveAt): 940 return true 941 case alive[i].AliveAt.After(alive[j].AliveAt): 942 return false 943 // We have AliveAt equality after this point. 944 case alive[i].DeletePendingAt.Before(alive[j].DeletePendingAt): 945 return true 946 case alive[i].DeletePendingAt.After(alive[j].DeletePendingAt): 947 return false 948 // DeletePendingAt is also equal. Tie-break by number of Names. 949 default: 950 return len(alive[i].Names) < len(alive[j].Names) 951 } 952 }) 953 } 954 955 // GC returns alive and dead DNSZombieMapping entries. This removes dead 956 // zombies interally, and repeated calls will return different data. 
957 // Zombies are alive if they have been marked alive (with MarkAlive). When 958 // SetCTGCTime is called and an zombie not marked alive, it becomes dead. 959 // Calling Upsert on a dead zombie will make it alive again. 960 // Alive zombies are limited by zombies.max. 0 means no zombies are allowed, 961 // disabling the behavior. It is expected to be a large value and is in place 962 // to avoid runaway zombie growth when CT GC is at a large interval. 963 func (zombies *DNSZombieMappings) GC() (alive, dead []*DNSZombieMapping) { 964 zombies.Lock() 965 defer zombies.Unlock() 966 967 aliveNames := zombies.getAliveNames() 968 969 // Collect zombies we can delete 970 for _, zombie := range zombies.deletes { 971 zombieAlive, overLimit := zombies.isZombieAlive(zombie, aliveNames) 972 if overLimit { 973 // No-op: This zombie is part of a name in 'aliveNames' 974 // that needs to impose a per-host IP limit. Decide 975 // whether to add to alive or dead in the next loop. 976 } else if zombieAlive { 977 alive = append(alive, zombie.DeepCopy()) 978 } else { 979 // Emit the actual object here since we will no longer update it 980 dead = append(dead, zombie) 981 } 982 } 983 984 if zombies.perHostLimit > 0 { 985 warnActiveDNSEntries := false 986 deadIdx := len(dead) 987 988 // Find names which have too many IPs associated mark them dead. 989 // 990 // Multiple names can refer to the same IP, so if we expire the 991 // zombie by IP then we need to ensure that it doesn't get 992 // added to both 'alive' and 'dead'. 993 // 994 // 1) Assemble all of the 'dead', starting from 'deadIdx'. 995 // Assemble alive candidates in 'possibleAlive'. 996 // 2) Ensure that 'possibleAlive' doesn't contain any of the 997 // entries in 'dead[deadIdx:]'. 998 // 3) Add the remaining 'possibleAlive' to 'alive'. 
999 possibleAlive := make(map[*DNSZombieMapping]struct{}) 1000 for _, aliveIPsForName := range aliveNames { 1001 if len(aliveIPsForName) <= zombies.perHostLimit { 1002 // Already handled in the loop above. 1003 continue 1004 } 1005 overLimit := len(aliveIPsForName) - zombies.perHostLimit 1006 sortZombieMappingSlice(aliveIPsForName) 1007 dead = append(dead, aliveIPsForName[:overLimit]...) 1008 for _, z := range aliveIPsForName[overLimit:] { 1009 possibleAlive[z] = struct{}{} 1010 } 1011 if dead[len(dead)-1].AliveAt.IsZero() { 1012 warnActiveDNSEntries = true 1013 } 1014 } 1015 if warnActiveDNSEntries { 1016 log.Warningf("Evicting expired DNS cache entries that may be in-use due to per-host limits. This may cause recently created connections to be disconnected. Raise %s to mitigate this.", option.ToFQDNsMaxIPsPerHost) 1017 } 1018 1019 for _, dead := range dead[deadIdx:] { 1020 delete(possibleAlive, dead) 1021 } 1022 1023 for zombie := range possibleAlive { 1024 alive = append(alive, zombie.DeepCopy()) 1025 } 1026 } 1027 1028 // Limit alive zombies to max. This is messy, and will break some existing 1029 // connections. We sort by whether the connection is marked alive or not, the 1030 // oldest created connections, and tie-break by the number of DNS names for 1031 // that IP. 1032 overLimit := len(alive) - zombies.max 1033 if overLimit > 0 { 1034 sortZombieMappingSlice(alive) 1035 dead = append(dead, alive[:overLimit]...) 1036 alive = alive[overLimit:] 1037 if dead[len(dead)-1].AliveAt.IsZero() { 1038 log.Warning("Evicting expired DNS cache entries that may be in-use. This may cause recently created connections to be disconnected. Raise --tofqdns-max-deferred-connection-deletes to mitigate this.") 1039 } 1040 } 1041 1042 // Delete the zombies we collected above from the internal map 1043 for _, zombie := range dead { 1044 delete(zombies.deletes, zombie.IP) 1045 } 1046 1047 return alive, dead 1048 } 1049 1050 // MarkAlive makes an zombie alive and not dead. 
When now is later than the 1051 // time set with SetCTGCTime the zombie remains alive. 1052 func (zombies *DNSZombieMappings) MarkAlive(now time.Time, ip netip.Addr) { 1053 zombies.Lock() 1054 defer zombies.Unlock() 1055 1056 if zombie, exists := zombies.deletes[ip]; exists { 1057 zombie.AliveAt = now 1058 } 1059 } 1060 1061 // SetCTGCTime marks the start of the most recent CT GC. This must be set after 1062 // all MarkAlive calls complete to avoid a race between the DNS garbage 1063 // collector and the CT GC. This would occur when a DNS zombie that has not 1064 // been visited by the CT GC run is seen by a concurrent DNS garbage collector 1065 // run, and then deleted. 1066 // When 'ctGCStart' is later than an alive timestamp, set with MarkAlive, the zombie is 1067 // no longer alive. Thus, this call acts as a gating function for what data is 1068 // returned by GC. 1069 func (zombies *DNSZombieMappings) SetCTGCTime(ctGCStart, estNext time.Time) { 1070 zombies.Lock() 1071 zombies.lastCTGCUpdate = ctGCStart 1072 zombies.nextCTGCUpdate = estNext 1073 zombies.ctGCRevision++ 1074 zombies.Unlock() 1075 } 1076 1077 // ForceExpire is used to clear zombies irrespective of their alive status. 1078 // Only zombies with DeletePendingAt times before expireLookupBefore are 1079 // considered for deletion. Each name in an zombie is matched against 1080 // nameMatcher (nil is match all) and when an zombie no longer has any valid 1081 // names will it be removed outright. 1082 // Note that all parameters must match, if provided. `time.Time{}` is the 1083 // match-all time parameter. 1084 // expireLookupsBefore requires an zombie to have been enqueued before the 1085 // specified time in order to remove it. 1086 // For example: 1087 // 1088 // ForceExpire(time.Time{}, 'cilium.io') expires all entries for cilium.io. 1089 // ForceExpire(time.Now(), 'cilium.io') expires all entries for cilium.io 1090 // that expired before the current time. 
1091 // 1092 // nameMatch will remove that specific DNS name from zombies that include it, 1093 // deleting it when no DNS names remain. 1094 func (zombies *DNSZombieMappings) ForceExpire(expireLookupsBefore time.Time, nameMatch *regexp.Regexp) (namesAffected []string) { 1095 zombies.Lock() 1096 defer zombies.Unlock() 1097 return zombies.forceExpireLocked(expireLookupsBefore, nameMatch, nil) 1098 } 1099 1100 func (zombies *DNSZombieMappings) forceExpireLocked(expireLookupsBefore time.Time, nameMatch *regexp.Regexp, cidr *net.IPNet) (namesAffected []string) { 1101 var toDelete []*DNSZombieMapping 1102 1103 for _, zombie := range zombies.deletes { 1104 // Do not expire zombies that were enqueued after expireLookupsBefore, but 1105 // only if the value is non-zero 1106 if !expireLookupsBefore.IsZero() && zombie.DeletePendingAt.After(expireLookupsBefore) { 1107 continue 1108 } 1109 1110 // If cidr is provided, skip zombies with IPs outside the range 1111 if cidr != nil && !cidr.Contains(zombie.IP.AsSlice()) { 1112 continue 1113 } 1114 1115 // A zombie has multiple names, collect the ones that should remain (i.e. 1116 // do not match nameMatch) 1117 var newNames []string 1118 for _, name := range zombie.Names { 1119 if nameMatch != nil && !nameMatch.MatchString(name) { 1120 newNames = append(newNames, name) 1121 } else { 1122 namesAffected = append(namesAffected, name) 1123 } 1124 } 1125 zombie.Names = newNames 1126 1127 // Delete the zombie outright if no names remain 1128 if len(zombie.Names) == 0 { 1129 toDelete = append(toDelete, zombie) 1130 } 1131 } 1132 1133 // Delete the zombies that are now empty 1134 for _, zombie := range toDelete { 1135 delete(zombies.deletes, zombie.IP) 1136 } 1137 1138 return namesAffected 1139 } 1140 1141 // ForceExpireByNameIP wraps ForceExpire to simplify clearing all IPs from a 1142 // new DNS lookup. 1143 // The error return is for errors compiling the internal regexp. This should 1144 // never happen. 
1145 func (zombies *DNSZombieMappings) ForceExpireByNameIP(expireLookupsBefore time.Time, name string, ips ...net.IP) error { 1146 reStr := matchpattern.ToAnchoredRegexp(name) 1147 re, err := re.CompileRegex(reStr) 1148 if err != nil { 1149 return err 1150 } 1151 1152 zombies.Lock() 1153 defer zombies.Unlock() 1154 for _, ip := range ips { 1155 cidr := net.IPNet{Mask: net.CIDRMask(len(ip)*8, len(ip)*8)} 1156 cidr.IP = ip.Mask(cidr.Mask) 1157 zombies.forceExpireLocked(expireLookupsBefore, re, &cidr) 1158 } 1159 return nil 1160 } 1161 1162 // PrefixMatcherFunc is a function passed to (*DNSZombieMappings).DumpAlive, 1163 // called on each zombie to determine whether it should be returned. 1164 type PrefixMatcherFunc func(ip netip.Addr) bool 1165 type NameMatcherFunc func(name string) bool 1166 1167 // DumpAlive returns copies of still-alive zombies matching prefixMatcher. 1168 func (zombies *DNSZombieMappings) DumpAlive(prefixMatcher PrefixMatcherFunc) (alive []*DNSZombieMapping) { 1169 zombies.Lock() 1170 defer zombies.Unlock() 1171 1172 aliveNames := zombies.getAliveNames() 1173 for _, zombie := range zombies.deletes { 1174 if alive, _ := zombies.isZombieAlive(zombie, aliveNames); !alive { 1175 continue 1176 } 1177 // only proceed if zombie is alive and the IP matches the CIDR selector 1178 if prefixMatcher != nil && !prefixMatcher(zombie.IP) { 1179 continue 1180 } 1181 1182 alive = append(alive, zombie.DeepCopy()) 1183 } 1184 1185 return alive 1186 } 1187 1188 // MarshalJSON encodes DNSZombieMappings into JSON. Only the DNSZombieMapping 1189 // entries are encoded. 1190 func (zombies *DNSZombieMappings) MarshalJSON() ([]byte, error) { 1191 zombies.Lock() 1192 defer zombies.Unlock() 1193 1194 // This hackery avoids exposing DNSZombieMappings.deletes as a public field. 1195 // The JSON package cannot serialize private fields so we have to make a 1196 // proxy type here. 
1197 aux := struct { 1198 Deletes map[netip.Addr]*DNSZombieMapping `json:"deletes,omitempty"` 1199 }{ 1200 Deletes: zombies.deletes, 1201 } 1202 1203 return json.Marshal(aux) 1204 } 1205 1206 // UnmarshalJSON rebuilds a DNSZombieMappings from serialized JSON. It resets 1207 // the AliveAt timestamps, requiring a CT GC cycle to occur before any zombies 1208 // are deleted (by not being marked alive). 1209 // Note: This is destructive to any currect data 1210 func (zombies *DNSZombieMappings) UnmarshalJSON(raw []byte) error { 1211 zombies.Lock() 1212 defer zombies.Unlock() 1213 1214 // This hackery avoids exposing DNSZombieMappings.deletes as a public field. 1215 // The JSON package cannot deserialize private fields so we have to make a 1216 // proxy type here. 1217 aux := struct { 1218 Deletes map[netip.Addr]*DNSZombieMapping `json:"deletes,omitempty"` 1219 }{ 1220 Deletes: zombies.deletes, 1221 } 1222 if err := json.Unmarshal(raw, &aux); err != nil { 1223 return err 1224 } 1225 zombies.deletes = aux.Deletes 1226 1227 // Reset the alive time & conntrack revision to ensure no deletes happen until we run CT GC again 1228 for _, zombie := range zombies.deletes { 1229 zombie.AliveAt = time.Time{} 1230 zombie.revisionAddedAt = zombies.ctGCRevision 1231 } 1232 return nil 1233 }