sigs.k8s.io/external-dns@v0.14.1/controller/controller.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controller
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	log "github.com/sirupsen/logrus"
    28  
    29  	"sigs.k8s.io/external-dns/endpoint"
    30  	"sigs.k8s.io/external-dns/plan"
    31  	"sigs.k8s.io/external-dns/provider"
    32  	"sigs.k8s.io/external-dns/registry"
    33  	"sigs.k8s.io/external-dns/source"
    34  )
    35  
    36  var (
    37  	registryErrorsTotal = prometheus.NewCounter(
    38  		prometheus.CounterOpts{
    39  			Namespace: "external_dns",
    40  			Subsystem: "registry",
    41  			Name:      "errors_total",
    42  			Help:      "Number of Registry errors.",
    43  		},
    44  	)
    45  	sourceErrorsTotal = prometheus.NewCounter(
    46  		prometheus.CounterOpts{
    47  			Namespace: "external_dns",
    48  			Subsystem: "source",
    49  			Name:      "errors_total",
    50  			Help:      "Number of Source errors.",
    51  		},
    52  	)
    53  	sourceEndpointsTotal = prometheus.NewGauge(
    54  		prometheus.GaugeOpts{
    55  			Namespace: "external_dns",
    56  			Subsystem: "source",
    57  			Name:      "endpoints_total",
    58  			Help:      "Number of Endpoints in all sources",
    59  		},
    60  	)
    61  	registryEndpointsTotal = prometheus.NewGauge(
    62  		prometheus.GaugeOpts{
    63  			Namespace: "external_dns",
    64  			Subsystem: "registry",
    65  			Name:      "endpoints_total",
    66  			Help:      "Number of Endpoints in the registry",
    67  		},
    68  	)
    69  	lastSyncTimestamp = prometheus.NewGauge(
    70  		prometheus.GaugeOpts{
    71  			Namespace: "external_dns",
    72  			Subsystem: "controller",
    73  			Name:      "last_sync_timestamp_seconds",
    74  			Help:      "Timestamp of last successful sync with the DNS provider",
    75  		},
    76  	)
    77  	lastReconcileTimestamp = prometheus.NewGauge(
    78  		prometheus.GaugeOpts{
    79  			Namespace: "external_dns",
    80  			Subsystem: "controller",
    81  			Name:      "last_reconcile_timestamp_seconds",
    82  			Help:      "Timestamp of last attempted sync with the DNS provider",
    83  		},
    84  	)
    85  	controllerNoChangesTotal = prometheus.NewCounter(
    86  		prometheus.CounterOpts{
    87  			Namespace: "external_dns",
    88  			Subsystem: "controller",
    89  			Name:      "no_op_runs_total",
    90  			Help:      "Number of reconcile loops ending up with no changes on the DNS provider side.",
    91  		},
    92  	)
    93  	deprecatedRegistryErrors = prometheus.NewCounter(
    94  		prometheus.CounterOpts{
    95  			Subsystem: "registry",
    96  			Name:      "errors_total",
    97  			Help:      "Number of Registry errors.",
    98  		},
    99  	)
   100  	deprecatedSourceErrors = prometheus.NewCounter(
   101  		prometheus.CounterOpts{
   102  			Subsystem: "source",
   103  			Name:      "errors_total",
   104  			Help:      "Number of Source errors.",
   105  		},
   106  	)
   107  	registryARecords = prometheus.NewGauge(
   108  		prometheus.GaugeOpts{
   109  			Namespace: "external_dns",
   110  			Subsystem: "registry",
   111  			Name:      "a_records",
   112  			Help:      "Number of Registry A records.",
   113  		},
   114  	)
   115  	registryAAAARecords = prometheus.NewGauge(
   116  		prometheus.GaugeOpts{
   117  			Namespace: "external_dns",
   118  			Subsystem: "registry",
   119  			Name:      "aaaa_records",
   120  			Help:      "Number of Registry AAAA records.",
   121  		},
   122  	)
   123  	sourceARecords = prometheus.NewGauge(
   124  		prometheus.GaugeOpts{
   125  			Namespace: "external_dns",
   126  			Subsystem: "source",
   127  			Name:      "a_records",
   128  			Help:      "Number of Source A records.",
   129  		},
   130  	)
   131  	sourceAAAARecords = prometheus.NewGauge(
   132  		prometheus.GaugeOpts{
   133  			Namespace: "external_dns",
   134  			Subsystem: "source",
   135  			Name:      "aaaa_records",
   136  			Help:      "Number of Source AAAA records.",
   137  		},
   138  	)
   139  	verifiedARecords = prometheus.NewGauge(
   140  		prometheus.GaugeOpts{
   141  			Namespace: "external_dns",
   142  			Subsystem: "controller",
   143  			Name:      "verified_a_records",
   144  			Help:      "Number of DNS A-records that exists both in source and registry.",
   145  		},
   146  	)
   147  	verifiedAAAARecords = prometheus.NewGauge(
   148  		prometheus.GaugeOpts{
   149  			Namespace: "external_dns",
   150  			Subsystem: "controller",
   151  			Name:      "verified_aaaa_records",
   152  			Help:      "Number of DNS AAAA-records that exists both in source and registry.",
   153  		},
   154  	)
   155  )
   156  
   157  func init() {
   158  	prometheus.MustRegister(registryErrorsTotal)
   159  	prometheus.MustRegister(sourceErrorsTotal)
   160  	prometheus.MustRegister(sourceEndpointsTotal)
   161  	prometheus.MustRegister(registryEndpointsTotal)
   162  	prometheus.MustRegister(lastSyncTimestamp)
   163  	prometheus.MustRegister(lastReconcileTimestamp)
   164  	prometheus.MustRegister(deprecatedRegistryErrors)
   165  	prometheus.MustRegister(deprecatedSourceErrors)
   166  	prometheus.MustRegister(controllerNoChangesTotal)
   167  	prometheus.MustRegister(registryARecords)
   168  	prometheus.MustRegister(registryAAAARecords)
   169  	prometheus.MustRegister(sourceARecords)
   170  	prometheus.MustRegister(sourceAAAARecords)
   171  	prometheus.MustRegister(verifiedARecords)
   172  	prometheus.MustRegister(verifiedAAAARecords)
   173  }
   174  
// Controller is responsible for orchestrating the different components.
// It works in the following way:
//   - Ask the Registry for the current list of endpoints.
//   - Ask the Source for the desired list of endpoints.
//   - Take both lists and calculate a Plan to move current towards desired state.
//   - Tell the Registry to apply the changes calculated by the Plan.
//
// Controller contains a sync.Mutex and must therefore not be copied after
// first use; all methods take a pointer receiver.
type Controller struct {
	// Source provides the desired endpoints.
	Source source.Source
	// Registry provides the current records and applies the planned changes.
	Registry registry.Registry
	// The policy that defines which changes to DNS records are allowed
	Policy plan.Policy
	// The interval between individual synchronizations
	Interval time.Duration
	// The DomainFilter defines which DNS records to keep or exclude
	DomainFilter endpoint.DomainFilter
	// The nextRunAt used for throttling and batching reconciliation.
	// It is read and written by ScheduleRunOnce and ShouldRunOnce.
	nextRunAt time.Time
	// The nextRunAtMux is for atomic updating of nextRunAt
	nextRunAtMux sync.Mutex
	// ManagedRecordTypes are DNS record types that will be considered for management.
	ManagedRecordTypes []string
	// ExcludeRecordTypes are DNS record types that will be excluded from management.
	ExcludeRecordTypes []string
	// MinEventSyncInterval is used as window for batching events
	MinEventSyncInterval time.Duration
}
   201  
   202  // RunOnce runs a single iteration of a reconciliation loop.
   203  func (c *Controller) RunOnce(ctx context.Context) error {
   204  	lastReconcileTimestamp.SetToCurrentTime()
   205  
   206  	records, err := c.Registry.Records(ctx)
   207  	if err != nil {
   208  		registryErrorsTotal.Inc()
   209  		deprecatedRegistryErrors.Inc()
   210  		return err
   211  	}
   212  
   213  	registryEndpointsTotal.Set(float64(len(records)))
   214  	regARecords, regAAAARecords := countAddressRecords(records)
   215  	registryARecords.Set(float64(regARecords))
   216  	registryAAAARecords.Set(float64(regAAAARecords))
   217  	ctx = context.WithValue(ctx, provider.RecordsContextKey, records)
   218  
   219  	endpoints, err := c.Source.Endpoints(ctx)
   220  	if err != nil {
   221  		sourceErrorsTotal.Inc()
   222  		deprecatedSourceErrors.Inc()
   223  		return err
   224  	}
   225  	sourceEndpointsTotal.Set(float64(len(endpoints)))
   226  	srcARecords, srcAAAARecords := countAddressRecords(endpoints)
   227  	sourceARecords.Set(float64(srcARecords))
   228  	sourceAAAARecords.Set(float64(srcAAAARecords))
   229  	vARecords, vAAAARecords := countMatchingAddressRecords(endpoints, records)
   230  	verifiedARecords.Set(float64(vARecords))
   231  	verifiedAAAARecords.Set(float64(vAAAARecords))
   232  	endpoints, err = c.Registry.AdjustEndpoints(endpoints)
   233  	if err != nil {
   234  		return fmt.Errorf("adjusting endpoints: %w", err)
   235  	}
   236  	registryFilter := c.Registry.GetDomainFilter()
   237  
   238  	plan := &plan.Plan{
   239  		Policies:       []plan.Policy{c.Policy},
   240  		Current:        records,
   241  		Desired:        endpoints,
   242  		DomainFilter:   endpoint.MatchAllDomainFilters{&c.DomainFilter, &registryFilter},
   243  		ManagedRecords: c.ManagedRecordTypes,
   244  		ExcludeRecords: c.ExcludeRecordTypes,
   245  		OwnerID:        c.Registry.OwnerID(),
   246  	}
   247  
   248  	plan = plan.Calculate()
   249  
   250  	if plan.Changes.HasChanges() {
   251  		err = c.Registry.ApplyChanges(ctx, plan.Changes)
   252  		if err != nil {
   253  			registryErrorsTotal.Inc()
   254  			deprecatedRegistryErrors.Inc()
   255  			return err
   256  		}
   257  	} else {
   258  		controllerNoChangesTotal.Inc()
   259  		log.Info("All records are already up to date")
   260  	}
   261  
   262  	lastSyncTimestamp.SetToCurrentTime()
   263  
   264  	return nil
   265  }
   266  
   267  // Counts the intersections of A and AAAA records in endpoint and registry.
   268  func countMatchingAddressRecords(endpoints []*endpoint.Endpoint, registryRecords []*endpoint.Endpoint) (int, int) {
   269  	recordsMap := make(map[string]map[string]struct{})
   270  	for _, regRecord := range registryRecords {
   271  		if _, found := recordsMap[regRecord.DNSName]; !found {
   272  			recordsMap[regRecord.DNSName] = make(map[string]struct{})
   273  		}
   274  		recordsMap[regRecord.DNSName][regRecord.RecordType] = struct{}{}
   275  	}
   276  	aCount := 0
   277  	aaaaCount := 0
   278  	for _, sourceRecord := range endpoints {
   279  		if _, found := recordsMap[sourceRecord.DNSName]; found {
   280  			if _, found := recordsMap[sourceRecord.DNSName][sourceRecord.RecordType]; found {
   281  				switch sourceRecord.RecordType {
   282  				case endpoint.RecordTypeA:
   283  					aCount++
   284  				case endpoint.RecordTypeAAAA:
   285  					aaaaCount++
   286  				}
   287  			}
   288  		}
   289  	}
   290  	return aCount, aaaaCount
   291  }
   292  
   293  func countAddressRecords(endpoints []*endpoint.Endpoint) (int, int) {
   294  	aCount := 0
   295  	aaaaCount := 0
   296  	for _, endPoint := range endpoints {
   297  		switch endPoint.RecordType {
   298  		case endpoint.RecordTypeA:
   299  			aCount++
   300  		case endpoint.RecordTypeAAAA:
   301  			aaaaCount++
   302  		}
   303  	}
   304  	return aCount, aaaaCount
   305  }
   306  
   307  // ScheduleRunOnce makes sure execution happens at most once per interval.
   308  func (c *Controller) ScheduleRunOnce(now time.Time) {
   309  	c.nextRunAtMux.Lock()
   310  	defer c.nextRunAtMux.Unlock()
   311  	// schedule only if a reconciliation is not already planned
   312  	// to happen in the following c.MinEventSyncInterval
   313  	if !c.nextRunAt.Before(now.Add(c.MinEventSyncInterval)) {
   314  		c.nextRunAt = now.Add(c.MinEventSyncInterval)
   315  	}
   316  }
   317  
   318  func (c *Controller) ShouldRunOnce(now time.Time) bool {
   319  	c.nextRunAtMux.Lock()
   320  	defer c.nextRunAtMux.Unlock()
   321  	if now.Before(c.nextRunAt) {
   322  		return false
   323  	}
   324  	c.nextRunAt = now.Add(c.Interval)
   325  	return true
   326  }
   327  
   328  // Run runs RunOnce in a loop with a delay until context is canceled
   329  func (c *Controller) Run(ctx context.Context) {
   330  	ticker := time.NewTicker(time.Second)
   331  	defer ticker.Stop()
   332  	for {
   333  		if c.ShouldRunOnce(time.Now()) {
   334  			if err := c.RunOnce(ctx); err != nil {
   335  				if errors.Is(err, provider.SoftError) {
   336  					log.Errorf("Failed to do run once: %v", err)
   337  				} else {
   338  					log.Fatalf("Failed to do run once: %v", err)
   339  				}
   340  			}
   341  		}
   342  		select {
   343  		case <-ticker.C:
   344  		case <-ctx.Done():
   345  			log.Info("Terminating main controller loop")
   346  			return
   347  		}
   348  	}
   349  }