go.ligato.io/vpp-agent/v3@v3.5.0/plugins/telemetry/telemetry.go (about)

     1  //  Copyright (c) 2021 Cisco and/or its affiliates.
     2  //
     3  //  Licensed under the Apache License, Version 2.0 (the "License");
     4  //  you may not use this file except in compliance with the License.
     5  //  You may obtain a copy of the License at:
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  //  Unless required by applicable law or agreed to in writing, software
    10  //  distributed under the License is distributed on an "AS IS" BASIS,
    11  //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  //  See the License for the specific language governing permissions and
    13  //  limitations under the License.
    14  
    15  package telemetry
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net/http"
    21  	"os"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/gorilla/mux"
    26  	"github.com/pkg/errors"
    27  	"github.com/unrolled/render"
    28  	"go.ligato.io/cn-infra/v2/infra"
    29  	"go.ligato.io/cn-infra/v2/logging"
    30  	"go.ligato.io/cn-infra/v2/rpc/grpc"
    31  	prom "go.ligato.io/cn-infra/v2/rpc/prometheus"
    32  	"go.ligato.io/cn-infra/v2/rpc/rest"
    33  	"go.ligato.io/cn-infra/v2/servicelabel"
    34  
    35  	"go.ligato.io/vpp-agent/v3/pkg/metrics"
    36  	"go.ligato.io/vpp-agent/v3/pkg/models"
    37  	"go.ligato.io/vpp-agent/v3/plugins/govppmux"
    38  	"go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls"
    39  	"go.ligato.io/vpp-agent/v3/plugins/vpp/ifplugin/ifaceidx"
    40  	"go.ligato.io/vpp-agent/v3/proto/ligato/configurator"
    41  
    42  	_ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2101"
    43  	_ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2106"
    44  	_ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2202"
    45  	_ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2210"
    46  )
    47  
    48  var debug = os.Getenv("DEBUG_TELEMETRY") != ""
    49  
    50  // Plugin registers Telemetry Plugin
    51  type Plugin struct {
    52  	Deps
    53  
    54  	handler vppcalls.TelemetryVppAPI
    55  
    56  	statsPollerServer
    57  	prometheusMetrics
    58  
    59  	// From config file
    60  	updatePeriod       time.Duration
    61  	disabled           bool
    62  	prometheusDisabled bool
    63  	skipped            map[string]bool
    64  
    65  	wg   sync.WaitGroup
    66  	quit chan struct{}
    67  }
    68  
    69  type InterfaceIndexProvider interface {
    70  	// GetInterfaceIndex gives read-only access to map with metadata of all configured
    71  	// VPP interfaces.
    72  	GetInterfaceIndex() ifaceidx.IfaceMetadataIndex
    73  }
    74  
    75  // Deps represents dependencies of Telemetry Plugin
    76  type Deps struct {
    77  	infra.PluginDeps
    78  	ServiceLabel servicelabel.ReaderAPI
    79  	VPP          govppmux.API
    80  	Prometheus   prom.API
    81  	GRPC         grpc.Server
    82  	HTTPHandlers rest.HTTPHandlers
    83  	IfPlugin     InterfaceIndexProvider
    84  }
    85  
    86  // Init initializes Telemetry Plugin
    87  func (p *Plugin) Init() error {
    88  	p.quit = make(chan struct{})
    89  	p.skipped = make(map[string]bool, 0)
    90  
    91  	// Telemetry config file
    92  	config, err := p.loadConfig()
    93  	if err != nil {
    94  		return err
    95  	}
    96  	if config != nil {
    97  		// If telemetry is not enabled, skip plugin initialization
    98  		if config.Disabled {
    99  			p.Log.Info("Telemetry plugin disabled via config file")
   100  			p.disabled = true
   101  			return nil
   102  		}
   103  		// Disable prometheus metrics if set by config
   104  		if config.PrometheusDisabled {
   105  			p.Log.Info("Prometheus metrics disabled via config file")
   106  			p.prometheusDisabled = true
   107  		} else {
   108  			// This prevents setting the update period to less than 5 seconds,
   109  			// which can have significant performance hit.
   110  			if config.PollingInterval > minimumUpdatePeriod {
   111  				p.updatePeriod = config.PollingInterval
   112  				p.Log.Infof("polling period changed to %v", p.updatePeriod)
   113  			} else if config.PollingInterval > 0 {
   114  				p.Log.Warnf("polling period has to be at least %s, using default: %v",
   115  					minimumUpdatePeriod, defaultUpdatePeriod)
   116  			}
   117  			// Store map of skipped metrics
   118  			for _, skip := range config.Skipped {
   119  				p.skipped[skip] = true
   120  			}
   121  		}
   122  	}
   123  
   124  	// Register prometheus
   125  	if !p.prometheusDisabled {
   126  		if p.updatePeriod == 0 {
   127  			p.updatePeriod = defaultUpdatePeriod
   128  		}
   129  		if err := p.registerPrometheus(); err != nil {
   130  			return err
   131  		}
   132  	}
   133  
   134  	// Setup stats poller
   135  	p.statsPollerServer.log = p.Log.NewLogger("stats-poller")
   136  	if err := p.setupStatsPoller(); err != nil {
   137  		return errors.WithMessage(err, "setting up stats poller failed")
   138  	}
   139  
   140  	if p.HTTPHandlers != nil {
   141  		p.HTTPHandlers.RegisterHTTPHandler("/metrics/{metric}", metricsHandler, "GET")
   142  	}
   143  
   144  	return nil
   145  }
   146  
   147  // AfterInit executes after initializion of Telemetry Plugin
   148  func (p *Plugin) AfterInit() error {
   149  	// Do not start polling if telemetry is disabled
   150  	if p.disabled || p.prometheusDisabled {
   151  		return nil
   152  	}
   153  
   154  	p.startPeriodicUpdates()
   155  
   156  	return nil
   157  }
   158  
   159  func (p *Plugin) setupStatsPoller() error {
   160  	h := vppcalls.CompatibleTelemetryHandler(p.VPP)
   161  	if h == nil {
   162  		p.Log.Warnf("VPP telemetry handler unavailable")
   163  	} else {
   164  		p.statsPollerServer.handler = h
   165  	}
   166  	p.statsPollerServer.ifIndex = p.IfPlugin.GetInterfaceIndex()
   167  
   168  	if p.GRPC != nil && p.GRPC.GetServer() != nil {
   169  		configurator.RegisterStatsPollerServiceServer(p.GRPC.GetServer(), &p.statsPollerServer)
   170  	}
   171  	return nil
   172  }
   173  
   174  // Close is used to clean up resources used by Telemetry Plugin
   175  func (p *Plugin) Close() error {
   176  	close(p.quit)
   177  	p.wg.Wait()
   178  	return nil
   179  }
   180  
   181  func (p *Plugin) startPeriodicUpdates() {
   182  	p.handler = vppcalls.CompatibleTelemetryHandler(p.VPP)
   183  	if p.handler == nil {
   184  		p.Log.Warnf("VPP telemetry handler unavailable, skipping periodic updates")
   185  		return
   186  	}
   187  
   188  	p.wg.Add(1)
   189  	go p.periodicUpdates()
   190  }
   191  
   192  // periodic updates for the metrics data
   193  func (p *Plugin) periodicUpdates() {
   194  	defer p.wg.Done()
   195  
   196  	p.Log.Debugf("starting periodic updates (%v)", p.updatePeriod)
   197  	defer p.Log.Debugf("stopping periodic updates")
   198  
   199  	tick := time.NewTicker(p.updatePeriod)
   200  	for {
   201  		select {
   202  		case <-tick.C:
   203  			ctx := context.Background()
   204  			p.updatePrometheus(ctx)
   205  
   206  		case <-p.quit:
   207  			return
   208  		}
   209  	}
   210  }
   211  
   212  func (p *Plugin) tracef(f string, a ...interface{}) {
   213  	if debug && p.Log.GetLevel() >= logging.DebugLevel {
   214  		s := fmt.Sprintf(f, a...)
   215  		if len(s) > 250 {
   216  			p.Log.Debugf("%s... (%d bytes omitted) ...%s", s[:200], len(s)-250, s[len(s)-50:])
   217  			return
   218  		}
   219  		p.Log.Debug(s)
   220  	}
   221  }
   222  
   223  func metricsHandler(formatter *render.Render) http.HandlerFunc {
   224  	return func(w http.ResponseWriter, req *http.Request) {
   225  		vars := mux.Vars(req)
   226  		if vars == nil {
   227  			_ = formatter.JSON(w, http.StatusNotFound, struct{}{})
   228  			return
   229  		}
   230  		metric := vars["metric"]
   231  		model, err := models.DefaultRegistry.GetModel(metric)
   232  		if err != nil {
   233  			_ = formatter.JSON(w, http.StatusNotFound, struct{ Error string }{err.Error()})
   234  			return
   235  		}
   236  		data := model.NewInstance()
   237  		if err := metrics.Retrieve(data); err != nil {
   238  			_ = formatter.JSON(w, http.StatusInternalServerError, struct{ Error string }{err.Error()})
   239  			return
   240  		}
   241  		_ = formatter.JSON(w, 200, data)
   242  	}
   243  }