go.ligato.io/vpp-agent/v3@v3.5.0/plugins/telemetry/telemetry.go (about) 1 // Copyright (c) 2021 Cisco and/or its affiliates. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at: 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package telemetry 16 17 import ( 18 "context" 19 "fmt" 20 "net/http" 21 "os" 22 "sync" 23 "time" 24 25 "github.com/gorilla/mux" 26 "github.com/pkg/errors" 27 "github.com/unrolled/render" 28 "go.ligato.io/cn-infra/v2/infra" 29 "go.ligato.io/cn-infra/v2/logging" 30 "go.ligato.io/cn-infra/v2/rpc/grpc" 31 prom "go.ligato.io/cn-infra/v2/rpc/prometheus" 32 "go.ligato.io/cn-infra/v2/rpc/rest" 33 "go.ligato.io/cn-infra/v2/servicelabel" 34 35 "go.ligato.io/vpp-agent/v3/pkg/metrics" 36 "go.ligato.io/vpp-agent/v3/pkg/models" 37 "go.ligato.io/vpp-agent/v3/plugins/govppmux" 38 "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls" 39 "go.ligato.io/vpp-agent/v3/plugins/vpp/ifplugin/ifaceidx" 40 "go.ligato.io/vpp-agent/v3/proto/ligato/configurator" 41 42 _ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2101" 43 _ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2106" 44 _ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2202" 45 _ "go.ligato.io/vpp-agent/v3/plugins/telemetry/vppcalls/vpp2210" 46 ) 47 48 var debug = os.Getenv("DEBUG_TELEMETRY") != "" 49 50 // Plugin registers Telemetry Plugin 51 type Plugin struct { 52 Deps 53 54 handler vppcalls.TelemetryVppAPI 55 56 statsPollerServer 57 prometheusMetrics 58 59 // From config file 60 updatePeriod time.Duration 61 disabled bool 62 prometheusDisabled bool 63 skipped map[string]bool 64 65 wg sync.WaitGroup 66 quit chan struct{} 67 } 68 69 type InterfaceIndexProvider interface { 70 // GetInterfaceIndex gives read-only access to map with metadata of all configured 71 // VPP interfaces. 72 GetInterfaceIndex() ifaceidx.IfaceMetadataIndex 73 } 74 75 // Deps represents dependencies of Telemetry Plugin 76 type Deps struct { 77 infra.PluginDeps 78 ServiceLabel servicelabel.ReaderAPI 79 VPP govppmux.API 80 Prometheus prom.API 81 GRPC grpc.Server 82 HTTPHandlers rest.HTTPHandlers 83 IfPlugin InterfaceIndexProvider 84 } 85 86 // Init initializes Telemetry Plugin 87 func (p *Plugin) Init() error { 88 p.quit = make(chan struct{}) 89 p.skipped = make(map[string]bool, 0) 90 91 // Telemetry config file 92 config, err := p.loadConfig() 93 if err != nil { 94 return err 95 } 96 if config != nil { 97 // If telemetry is not enabled, skip plugin initialization 98 if config.Disabled { 99 p.Log.Info("Telemetry plugin disabled via config file") 100 p.disabled = true 101 return nil 102 } 103 // Disable prometheus metrics if set by config 104 if config.PrometheusDisabled { 105 p.Log.Info("Prometheus metrics disabled via config file") 106 p.prometheusDisabled = true 107 } else { 108 // This prevents setting the update period to less than 5 seconds, 109 // which can have significant performance hit. 110 if config.PollingInterval > minimumUpdatePeriod { 111 p.updatePeriod = config.PollingInterval 112 p.Log.Infof("polling period changed to %v", p.updatePeriod) 113 } else if config.PollingInterval > 0 { 114 p.Log.Warnf("polling period has to be at least %s, using default: %v", 115 minimumUpdatePeriod, defaultUpdatePeriod) 116 } 117 // Store map of skipped metrics 118 for _, skip := range config.Skipped { 119 p.skipped[skip] = true 120 } 121 } 122 } 123 124 // Register prometheus 125 if !p.prometheusDisabled { 126 if p.updatePeriod == 0 { 127 p.updatePeriod = defaultUpdatePeriod 128 } 129 if err := p.registerPrometheus(); err != nil { 130 return err 131 } 132 } 133 134 // Setup stats poller 135 p.statsPollerServer.log = p.Log.NewLogger("stats-poller") 136 if err := p.setupStatsPoller(); err != nil { 137 return errors.WithMessage(err, "setting up stats poller failed") 138 } 139 140 if p.HTTPHandlers != nil { 141 p.HTTPHandlers.RegisterHTTPHandler("/metrics/{metric}", metricsHandler, "GET") 142 } 143 144 return nil 145 } 146 147 // AfterInit executes after initializion of Telemetry Plugin 148 func (p *Plugin) AfterInit() error { 149 // Do not start polling if telemetry is disabled 150 if p.disabled || p.prometheusDisabled { 151 return nil 152 } 153 154 p.startPeriodicUpdates() 155 156 return nil 157 } 158 159 func (p *Plugin) setupStatsPoller() error { 160 h := vppcalls.CompatibleTelemetryHandler(p.VPP) 161 if h == nil { 162 p.Log.Warnf("VPP telemetry handler unavailable") 163 } else { 164 p.statsPollerServer.handler = h 165 } 166 p.statsPollerServer.ifIndex = p.IfPlugin.GetInterfaceIndex() 167 168 if p.GRPC != nil && p.GRPC.GetServer() != nil { 169 configurator.RegisterStatsPollerServiceServer(p.GRPC.GetServer(), &p.statsPollerServer) 170 } 171 return nil 172 } 173 174 // Close is used to clean up resources used by Telemetry Plugin 175 func (p *Plugin) Close() error { 176 close(p.quit) 177 p.wg.Wait() 178 return nil 179 } 180 181 func (p *Plugin) startPeriodicUpdates() { 182 p.handler = vppcalls.CompatibleTelemetryHandler(p.VPP) 183 if p.handler == nil { 184 p.Log.Warnf("VPP telemetry handler unavailable, skipping periodic updates") 185 return 186 } 187 188 p.wg.Add(1) 189 go p.periodicUpdates() 190 } 191 192 // periodic updates for the metrics data 193 func (p *Plugin) periodicUpdates() { 194 defer p.wg.Done() 195 196 p.Log.Debugf("starting periodic updates (%v)", p.updatePeriod) 197 defer p.Log.Debugf("stopping periodic updates") 198 199 tick := time.NewTicker(p.updatePeriod) 200 for { 201 select { 202 case <-tick.C: 203 ctx := context.Background() 204 p.updatePrometheus(ctx) 205 206 case <-p.quit: 207 return 208 } 209 } 210 } 211 212 func (p *Plugin) tracef(f string, a ...interface{}) { 213 if debug && p.Log.GetLevel() >= logging.DebugLevel { 214 s := fmt.Sprintf(f, a...) 215 if len(s) > 250 { 216 p.Log.Debugf("%s... (%d bytes omitted) ...%s", s[:200], len(s)-250, s[len(s)-50:]) 217 return 218 } 219 p.Log.Debug(s) 220 } 221 } 222 223 func metricsHandler(formatter *render.Render) http.HandlerFunc { 224 return func(w http.ResponseWriter, req *http.Request) { 225 vars := mux.Vars(req) 226 if vars == nil { 227 _ = formatter.JSON(w, http.StatusNotFound, struct{}{}) 228 return 229 } 230 metric := vars["metric"] 231 model, err := models.DefaultRegistry.GetModel(metric) 232 if err != nil { 233 _ = formatter.JSON(w, http.StatusNotFound, struct{ Error string }{err.Error()}) 234 return 235 } 236 data := model.NewInstance() 237 if err := metrics.Retrieve(data); err != nil { 238 _ = formatter.JSON(w, http.StatusInternalServerError, struct{ Error string }{err.Error()}) 239 return 240 } 241 _ = formatter.JSON(w, 200, data) 242 } 243 }