github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/runtime/grpc/grpc-runtime.go (about) 1 // Copyright 2023-2024 The Inspektor Gadget authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package grpcruntime 16 17 import ( 18 "context" 19 _ "embed" 20 "errors" 21 "fmt" 22 "io" 23 "net" 24 "net/url" 25 "strings" 26 "sync" 27 "time" 28 29 log "github.com/sirupsen/logrus" 30 "google.golang.org/grpc" 31 "google.golang.org/grpc/credentials/insecure" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/client-go/kubernetes" 34 "k8s.io/client-go/rest" 35 36 "github.com/inspektor-gadget/inspektor-gadget/internal/deployinfo" 37 "github.com/inspektor-gadget/inspektor-gadget/pkg/gadget-service/api" 38 "github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets" 39 "github.com/inspektor-gadget/inspektor-gadget/pkg/logger" 40 "github.com/inspektor-gadget/inspektor-gadget/pkg/operators" 41 "github.com/inspektor-gadget/inspektor-gadget/pkg/params" 42 "github.com/inspektor-gadget/inspektor-gadget/pkg/runtime" 43 ) 44 45 type ConnectionMode int 46 47 const ( 48 // ConnectionModeDirect will connect directly to the remote using the gRPC protocol; the remote side can either 49 // be a tcp or a unix socket endpoint 50 ConnectionModeDirect ConnectionMode = iota 51 52 // ConnectionModeKubernetesProxy will connect to a gRPC endpoint through a kubernetes API server by first looking 53 // up an appropriate target node using the kubernetes API, then using the port forward 54 // endpoint of the Kubernetes API to forward the gRPC connection to the service listener (see gadgettracermgr). 55 ConnectionModeKubernetesProxy 56 ) 57 58 const ( 59 ParamNode = "node" 60 ParamRemoteAddress = "remote-address" 61 ParamConnectionMethod = "connection-method" 62 ParamConnectionTimeout = "connection-timeout" 63 64 // ParamGadgetServiceTCPPort is only used in combination with KubernetesProxyConnectionMethodTCP 65 ParamGadgetServiceTCPPort = "tcp-port" 66 67 // ConnectTimeout is the time in seconds we wait for a connection to the remote to 68 // succeed 69 ConnectTimeout = 5 70 71 // ResultTimeout is the time in seconds we wait for a result to return from the gadget 72 // after sending a Stop command 73 ResultTimeout = 30 74 75 ParamGadgetNamespace string = "gadget-namespace" 76 DefaultGadgetNamespace string = "gadget" 77 ) 78 79 type Runtime struct { 80 info *deployinfo.DeployInfo 81 defaultValues map[string]string 82 globalParams *params.Params 83 restConfig *rest.Config 84 connectionMode ConnectionMode 85 } 86 87 type RunClient interface { 88 Recv() (*api.GadgetEvent, error) 89 } 90 91 // New instantiates the runtime and loads the locally stored gadget info. If no info is stored locally, 92 // it will try to fetch one from one of the gadget nodes and store it locally. It will issue warnings on 93 // failures. 94 func New(options ...Option) *Runtime { 95 r := &Runtime{ 96 defaultValues: map[string]string{}, 97 } 98 for _, option := range options { 99 option(r) 100 } 101 return r 102 } 103 104 func (r *Runtime) Init(runtimeGlobalParams *params.Params) error { 105 if runtimeGlobalParams == nil { 106 runtimeGlobalParams = r.GlobalParamDescs().ToParams() 107 } 108 109 // overwrite only if not yet initialized; for gadgetctl, this initialization happens 110 // already in the main.go to specify a target address 111 if r.globalParams == nil { 112 r.globalParams = runtimeGlobalParams 113 } 114 return nil 115 } 116 117 func (r *Runtime) SetRestConfig(config *rest.Config) { 118 r.restConfig = config 119 } 120 121 func (r *Runtime) Close() error { 122 return nil 123 } 124 125 func checkForDuplicates(subject string) func(value string) error { 126 return func(value string) error { 127 values := strings.Split(value, ",") 128 valueMap := make(map[string]struct{}) 129 for _, v := range values { 130 if _, ok := valueMap[v]; ok { 131 return fmt.Errorf("duplicate %s: %s", subject, v) 132 } 133 valueMap[v] = struct{}{} 134 } 135 return nil 136 } 137 } 138 139 func (r *Runtime) ParamDescs() params.ParamDescs { 140 p := params.ParamDescs{} 141 switch r.connectionMode { 142 case ConnectionModeDirect: 143 return p 144 case ConnectionModeKubernetesProxy: 145 p.Add(params.ParamDescs{ 146 { 147 Key: ParamNode, 148 Description: "Comma-separated list of nodes to run the gadget on", 149 Validator: checkForDuplicates("node"), 150 }, 151 }...) 152 return p 153 } 154 panic("invalid connection mode set for grpc-runtime") 155 } 156 157 func (r *Runtime) GlobalParamDescs() params.ParamDescs { 158 p := params.ParamDescs{ 159 { 160 Key: ParamConnectionTimeout, 161 Description: "Maximum time to establish a connection to remote target in seconds", 162 DefaultValue: fmt.Sprintf("%d", ConnectTimeout), 163 TypeHint: params.TypeUint16, 164 }, 165 } 166 switch r.connectionMode { 167 case ConnectionModeDirect: 168 p.Add(params.ParamDescs{ 169 { 170 Key: ParamRemoteAddress, 171 Description: "Comma-separated list of remote address (gRPC) to connect to", 172 DefaultValue: api.DefaultDaemonPath, 173 Validator: checkForDuplicates("address"), 174 }, 175 }...) 176 return p 177 case ConnectionModeKubernetesProxy: 178 p.Add(params.ParamDescs{ 179 { 180 Key: ParamGadgetServiceTCPPort, 181 Description: "Port used to connect to the gadget service", 182 DefaultValue: fmt.Sprintf("%d", api.GadgetServicePort), 183 TypeHint: params.TypeUint16, 184 }, 185 { 186 Key: ParamGadgetNamespace, 187 Description: "Namespace where the Inspektor Gadget is deployed", 188 DefaultValue: DefaultGadgetNamespace, 189 TypeHint: params.TypeString, 190 }, 191 }...) 192 return p 193 } 194 panic("invalid connection mode set for grpc-runtime") 195 } 196 197 type target struct { 198 addressOrPod string 199 node string 200 } 201 202 func getGadgetPods(ctx context.Context, config *rest.Config, nodes []string, gadgetNamespace string) ([]target, error) { 203 client, err := kubernetes.NewForConfig(config) 204 if err != nil { 205 return nil, fmt.Errorf("setting up trace client: %w", err) 206 } 207 208 opts := metav1.ListOptions{LabelSelector: "k8s-app=gadget"} 209 pods, err := client.CoreV1().Pods(gadgetNamespace).List(ctx, opts) 210 if err != nil { 211 return nil, fmt.Errorf("getting pods: %w", err) 212 } 213 214 if len(pods.Items) == 0 { 215 return nil, fmt.Errorf("no gadget pods found in namespace %q. Is Inspektor Gadget deployed?", gadgetNamespace) 216 } 217 218 if len(nodes) == 0 { 219 res := make([]target, 0, len(pods.Items)) 220 221 for _, pod := range pods.Items { 222 res = append(res, target{addressOrPod: pod.Name, node: pod.Spec.NodeName}) 223 } 224 225 return res, nil 226 } 227 228 res := make([]target, 0, len(nodes)) 229 nodesLoop: 230 for _, node := range nodes { 231 for _, pod := range pods.Items { 232 if node == pod.Spec.NodeName { 233 res = append(res, target{addressOrPod: pod.Name, node: node}) 234 continue nodesLoop 235 } 236 } 237 return nil, fmt.Errorf("node %q does not have a gadget pod", node) 238 } 239 240 return res, nil 241 } 242 243 func (r *Runtime) getTargets(ctx context.Context, params *params.Params) ([]target, error) { 244 switch r.connectionMode { 245 case ConnectionModeKubernetesProxy: 246 // Get nodes to run on 247 nodes := params.Get(ParamNode).AsStringSlice() 248 gadgetNamespace := r.globalParams.Get(ParamGadgetNamespace).AsString() 249 pods, err := getGadgetPods(ctx, r.restConfig, nodes, gadgetNamespace) 250 if err != nil { 251 return nil, fmt.Errorf("get gadget pods: %w", err) 252 } 253 if len(pods) == 0 { 254 return nil, fmt.Errorf("get gadget pods: Inspektor Gadget is not running on the requested node(s): %v", nodes) 255 } 256 return pods, nil 257 case ConnectionModeDirect: 258 inTargets := r.globalParams.Get(ParamRemoteAddress).AsStringSlice() 259 targets := make([]target, 0) 260 for _, t := range inTargets { 261 purl, err := url.Parse(t) 262 if err != nil { 263 return nil, fmt.Errorf("invalid remote address %q: %w", t, err) 264 } 265 tg := target{ 266 addressOrPod: purl.Host, 267 node: purl.Hostname(), 268 } 269 if purl.Scheme == "unix" { 270 // use the whole url in case of a unix socket and "local" as node 271 tg.addressOrPod = t 272 tg.node = "local" 273 } 274 targets = append(targets, tg) 275 } 276 return targets, nil 277 } 278 return nil, fmt.Errorf("unsupported connection mode") 279 } 280 281 func (r *Runtime) RunBuiltInGadget(gadgetCtx runtime.GadgetContext) (runtime.CombinedGadgetResult, error) { 282 paramMap := make(map[string]string) 283 gadgets.ParamsToMap( 284 paramMap, 285 gadgetCtx.GadgetParams(), 286 gadgetCtx.RuntimeParams(), 287 gadgetCtx.OperatorsParamCollection(), 288 ) 289 290 gadgetCtx.Logger().Debugf("Params") 291 for k, v := range paramMap { 292 gadgetCtx.Logger().Debugf("- %s: %q", k, v) 293 } 294 295 targets, err := r.getTargets(gadgetCtx.Context(), gadgetCtx.RuntimeParams()) 296 if err != nil { 297 return nil, fmt.Errorf("getting target nodes: %w", err) 298 } 299 return r.runBuiltInGadgetOnTargets(gadgetCtx, paramMap, targets) 300 } 301 302 func (r *Runtime) getConnToRandomTarget(ctx context.Context, runtimeParams *params.Params) (*grpc.ClientConn, error) { 303 targets, err := r.getTargets(ctx, runtimeParams) 304 if err != nil { 305 return nil, err 306 } 307 if len(targets) == 0 { 308 return nil, fmt.Errorf("no valid targets") 309 } 310 target := targets[0] 311 log.Debugf("using target %q (%q)", target.addressOrPod, target.node) 312 313 timeout := time.Second * time.Duration(r.globalParams.Get(ParamConnectionTimeout).AsUint16()) 314 conn, err := r.dialContext(ctx, target, timeout) 315 if err != nil { 316 return nil, fmt.Errorf("dialing %q (%q): %w", target.addressOrPod, target.node, err) 317 } 318 return conn, nil 319 } 320 321 func (r *Runtime) runBuiltInGadgetOnTargets( 322 gadgetCtx runtime.GadgetContext, 323 paramMap map[string]string, 324 targets []target, 325 ) (runtime.CombinedGadgetResult, error) { 326 gType := gadgetCtx.GadgetDesc().Type() 327 328 if gType == gadgets.TypeTraceIntervals { 329 gadgetCtx.Parser().EnableSnapshots( 330 gadgetCtx.Context(), 331 time.Duration(gadgetCtx.GadgetParams().Get(gadgets.ParamInterval).AsInt32())*time.Second, 332 2, 333 ) 334 defer gadgetCtx.Parser().Flush() 335 } 336 337 if gType == gadgets.TypeOneShot { 338 gadgetCtx.Parser().EnableCombiner() 339 defer gadgetCtx.Parser().Flush() 340 } 341 342 results := make(runtime.CombinedGadgetResult) 343 var resultsLock sync.Mutex 344 345 wg := sync.WaitGroup{} 346 for _, t := range targets { 347 wg.Add(1) 348 go func(target target) { 349 gadgetCtx.Logger().Debugf("running gadget on node %q", target.node) 350 res, err := r.runBuiltInGadget(gadgetCtx, target, paramMap) 351 resultsLock.Lock() 352 results[target.node] = &runtime.GadgetResult{ 353 Payload: res, 354 Error: err, 355 } 356 resultsLock.Unlock() 357 wg.Done() 358 }(t) 359 } 360 361 wg.Wait() 362 return results, results.Err() 363 } 364 365 func (r *Runtime) dialContext(dialCtx context.Context, target target, timeout time.Duration) (*grpc.ClientConn, error) { 366 opts := []grpc.DialOption{ 367 grpc.WithTransportCredentials(insecure.NewCredentials()), 368 grpc.WithBlock(), 369 } 370 371 // If we're in Kubernetes connection mode, we need a custom dialer 372 if r.connectionMode == ConnectionModeKubernetesProxy { 373 opts = append(opts, grpc.WithContextDialer(func(ctx context.Context, s string) (net.Conn, error) { 374 port := r.globalParams.Get(ParamGadgetServiceTCPPort).AsUint16() 375 gadgetNamespace := r.globalParams.Get(ParamGadgetNamespace).AsString() 376 return NewK8SPortFwdConn(ctx, r.restConfig, gadgetNamespace, target, port, timeout) 377 })) 378 } else { 379 newCtx, cancel := context.WithTimeout(dialCtx, timeout) 380 defer cancel() 381 dialCtx = newCtx 382 } 383 384 conn, err := grpc.DialContext(dialCtx, "passthrough:///"+target.addressOrPod, opts...) 385 if err != nil { 386 return nil, fmt.Errorf("dialing %q (%q): %w", target.addressOrPod, target.node, err) 387 } 388 return conn, nil 389 } 390 391 func (r *Runtime) runBuiltInGadget(gadgetCtx runtime.GadgetContext, target target, allParams map[string]string) ([]byte, error) { 392 // Notice that we cannot use gadgetCtx.Context() here, as that would - when cancelled by the user - also cancel the 393 // underlying gRPC connection. That would then lead to results not being received anymore (mostly for profile 394 // gadgets.) 395 connCtx, cancel := context.WithCancel(context.Background()) 396 defer cancel() 397 398 timeout := time.Second * time.Duration(r.globalParams.Get(ParamConnectionTimeout).AsUint16()) 399 dialCtx, cancelDial := context.WithTimeout(gadgetCtx.Context(), timeout) 400 defer cancelDial() 401 402 conn, err := r.dialContext(dialCtx, target, timeout) 403 if err != nil { 404 return nil, fmt.Errorf("dialing target on node %q: %w", target.node, err) 405 } 406 defer conn.Close() 407 client := api.NewBuiltInGadgetManagerClient(conn) 408 409 runRequest := &api.BuiltInGadgetRunRequest{ 410 GadgetName: gadgetCtx.GadgetDesc().Name(), 411 GadgetCategory: gadgetCtx.GadgetDesc().Category(), 412 Params: allParams, 413 Args: gadgetCtx.Args(), 414 Nodes: nil, 415 FanOut: false, 416 LogLevel: uint32(gadgetCtx.Logger().GetLevel()), 417 Timeout: int64(gadgetCtx.Timeout()), 418 } 419 420 runClient, err := client.RunBuiltInGadget(connCtx) 421 if err != nil && !errors.Is(err, context.Canceled) { 422 return nil, err 423 } 424 425 controlRequest := &api.BuiltInGadgetControlRequest{Event: &api.BuiltInGadgetControlRequest_RunRequest{RunRequest: runRequest}} 426 err = runClient.Send(controlRequest) 427 if err != nil { 428 return nil, err 429 } 430 431 parser := gadgetCtx.Parser() 432 433 jsonHandler := func([]byte) {} 434 jsonArrayHandler := func([]byte) {} 435 436 if parser != nil { 437 var enrichers []func(any) error 438 ev := gadgetCtx.GadgetDesc().EventPrototype() 439 if _, ok := ev.(operators.NodeSetter); ok { 440 enrichers = append(enrichers, func(ev any) error { 441 ev.(operators.NodeSetter).SetNode(target.node) 442 return nil 443 }) 444 } 445 446 jsonHandler = parser.JSONHandlerFunc(enrichers...) 447 jsonArrayHandler = parser.JSONHandlerFuncArray(target.node, enrichers...) 448 } 449 450 doneChan := make(chan error) 451 452 var result []byte 453 expectedSeq := uint32(1) 454 455 go func() { 456 for { 457 ev, err := runClient.Recv() 458 if err != nil { 459 gadgetCtx.Logger().Debugf("%-20s | runClient returned with %v", target.node, err) 460 if !errors.Is(err, io.EOF) { 461 doneChan <- err 462 return 463 } 464 doneChan <- nil 465 return 466 } 467 switch ev.Type { 468 case api.EventTypeGadgetPayload: 469 if expectedSeq != ev.Seq { 470 gadgetCtx.Logger().Warnf("%-20s | expected seq %d, got %d, %d messages dropped", target.node, expectedSeq, ev.Seq, ev.Seq-expectedSeq) 471 } 472 expectedSeq = ev.Seq + 1 473 if len(ev.Payload) > 0 && ev.Payload[0] == '[' { 474 jsonArrayHandler(ev.Payload) 475 continue 476 } 477 jsonHandler(ev.Payload) 478 case api.EventTypeGadgetResult: 479 gadgetCtx.Logger().Debugf("%-20s | got result from server", target.node) 480 result = ev.Payload 481 case api.EventTypeGadgetJobID: // not needed right now 482 default: 483 if ev.Type >= 1<<api.EventLogShift { 484 gadgetCtx.Logger().Log(logger.Level(ev.Type>>api.EventLogShift), fmt.Sprintf("%-20s | %s", target.node, string(ev.Payload))) 485 continue 486 } 487 gadgetCtx.Logger().Warnf("unknown payload type %d: %s", ev.Type, ev.Payload) 488 } 489 } 490 }() 491 492 var runErr error 493 select { 494 case doneErr := <-doneChan: 495 gadgetCtx.Logger().Debugf("%-20s | done from server side (%v)", target.node, doneErr) 496 runErr = doneErr 497 case <-gadgetCtx.Context().Done(): 498 // Send stop request 499 gadgetCtx.Logger().Debugf("%-20s | sending stop request", target.node) 500 controlRequest := &api.BuiltInGadgetControlRequest{Event: &api.BuiltInGadgetControlRequest_StopRequest{StopRequest: &api.BuiltInGadgetStopRequest{}}} 501 runClient.Send(controlRequest) 502 503 // Wait for done or timeout 504 select { 505 case doneErr := <-doneChan: 506 gadgetCtx.Logger().Debugf("%-20s | done after cancel request (%v)", target.node, doneErr) 507 runErr = doneErr 508 case <-time.After(ResultTimeout * time.Second): 509 return nil, fmt.Errorf("timed out while getting result") 510 } 511 } 512 return result, runErr 513 } 514 515 func (r *Runtime) GetCatalog() (*runtime.Catalog, error) { 516 if r.info == nil { 517 return nil, nil 518 } 519 return r.info.Catalog, nil 520 } 521 522 func (r *Runtime) SetDefaultValue(key params.ValueHint, value string) { 523 r.defaultValues[strings.ToLower(string(key))] = value 524 } 525 526 func (r *Runtime) GetDefaultValue(key params.ValueHint) (string, bool) { 527 val, ok := r.defaultValues[strings.ToLower(string(key))] 528 return val, ok 529 }