k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/profile.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	goerrors "github.com/go-errors/errors"
    26  	rbacv1 "k8s.io/api/rbac/v1"
    27  	apierrs "k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	clientset "k8s.io/client-go/kubernetes"
    30  	"k8s.io/klog/v2"
    31  	"k8s.io/perf-tests/clusterloader2/pkg/framework/client"
    32  	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
    33  	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
    34  	"k8s.io/perf-tests/clusterloader2/pkg/provider"
    35  	"k8s.io/perf-tests/clusterloader2/pkg/util"
    36  )
    37  
    38  const (
    39  	cpuProfileName    = "CPUProfile"
    40  	memoryProfileName = "MemoryProfile"
    41  	blockProfileName  = "BlockProfile"
    42  )
    43  
    44  func init() {
    45  	if err := measurement.Register(cpuProfileName, createProfileMeasurementFactory(cpuProfileName, "profile")); err != nil {
    46  		klog.Fatalf("Cannot register %s: %v", cpuProfileName, err)
    47  	}
    48  	if err := measurement.Register(memoryProfileName, createProfileMeasurementFactory(memoryProfileName, "heap")); err != nil {
    49  		klog.Fatalf("Cannot register %s: %v", memoryProfileName, err)
    50  	}
    51  	if err := measurement.Register(blockProfileName, createProfileMeasurementFactory(blockProfileName, "block")); err != nil {
    52  		klog.Fatalf("Cannot register %s: %v", blockProfileName, err)
    53  	}
    54  }
    55  
    56  type profileConfig struct {
    57  	componentName string
    58  	provider      provider.Provider
    59  	hosts         []string
    60  	kind          string
    61  }
    62  
    63  func (p *profileMeasurement) populateProfileConfig(config *measurement.Config) error {
    64  	var err error
    65  	if p.config.componentName, err = util.GetString(config.Params, "componentName"); err != nil {
    66  		return err
    67  	}
    68  	p.config.provider = config.ClusterFramework.GetClusterConfig().Provider
    69  	p.config.hosts = config.ClusterFramework.GetClusterConfig().MasterIPs
    70  	return nil
    71  }
    72  
    73  type profileMeasurement struct {
    74  	name      string
    75  	config    *profileConfig
    76  	summaries []measurement.Summary
    77  	isRunning bool
    78  	stopCh    chan struct{}
    79  	wg        sync.WaitGroup
    80  }
    81  
    82  func createProfileMeasurementFactory(name, kind string) func() measurement.Measurement {
    83  	return func() measurement.Measurement {
    84  		return &profileMeasurement{
    85  			name:   name,
    86  			config: &profileConfig{kind: kind},
    87  		}
    88  	}
    89  }
    90  
    91  func (p *profileMeasurement) start(config *measurement.Config, SSHToMasterSupported bool) error {
    92  	if err := p.populateProfileConfig(config); err != nil {
    93  		return err
    94  	}
    95  	if len(p.config.hosts) < 1 {
    96  		klog.Warning("Profile measurements will be disabled due to no MasterIps")
    97  		return nil
    98  	}
    99  	k8sClient := config.ClusterFramework.GetClientSets().GetClient()
   100  	if p.shouldExposeAPIServerDebugEndpoint() {
   101  		if err := exposeAPIServerDebugEndpoint(k8sClient); err != nil {
   102  			klog.Warningf("error while exposing kube-apiserver /debug endpoint: %v", err)
   103  		}
   104  	}
   105  
   106  	p.summaries = make([]measurement.Summary, 0)
   107  	p.isRunning = true
   108  	p.stopCh = make(chan struct{})
   109  	p.wg.Add(1)
   110  
   111  	profileFrequency := 5 * time.Minute
   112  	go func() {
   113  		defer p.wg.Done()
   114  		for {
   115  			select {
   116  			case <-p.stopCh:
   117  				return
   118  			case <-time.After(profileFrequency):
   119  				profileSummaries, err := p.gatherProfile(k8sClient, SSHToMasterSupported, config)
   120  				if err != nil {
   121  					klog.Errorf("failed to gather profile for %#v: %v", *p.config, err)
   122  					continue
   123  				}
   124  				if profileSummaries != nil {
   125  					p.summaries = append(p.summaries, profileSummaries...)
   126  				}
   127  			}
   128  		}
   129  	}()
   130  	return nil
   131  }
   132  
   133  func (p *profileMeasurement) stop() {
   134  	if !p.isRunning {
   135  		return
   136  	}
   137  	close(p.stopCh)
   138  	p.wg.Wait()
   139  }
   140  
   141  // Execute gathers memory profile of a given component.
   142  func (p *profileMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
   143  	provider := config.ClusterFramework.GetClusterConfig().Provider
   144  	SSHToMasterSupported := provider.Features().SupportSSHToMaster
   145  	APIServerPprofEnabled := config.ClusterFramework.GetClusterConfig().APIServerPprofByClientEnabled
   146  
   147  	if !SSHToMasterSupported && APIServerPprofEnabled {
   148  		klog.Warningf("fetching profile data from is not possible from provider: %s", provider.Name())
   149  		return nil, nil
   150  	}
   151  
   152  	action, err := util.GetString(config.Params, "action")
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  
   157  	switch action {
   158  	case "start":
   159  		if p.isRunning {
   160  			klog.V(2).Infof("%s: measurement already running", p)
   161  			return nil, nil
   162  		}
   163  		return nil, p.start(config, SSHToMasterSupported)
   164  	case "gather":
   165  		p.stop()
   166  		return p.summaries, nil
   167  	default:
   168  		return nil, fmt.Errorf("unknown action %v", action)
   169  	}
   170  }
   171  
   172  // Dispose cleans up after the measurement.
   173  func (*profileMeasurement) Dispose() {}
   174  
   175  // String returns string representation of this measurement.
   176  func (p *profileMeasurement) String() string {
   177  	return p.name
   178  }
   179  
   180  func (p *profileMeasurement) gatherProfile(c clientset.Interface, SSHToMasterSupported bool, config *measurement.Config) ([]measurement.Summary, error) {
   181  	getCommand, err := p.getProfileCommand(config)
   182  	if err != nil {
   183  		return nil, goerrors.Errorf("profile gathering failed during retrieving profile command: %v", err)
   184  	}
   185  
   186  	var summaries []measurement.Summary
   187  	for _, host := range p.config.hosts {
   188  		profilePrefix := fmt.Sprintf("%s_%s_%s", host, p.config.componentName, p.name)
   189  
   190  		// Get the profile data over SSH.
   191  		// Start by checking that the provider allows us to do so.
   192  		if !SSHToMasterSupported {
   193  			// SSH to master for this provider is not possible.
   194  			// For kube-apiserver, we can still fetch the profile using a RESTClient and pprof.
   195  			// TODO(#246): This will connect to a random master in HA (multi-master) clusters, fix it.
   196  			if p.config.componentName == "kube-apiserver" {
   197  				body, err := c.CoreV1().RESTClient().Get().AbsPath("/debug/pprof/" + p.config.kind).DoRaw(context.TODO())
   198  				if err != nil {
   199  					return nil, err
   200  				}
   201  				summary := measurement.CreateSummary(profilePrefix, "pprof", string(body))
   202  				summaries = append(summaries, summary)
   203  				break
   204  			}
   205  			// Only logging error for gke. SSHing to gke master is not supported.
   206  			klog.Warningf("%s: failed to execute curl command on master through SSH", p.name)
   207  			return nil, nil
   208  		}
   209  
   210  		sshResult, err := measurementutil.SSH(getCommand, host+":22", p.config.provider)
   211  		if err != nil {
   212  			return nil, fmt.Errorf("failed to execute curl command on master node %s through SSH: %v", host, err)
   213  		}
   214  		summaries = append(summaries, measurement.CreateSummary(profilePrefix, "pprof", sshResult.Stdout))
   215  	}
   216  
   217  	return summaries, nil
   218  }
   219  
   220  func (p *profileMeasurement) shouldExposeAPIServerDebugEndpoint() bool {
   221  	return p.config.componentName == "kube-apiserver"
   222  }
   223  
   224  func (p *profileMeasurement) getProfileCommand(config *measurement.Config) (string, error) {
   225  	profileProtocol, profilePort, err := config.ClusterFramework.GetClusterConfig().Provider.GetComponentProtocolAndPort(p.config.componentName)
   226  	if err != nil {
   227  		return "", goerrors.Errorf("get profile command failed finding component protocol/port: %v", err)
   228  	}
   229  
   230  	var command string
   231  	if p.config.componentName == "etcd" {
   232  		etcdCert := config.ClusterFramework.GetClusterConfig().EtcdCertificatePath
   233  		etcdKey := config.ClusterFramework.GetClusterConfig().EtcdKeyPath
   234  		command = fmt.Sprintf("curl -s -k --cert %s --key %s %slocalhost:%v/debug/pprof/%s", etcdCert, etcdKey, profileProtocol, profilePort, p.config.kind)
   235  	} else {
   236  		command = fmt.Sprintf("curl -s -k %slocalhost:%v/debug/pprof/%s", profileProtocol, profilePort, p.config.kind)
   237  	}
   238  
   239  	return command, nil
   240  }
   241  
   242  func exposeAPIServerDebugEndpoint(c clientset.Interface) error {
   243  	klog.V(2).Info("Exposing kube-apiserver debug endpoint for anonymous access")
   244  	createClusterRole := func() error {
   245  		_, err := c.RbacV1().ClusterRoles().Create(context.TODO(), &rbacv1.ClusterRole{
   246  			ObjectMeta: metav1.ObjectMeta{Name: "apiserver-debug-viewer"},
   247  			Rules: []rbacv1.PolicyRule{
   248  				{Verbs: []string{"get"}, NonResourceURLs: []string{"/debug/*"}},
   249  			},
   250  		}, metav1.CreateOptions{})
   251  		return err
   252  	}
   253  	createClusterRoleBinding := func() error {
   254  		_, err := c.RbacV1().ClusterRoleBindings().Create(context.TODO(), &rbacv1.ClusterRoleBinding{
   255  			ObjectMeta: metav1.ObjectMeta{Name: "anonymous:apiserver-debug-viewer"},
   256  			RoleRef:    rbacv1.RoleRef{Kind: "ClusterRole", Name: "apiserver-debug-viewer"},
   257  			Subjects: []rbacv1.Subject{
   258  				{Kind: "User", Name: "system:anonymous"},
   259  			},
   260  		}, metav1.CreateOptions{})
   261  		return err
   262  	}
   263  	if err := retryCreateFunction(createClusterRole); err != nil {
   264  		return err
   265  	}
   266  	if err := retryCreateFunction(createClusterRoleBinding); err != nil {
   267  		return err
   268  	}
   269  	return nil
   270  }
   271  
   272  func retryCreateFunction(f func() error) error {
   273  	return client.RetryWithExponentialBackOff(
   274  		client.RetryFunction(f, client.Allow(apierrs.IsAlreadyExists)))
   275  }