k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/profile.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 goerrors "github.com/go-errors/errors" 26 rbacv1 "k8s.io/api/rbac/v1" 27 apierrs "k8s.io/apimachinery/pkg/api/errors" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 clientset "k8s.io/client-go/kubernetes" 30 "k8s.io/klog/v2" 31 "k8s.io/perf-tests/clusterloader2/pkg/framework/client" 32 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 33 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 34 "k8s.io/perf-tests/clusterloader2/pkg/provider" 35 "k8s.io/perf-tests/clusterloader2/pkg/util" 36 ) 37 38 const ( 39 cpuProfileName = "CPUProfile" 40 memoryProfileName = "MemoryProfile" 41 blockProfileName = "BlockProfile" 42 ) 43 44 func init() { 45 if err := measurement.Register(cpuProfileName, createProfileMeasurementFactory(cpuProfileName, "profile")); err != nil { 46 klog.Fatalf("Cannot register %s: %v", cpuProfileName, err) 47 } 48 if err := measurement.Register(memoryProfileName, createProfileMeasurementFactory(memoryProfileName, "heap")); err != nil { 49 klog.Fatalf("Cannot register %s: %v", memoryProfileName, err) 50 } 51 if err := measurement.Register(blockProfileName, createProfileMeasurementFactory(blockProfileName, "block")); err != nil { 52 klog.Fatalf("Cannot register %s: %v", blockProfileName, err) 53 } 54 } 55 56 type profileConfig struct { 57 componentName string 58 provider provider.Provider 59 hosts []string 60 kind string 61 } 62 63 func (p *profileMeasurement) populateProfileConfig(config *measurement.Config) error { 64 var err error 65 if p.config.componentName, err = util.GetString(config.Params, "componentName"); err != nil { 66 return err 67 } 68 p.config.provider = config.ClusterFramework.GetClusterConfig().Provider 69 p.config.hosts = config.ClusterFramework.GetClusterConfig().MasterIPs 70 return nil 71 } 72 73 type profileMeasurement struct { 74 name string 75 config *profileConfig 76 summaries []measurement.Summary 77 isRunning bool 78 stopCh chan struct{} 79 wg sync.WaitGroup 80 } 81 82 func createProfileMeasurementFactory(name, kind string) func() measurement.Measurement { 83 return func() measurement.Measurement { 84 return &profileMeasurement{ 85 name: name, 86 config: &profileConfig{kind: kind}, 87 } 88 } 89 } 90 91 func (p *profileMeasurement) start(config *measurement.Config, SSHToMasterSupported bool) error { 92 if err := p.populateProfileConfig(config); err != nil { 93 return err 94 } 95 if len(p.config.hosts) < 1 { 96 klog.Warning("Profile measurements will be disabled due to no MasterIps") 97 return nil 98 } 99 k8sClient := config.ClusterFramework.GetClientSets().GetClient() 100 if p.shouldExposeAPIServerDebugEndpoint() { 101 if err := exposeAPIServerDebugEndpoint(k8sClient); err != nil { 102 klog.Warningf("error while exposing kube-apiserver /debug endpoint: %v", err) 103 } 104 } 105 106 p.summaries = make([]measurement.Summary, 0) 107 p.isRunning = true 108 p.stopCh = make(chan struct{}) 109 p.wg.Add(1) 110 111 profileFrequency := 5 * time.Minute 112 go func() { 113 defer p.wg.Done() 114 for { 115 select { 116 case <-p.stopCh: 117 return 118 case <-time.After(profileFrequency): 119 profileSummaries, err := p.gatherProfile(k8sClient, SSHToMasterSupported, config) 120 if err != nil { 121 klog.Errorf("failed to gather profile for %#v: %v", *p.config, err) 122 continue 123 } 124 if profileSummaries != nil { 125 p.summaries = append(p.summaries, profileSummaries...) 126 } 127 } 128 } 129 }() 130 return nil 131 } 132 133 func (p *profileMeasurement) stop() { 134 if !p.isRunning { 135 return 136 } 137 close(p.stopCh) 138 p.wg.Wait() 139 } 140 141 // Execute gathers memory profile of a given component. 142 func (p *profileMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 143 provider := config.ClusterFramework.GetClusterConfig().Provider 144 SSHToMasterSupported := provider.Features().SupportSSHToMaster 145 APIServerPprofEnabled := config.ClusterFramework.GetClusterConfig().APIServerPprofByClientEnabled 146 147 if !SSHToMasterSupported && APIServerPprofEnabled { 148 klog.Warningf("fetching profile data from is not possible from provider: %s", provider.Name()) 149 return nil, nil 150 } 151 152 action, err := util.GetString(config.Params, "action") 153 if err != nil { 154 return nil, err 155 } 156 157 switch action { 158 case "start": 159 if p.isRunning { 160 klog.V(2).Infof("%s: measurement already running", p) 161 return nil, nil 162 } 163 return nil, p.start(config, SSHToMasterSupported) 164 case "gather": 165 p.stop() 166 return p.summaries, nil 167 default: 168 return nil, fmt.Errorf("unknown action %v", action) 169 } 170 } 171 172 // Dispose cleans up after the measurement. 173 func (*profileMeasurement) Dispose() {} 174 175 // String returns string representation of this measurement. 176 func (p *profileMeasurement) String() string { 177 return p.name 178 } 179 180 func (p *profileMeasurement) gatherProfile(c clientset.Interface, SSHToMasterSupported bool, config *measurement.Config) ([]measurement.Summary, error) { 181 getCommand, err := p.getProfileCommand(config) 182 if err != nil { 183 return nil, goerrors.Errorf("profile gathering failed during retrieving profile command: %v", err) 184 } 185 186 var summaries []measurement.Summary 187 for _, host := range p.config.hosts { 188 profilePrefix := fmt.Sprintf("%s_%s_%s", host, p.config.componentName, p.name) 189 190 // Get the profile data over SSH. 191 // Start by checking that the provider allows us to do so. 192 if !SSHToMasterSupported { 193 // SSH to master for this provider is not possible. 194 // For kube-apiserver, we can still fetch the profile using a RESTClient and pprof. 195 // TODO(#246): This will connect to a random master in HA (multi-master) clusters, fix it. 196 if p.config.componentName == "kube-apiserver" { 197 body, err := c.CoreV1().RESTClient().Get().AbsPath("/debug/pprof/" + p.config.kind).DoRaw(context.TODO()) 198 if err != nil { 199 return nil, err 200 } 201 summary := measurement.CreateSummary(profilePrefix, "pprof", string(body)) 202 summaries = append(summaries, summary) 203 break 204 } 205 // Only logging error for gke. SSHing to gke master is not supported. 206 klog.Warningf("%s: failed to execute curl command on master through SSH", p.name) 207 return nil, nil 208 } 209 210 sshResult, err := measurementutil.SSH(getCommand, host+":22", p.config.provider) 211 if err != nil { 212 return nil, fmt.Errorf("failed to execute curl command on master node %s through SSH: %v", host, err) 213 } 214 summaries = append(summaries, measurement.CreateSummary(profilePrefix, "pprof", sshResult.Stdout)) 215 } 216 217 return summaries, nil 218 } 219 220 func (p *profileMeasurement) shouldExposeAPIServerDebugEndpoint() bool { 221 return p.config.componentName == "kube-apiserver" 222 } 223 224 func (p *profileMeasurement) getProfileCommand(config *measurement.Config) (string, error) { 225 profileProtocol, profilePort, err := config.ClusterFramework.GetClusterConfig().Provider.GetComponentProtocolAndPort(p.config.componentName) 226 if err != nil { 227 return "", goerrors.Errorf("get profile command failed finding component protocol/port: %v", err) 228 } 229 230 var command string 231 if p.config.componentName == "etcd" { 232 etcdCert := config.ClusterFramework.GetClusterConfig().EtcdCertificatePath 233 etcdKey := config.ClusterFramework.GetClusterConfig().EtcdKeyPath 234 command = fmt.Sprintf("curl -s -k --cert %s --key %s %slocalhost:%v/debug/pprof/%s", etcdCert, etcdKey, profileProtocol, profilePort, p.config.kind) 235 } else { 236 command = fmt.Sprintf("curl -s -k %slocalhost:%v/debug/pprof/%s", profileProtocol, profilePort, p.config.kind) 237 } 238 239 return command, nil 240 } 241 242 func exposeAPIServerDebugEndpoint(c clientset.Interface) error { 243 klog.V(2).Info("Exposing kube-apiserver debug endpoint for anonymous access") 244 createClusterRole := func() error { 245 _, err := c.RbacV1().ClusterRoles().Create(context.TODO(), &rbacv1.ClusterRole{ 246 ObjectMeta: metav1.ObjectMeta{Name: "apiserver-debug-viewer"}, 247 Rules: []rbacv1.PolicyRule{ 248 {Verbs: []string{"get"}, NonResourceURLs: []string{"/debug/*"}}, 249 }, 250 }, metav1.CreateOptions{}) 251 return err 252 } 253 createClusterRoleBinding := func() error { 254 _, err := c.RbacV1().ClusterRoleBindings().Create(context.TODO(), &rbacv1.ClusterRoleBinding{ 255 ObjectMeta: metav1.ObjectMeta{Name: "anonymous:apiserver-debug-viewer"}, 256 RoleRef: rbacv1.RoleRef{Kind: "ClusterRole", Name: "apiserver-debug-viewer"}, 257 Subjects: []rbacv1.Subject{ 258 {Kind: "User", Name: "system:anonymous"}, 259 }, 260 }, metav1.CreateOptions{}) 261 return err 262 } 263 if err := retryCreateFunction(createClusterRole); err != nil { 264 return err 265 } 266 if err := retryCreateFunction(createClusterRoleBinding); err != nil { 267 return err 268 } 269 return nil 270 } 271 272 func retryCreateFunction(f func() error) error { 273 return client.RetryWithExponentialBackOff( 274 client.RetryFunction(f, client.Allow(apierrs.IsAlreadyExists))) 275 }