github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/pluginhandler.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package orm
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  
    25  	"k8s.io/apimachinery/pkg/util/errors"
    26  	"k8s.io/klog/v2"
    27  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    28  
    29  	endpoint2 "github.com/kubewharf/katalyst-core/pkg/agent/orm/endpoint"
    30  )
    31  
    32  func (m *ManagerImpl) removeContents(dir string) error {
    33  	d, err := os.Open(dir)
    34  	if err != nil {
    35  		return err
    36  	}
    37  	defer d.Close()
    38  	names, err := d.Readdirnames(-1)
    39  	if err != nil {
    40  		return err
    41  	}
    42  	var errs []error
    43  	for _, name := range names {
    44  		filePath := filepath.Join(dir, name)
    45  		if filePath == m.checkpointFile() {
    46  			continue
    47  		}
    48  		stat, err := os.Stat(filePath)
    49  		if err != nil {
    50  			klog.Errorf("[ORM] Failed to stat file %s: %v", filePath, err)
    51  			continue
    52  		}
    53  		if stat.IsDir() {
    54  			continue
    55  		}
    56  		err = os.RemoveAll(filePath)
    57  		if err != nil {
    58  			errs = append(errs, err)
    59  			klog.Errorf("[ORM] Failed to remove file %s: %v", filePath, err)
    60  			continue
    61  		}
    62  	}
    63  	return errors.NewAggregate(errs)
    64  }
    65  
    66  // ValidatePlugin validates a plugin if the version is correct and the name has the format of an extended resource
    67  func (m *ManagerImpl) ValidatePlugin(pluginName string, endpoint string, versions []string) error {
    68  	klog.V(2).Infof("Got Plugin %s at endpoint %s with versions %v", pluginName, endpoint, versions)
    69  
    70  	if !m.isVersionCompatibleWithPlugin(versions) {
    71  		return fmt.Errorf("manager version, %s, is not among plugin supported versions %v", pluginapi.Version, versions)
    72  	}
    73  
    74  	return nil
    75  }
    76  
    77  // RegisterPlugin starts the endpoint and registers it
    78  func (m *ManagerImpl) RegisterPlugin(pluginName string, endpoint string, versions []string) error {
    79  	klog.V(2).Infof("[ORM] Registering Plugin %s at endpoint %s", pluginName, endpoint)
    80  
    81  	e, err := endpoint2.NewEndpointImpl(endpoint, pluginName)
    82  	if err != nil {
    83  		return fmt.Errorf("[ORM] failed to dial resource plugin with socketPath %s: %v", endpoint, err)
    84  	}
    85  
    86  	options, err := e.GetResourcePluginOptions(context.Background(), &pluginapi.Empty{})
    87  	if err != nil {
    88  		return fmt.Errorf("[ORM] failed to get resource plugin options: %v", err)
    89  	}
    90  
    91  	m.registerEndpoint(pluginName, options, e)
    92  
    93  	return nil
    94  }
    95  
    96  // DeRegisterPlugin deregisters the plugin
    97  func (m *ManagerImpl) DeRegisterPlugin(pluginName string) {
    98  	m.mutex.Lock()
    99  	defer m.mutex.Unlock()
   100  
   101  	if eI, ok := m.endpoints[pluginName]; ok {
   102  		eI.E.Stop()
   103  	}
   104  }
   105  
   106  func (m *ManagerImpl) registerEndpoint(resourceName string, options *pluginapi.ResourcePluginOptions, e endpoint2.Endpoint) {
   107  	m.mutex.Lock()
   108  	defer m.mutex.Unlock()
   109  
   110  	old, ok := m.endpoints[resourceName]
   111  
   112  	if ok && !old.E.IsStopped() {
   113  		klog.V(2).Infof("[ORM] stop old endpoint: %v", old.E)
   114  		old.E.Stop()
   115  	}
   116  
   117  	m.endpoints[resourceName] = endpoint2.EndpointInfo{E: e, Opts: options}
   118  	klog.V(2).Infof("[ORM] Registered endpoint %v", e)
   119  }
   120  
   121  func (m *ManagerImpl) isVersionCompatibleWithPlugin(versions []string) bool {
   122  	for _, version := range versions {
   123  		for _, supportedVersion := range pluginapi.SupportedVersions {
   124  			if version == supportedVersion {
   125  				return true
   126  			}
   127  		}
   128  	}
   129  	return false
   130  }
   131  
   132  // Register registers a resource plugin.
   133  func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) {
   134  	klog.Infof("[ORM] Got registration request from resource plugin with resource name %q", r.ResourceName)
   135  	var versionCompatible bool
   136  	for _, v := range pluginapi.SupportedVersions {
   137  		if r.Version == v {
   138  			versionCompatible = true
   139  			break
   140  		}
   141  	}
   142  	if !versionCompatible {
   143  		errorString := fmt.Sprintf(errUnsupportedVersion, r.Version, pluginapi.SupportedVersions)
   144  		klog.Infof("Bad registration request from resource plugin with resource name %q: %s", r.ResourceName, errorString)
   145  		return &pluginapi.Empty{}, fmt.Errorf(errorString)
   146  	}
   147  
   148  	// TODO: for now, always accepts newest resource plugin. Later may consider to
   149  	// add some policies here, e.g., verify whether an old resource plugin with the
   150  	// same resource name is still alive to determine whether we want to accept
   151  	// the new registration.
   152  	success := make(chan bool)
   153  	go m.addEndpoint(r, success)
   154  	select {
   155  	case pass := <-success:
   156  		if pass {
   157  			klog.Infof("[ORM] Register resource plugin for %s success", r.ResourceName)
   158  			return &pluginapi.Empty{}, nil
   159  		}
   160  		klog.Errorf("[ORM] Register resource plugin for %s fail", r.ResourceName)
   161  		return &pluginapi.Empty{}, fmt.Errorf("failed to register resource %s", r.ResourceName)
   162  	case <-ctx.Done():
   163  		klog.Errorf("[ORM] Register resource plugin for %s timeout", r.ResourceName)
   164  		return &pluginapi.Empty{}, fmt.Errorf("timeout to register resource %s", r.ResourceName)
   165  	}
   166  }
   167  
   168  func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest, success chan<- bool) {
   169  	new, err := endpoint2.NewEndpointImpl(filepath.Join(m.socketdir, r.Endpoint), r.ResourceName)
   170  	if err != nil {
   171  		klog.Errorf("[ORM] Failed to dial resource plugin with request %v: %v", r, err)
   172  		success <- false
   173  		return
   174  	}
   175  	m.registerEndpoint(r.ResourceName, r.Options, new)
   176  	success <- true
   177  }