github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/endpoint/endpoint.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package endpoint
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"google.golang.org/grpc"
    26  	"k8s.io/klog/v2"
    27  
    28  	pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
    29  	"github.com/kubewharf/katalyst-core/pkg/consts"
    30  	"github.com/kubewharf/katalyst-core/pkg/util/process"
    31  )
    32  
    33  const (
    34  	dialRemoteEndpointTimeout = 10 * time.Second
    35  )
    36  
    37  const (
    38  	errFailedToDialEvictionPlugin = "failed to dial resource plugin:"
    39  
    40  	errEndpointStopped      = "endpoint %v has been stopped"
    41  	endpointStopGracePeriod = 5 * time.Minute
    42  )
    43  
    44  // Endpoint represents a single registered plugin. It is responsible
    45  // for managing gRPC communications with the eviction plugin and caching eviction states.
    46  type Endpoint interface {
    47  	ThresholdMet(c context.Context) (*pluginapi.ThresholdMetResponse, error)
    48  	// GetTopEvictionPods notice: this function only be called when plugin's threshold is met
    49  	GetTopEvictionPods(c context.Context, request *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error)
    50  	GetEvictPods(c context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error)
    51  	Start()
    52  	Stop()
    53  	IsStopped() bool
    54  	StopGracePeriodExpired() bool
    55  }
    56  
// RemoteEndpointImpl is the implementation of an endpoint backed by a remote
// eviction plugin that is reached over a gRPC connection.
type RemoteEndpointImpl struct {
	// client is the eviction plugin stub built on top of clientConn; clientConn
	// is the underlying connection, closed by Stop.
	client     pluginapi.EvictionPluginClient
	clientConn *grpc.ClientConn

	// socketPath and pluginName are recorded at construction time. stopTime is
	// the moment Stop was called; its zero value means "not stopped".
	socketPath string
	pluginName string
	stopTime   time.Time

	// mutex is held while stopTime is read or written and while Stop closes
	// the connection.
	mutex sync.Mutex
}
    68  
    69  // NewRemoteEndpointImpl new a remote eviction plugin endpoint
    70  func NewRemoteEndpointImpl(socketPath, pluginName string) (*RemoteEndpointImpl, error) {
    71  	c, err := process.Dial(socketPath, dialRemoteEndpointTimeout)
    72  	if err != nil {
    73  		klog.Errorf("[eviction manager] can't create new endpoint with path %s err %v", socketPath, err)
    74  		return nil, fmt.Errorf(errFailedToDialEvictionPlugin+" %v", err)
    75  	}
    76  
    77  	return &RemoteEndpointImpl{
    78  		client:     pluginapi.NewEvictionPluginClient(c),
    79  		clientConn: c,
    80  
    81  		socketPath: socketPath,
    82  		pluginName: pluginName,
    83  	}, nil
    84  }
    85  
    86  // IsStopped check this Endpoint whether be called stop function before
    87  func (e *RemoteEndpointImpl) IsStopped() bool {
    88  	e.mutex.Lock()
    89  	defer e.mutex.Unlock()
    90  	return !e.stopTime.IsZero()
    91  }
    92  
    93  // StopGracePeriodExpired check if this Endpoint has been stopped and exceeded the
    94  // grace period since the stop timestamp
    95  func (e *RemoteEndpointImpl) StopGracePeriodExpired() bool {
    96  	e.mutex.Lock()
    97  	defer e.mutex.Unlock()
    98  	return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
    99  }
   100  
   101  // SetStopTime is used for testing only
   102  func (e *RemoteEndpointImpl) SetStopTime(t time.Time) {
   103  	e.mutex.Lock()
   104  	defer e.mutex.Unlock()
   105  	e.stopTime = t
   106  }
   107  
   108  // ThresholdMet is used to call remote endpoint ThresholdMet
   109  func (e *RemoteEndpointImpl) ThresholdMet(c context.Context) (*pluginapi.ThresholdMetResponse, error) {
   110  	if e.IsStopped() {
   111  		return nil, fmt.Errorf(errEndpointStopped, e)
   112  	}
   113  	ctx, cancel := context.WithTimeout(c, consts.EvictionPluginThresholdMetRPCTimeoutInSecs*time.Second)
   114  	defer cancel()
   115  	return e.client.ThresholdMet(ctx, &pluginapi.Empty{})
   116  }
   117  
   118  // GetTopEvictionPods is used to call remote endpoint GetTopEvictionPods
   119  func (e *RemoteEndpointImpl) GetTopEvictionPods(c context.Context, request *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) {
   120  	if e.IsStopped() {
   121  		return nil, fmt.Errorf(errEndpointStopped, e)
   122  	}
   123  	ctx, cancel := context.WithTimeout(c, consts.EvictionPluginGetTopEvictionPodsRPCTimeoutInSecs*time.Second)
   124  	defer cancel()
   125  	return e.client.GetTopEvictionPods(ctx, request)
   126  }
   127  
   128  // GetEvictPods is used to call remote endpoint GetEvictPods
   129  func (e *RemoteEndpointImpl) GetEvictPods(c context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) {
   130  	if e.IsStopped() {
   131  		return nil, fmt.Errorf(errEndpointStopped, e)
   132  	}
   133  	ctx, cancel := context.WithTimeout(c, consts.EvictionPluginGetEvictPodsRPCTimeoutInSecs*time.Second)
   134  	defer cancel()
   135  	return e.client.GetEvictPods(ctx, request)
   136  }
   137  
   138  func (e *RemoteEndpointImpl) GetToken(c context.Context) (*pluginapi.GetTokenResponse, error) {
   139  	if e.IsStopped() {
   140  		return nil, fmt.Errorf(errEndpointStopped, e)
   141  	}
   142  	ctx, cancel := context.WithTimeout(c, consts.EvictionPluginGetEvictPodsRPCTimeoutInSecs*time.Second)
   143  	defer cancel()
   144  	return e.client.GetToken(ctx, &pluginapi.Empty{})
   145  }
   146  
   147  // Stop is used to stop this remote endpoint
   148  func (e *RemoteEndpointImpl) Stop() {
   149  	e.mutex.Lock()
   150  	defer e.mutex.Unlock()
   151  	if e.clientConn != nil {
   152  		e.clientConn.Close()
   153  	}
   154  	e.stopTime = time.Now()
   155  }
   156  
   157  // Start has no need do anything for now
   158  func (e *RemoteEndpointImpl) Start() {
   159  	return
   160  }