github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/endpoint/endpoint.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package endpoint
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net"
    23  	"sync"
    24  	"time"
    25  
    26  	"google.golang.org/grpc/credentials/insecure"
    27  
    28  	"google.golang.org/grpc"
    29  	"k8s.io/klog/v2"
    30  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    31  )
    32  
// Endpoint maps to a single registered resource plugin. It is responsible
// for managing gRPC communications with the resource plugin and caching
// resource states reported by the resource plugin.
type Endpoint interface {
	// Stop shuts the endpoint down; afterwards IsStopped reports true.
	Stop()
	// Allocate forwards an allocation request to the resource plugin.
	Allocate(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error)
	// GetTopologyHints asks the resource plugin for topology hints for the request.
	GetTopologyHints(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error)
	// GetResourceAllocation queries the resource plugin for its current resource allocations.
	GetResourceAllocation(c context.Context, request *pluginapi.GetResourcesAllocationRequest) (*pluginapi.GetResourcesAllocationResponse, error)
	// RemovePod forwards a pod removal request to the resource plugin.
	RemovePod(c context.Context, removePodRequest *pluginapi.RemovePodRequest) (*pluginapi.RemovePodResponse, error)
	// IsStopped reports whether the endpoint has been stopped.
	IsStopped() bool
	// StopGracePeriodExpired reports whether the endpoint has been stopped
	// for longer than the stop grace period.
	StopGracePeriodExpired() bool
	// GetResourcePluginOptions fetches the options advertised by the resource plugin.
	GetResourcePluginOptions(ctx context.Context, in *pluginapi.Empty, opts ...grpc.CallOption) (*pluginapi.ResourcePluginOptions, error)
	// GetTopologyAwareAllocatableResources queries the resource plugin for
	// its topology-aware allocatable resources.
	GetTopologyAwareAllocatableResources(c context.Context, request *pluginapi.GetTopologyAwareAllocatableResourcesRequest) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error)
	// GetTopologyAwareResources queries the resource plugin for the
	// topology-aware resources described by the request.
	GetTopologyAwareResources(c context.Context, request *pluginapi.GetTopologyAwareResourcesRequest) (*pluginapi.GetTopologyAwareResourcesResponse, error)
}
    48  
// EndpointInfo pairs an Endpoint with a set of resource plugin options.
type EndpointInfo struct {
	// E is the endpoint used to talk to the resource plugin.
	E Endpoint
	// Opts holds resource plugin options; presumably those reported by the
	// plugin behind E — confirm against the code that populates this struct.
	Opts *pluginapi.ResourcePluginOptions
}
    53  
// EndpointImpl is the gRPC-backed implementation of the endpoint methods for
// a single resource plugin.
type EndpointImpl struct {
	// client and clientConn are the gRPC client and its underlying
	// connection. Both are nil for endpoints created by NewStoppedEndpointImpl.
	client     pluginapi.ResourcePluginClient
	clientConn *grpc.ClientConn

	// socketPath and resourceName identify the plugin; stopTime is the zero
	// time while the endpoint is running and the stop instant afterwards.
	socketPath   string
	resourceName string
	stopTime     time.Time

	// mutex guards stopTime and serializes closing clientConn in Stop.
	mutex sync.Mutex
}
    64  
    65  // NewEndpointImpl creates a new endpoint for the given resourceName.
    66  // This is to be used during normal resource plugin registration.
    67  func NewEndpointImpl(socketPath, resourceName string) (*EndpointImpl, error) {
    68  	client, c, err := dial(socketPath)
    69  	if err != nil {
    70  		klog.Errorf("[ORM] Can't create new endpoint with path %s err %v", socketPath, err)
    71  		return nil, err
    72  	}
    73  
    74  	return &EndpointImpl{
    75  		client:     client,
    76  		clientConn: c,
    77  
    78  		socketPath:   socketPath,
    79  		resourceName: resourceName,
    80  	}, nil
    81  }
    82  
// Client returns the gRPC client used to talk to the resource plugin. It is
// nil for endpoints created by NewStoppedEndpointImpl.
func (e *EndpointImpl) Client() pluginapi.ResourcePluginClient {
	return e.client
}
    86  
    87  // NewStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set.
    88  // This is to be used during Kubelet restart, before the actual resource plugin re-registers.
    89  func NewStoppedEndpointImpl(resourceName string) *EndpointImpl {
    90  	return &EndpointImpl{
    91  		resourceName: resourceName,
    92  		stopTime:     time.Now(),
    93  	}
    94  }
    95  
    96  func (e *EndpointImpl) IsStopped() bool {
    97  	e.mutex.Lock()
    98  	defer e.mutex.Unlock()
    99  	return !e.stopTime.IsZero()
   100  }
   101  
   102  func (e *EndpointImpl) StopGracePeriodExpired() bool {
   103  	e.mutex.Lock()
   104  	defer e.mutex.Unlock()
   105  	return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod
   106  }
   107  
   108  // used for testing only
   109  func (e *EndpointImpl) setStopTime(t time.Time) {
   110  	e.mutex.Lock()
   111  	defer e.mutex.Unlock()
   112  	e.stopTime = t
   113  }
   114  
   115  // allocate issues Allocate gRPC call to the resource plugin.
   116  func (e *EndpointImpl) Allocate(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) {
   117  	if e.IsStopped() {
   118  		return nil, fmt.Errorf(errEndpointStopped, e)
   119  	}
   120  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginAllocateRPCTimeoutInSecs*time.Second)
   121  	defer cancel()
   122  	return e.client.Allocate(ctx, resourceRequest)
   123  }
   124  
   125  func (e *EndpointImpl) Stop() {
   126  	e.mutex.Lock()
   127  	defer e.mutex.Unlock()
   128  	if e.clientConn != nil {
   129  		e.clientConn.Close()
   130  	}
   131  	e.stopTime = time.Now()
   132  }
   133  
   134  func (e *EndpointImpl) GetResourceAllocation(c context.Context, request *pluginapi.GetResourcesAllocationRequest) (*pluginapi.GetResourcesAllocationResponse, error) {
   135  	if e.IsStopped() {
   136  		return nil, fmt.Errorf(errEndpointStopped, e)
   137  	}
   138  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetResourcesAllocationRPCTimeoutInSecs*time.Second)
   139  	defer cancel()
   140  	return e.client.GetResourcesAllocation(ctx, request)
   141  }
   142  
   143  func (e *EndpointImpl) RemovePod(c context.Context, removePodRequest *pluginapi.RemovePodRequest) (*pluginapi.RemovePodResponse, error) {
   144  	if e.IsStopped() {
   145  		return nil, fmt.Errorf(errEndpointStopped, e)
   146  	}
   147  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginRemovePodRPCTimeoutInSecs*time.Second)
   148  	defer cancel()
   149  	return e.client.RemovePod(ctx, removePodRequest)
   150  }
   151  
   152  func (e *EndpointImpl) GetResourcePluginOptions(ctx context.Context, in *pluginapi.Empty, opts ...grpc.CallOption) (*pluginapi.ResourcePluginOptions, error) {
   153  	return e.client.GetResourcePluginOptions(ctx, in, opts...)
   154  }
   155  
   156  func (e *EndpointImpl) GetTopologyHints(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) {
   157  	if e.IsStopped() {
   158  		return nil, fmt.Errorf(errEndpointStopped, e)
   159  	}
   160  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyHintsRPCTimeoutInSecs*time.Second)
   161  	defer cancel()
   162  
   163  	return e.client.GetTopologyHints(ctx, resourceRequest)
   164  }
   165  
   166  func (e *EndpointImpl) GetTopologyAwareAllocatableResources(c context.Context, request *pluginapi.GetTopologyAwareAllocatableResourcesRequest) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
   167  	if e.IsStopped() {
   168  		return nil, fmt.Errorf(errEndpointStopped, e)
   169  	}
   170  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyAwareAllocatableResourcesRPCTimeoutInSecs*time.Second)
   171  	defer cancel()
   172  	return e.client.GetTopologyAwareAllocatableResources(ctx, request)
   173  }
   174  
   175  func (e *EndpointImpl) GetTopologyAwareResources(c context.Context, request *pluginapi.GetTopologyAwareResourcesRequest) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
   176  	if e.IsStopped() {
   177  		return nil, fmt.Errorf(errEndpointStopped, e)
   178  	}
   179  	ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyAwareResourcesRPCTimeoutInSecs*time.Second)
   180  	defer cancel()
   181  	return e.client.GetTopologyAwareResources(ctx, request)
   182  }
   183  
   184  // dial establishes the gRPC communication with the registered resource plugin. https://godoc.org/google.golang.org/grpc#Dial
   185  func dial(unixSocketPath string) (pluginapi.ResourcePluginClient, *grpc.ClientConn, error) {
   186  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   187  	defer cancel()
   188  
   189  	c, err := grpc.DialContext(ctx, unixSocketPath, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock(),
   190  		grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
   191  			return (&net.Dialer{}).DialContext(ctx, "unix", addr)
   192  		}),
   193  	)
   194  	if err != nil {
   195  		return nil, nil, fmt.Errorf(errFailedToDialResourcePlugin+" %v", err)
   196  	}
   197  
   198  	return pluginapi.NewResourcePluginClient(c), c, nil
   199  }