github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/endpoint/endpoint.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package endpoint 18 19 import ( 20 "context" 21 "fmt" 22 "net" 23 "sync" 24 "time" 25 26 "google.golang.org/grpc/credentials/insecure" 27 28 "google.golang.org/grpc" 29 "k8s.io/klog/v2" 30 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 31 ) 32 33 // Endpoint maps to a single registered resource plugin. It is responsible 34 // for managing gRPC communications with the resource plugin and caching 35 // resource states reported by the resource plugin. 36 type Endpoint interface { 37 Stop() 38 Allocate(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) 39 GetTopologyHints(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) 40 GetResourceAllocation(c context.Context, request *pluginapi.GetResourcesAllocationRequest) (*pluginapi.GetResourcesAllocationResponse, error) 41 RemovePod(c context.Context, removePodRequest *pluginapi.RemovePodRequest) (*pluginapi.RemovePodResponse, error) 42 IsStopped() bool 43 StopGracePeriodExpired() bool 44 GetResourcePluginOptions(ctx context.Context, in *pluginapi.Empty, opts ...grpc.CallOption) (*pluginapi.ResourcePluginOptions, error) 45 GetTopologyAwareAllocatableResources(c context.Context, request *pluginapi.GetTopologyAwareAllocatableResourcesRequest) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) 46 GetTopologyAwareResources(c context.Context, request *pluginapi.GetTopologyAwareResourcesRequest) (*pluginapi.GetTopologyAwareResourcesResponse, error) 47 } 48 49 type EndpointInfo struct { 50 E Endpoint 51 Opts *pluginapi.ResourcePluginOptions 52 } 53 54 type EndpointImpl struct { 55 client pluginapi.ResourcePluginClient 56 clientConn *grpc.ClientConn 57 58 socketPath string 59 resourceName string 60 stopTime time.Time 61 62 mutex sync.Mutex 63 } 64 65 // NewEndpointImpl creates a new endpoint for the given resourceName. 66 // This is to be used during normal resource plugin registration. 67 func NewEndpointImpl(socketPath, resourceName string) (*EndpointImpl, error) { 68 client, c, err := dial(socketPath) 69 if err != nil { 70 klog.Errorf("[ORM] Can't create new endpoint with path %s err %v", socketPath, err) 71 return nil, err 72 } 73 74 return &EndpointImpl{ 75 client: client, 76 clientConn: c, 77 78 socketPath: socketPath, 79 resourceName: resourceName, 80 }, nil 81 } 82 83 func (e *EndpointImpl) Client() pluginapi.ResourcePluginClient { 84 return e.client 85 } 86 87 // NewStoppedEndpointImpl creates a new endpoint for the given resourceName with stopTime set. 88 // This is to be used during Kubelet restart, before the actual resource plugin re-registers. 89 func NewStoppedEndpointImpl(resourceName string) *EndpointImpl { 90 return &EndpointImpl{ 91 resourceName: resourceName, 92 stopTime: time.Now(), 93 } 94 } 95 96 func (e *EndpointImpl) IsStopped() bool { 97 e.mutex.Lock() 98 defer e.mutex.Unlock() 99 return !e.stopTime.IsZero() 100 } 101 102 func (e *EndpointImpl) StopGracePeriodExpired() bool { 103 e.mutex.Lock() 104 defer e.mutex.Unlock() 105 return !e.stopTime.IsZero() && time.Since(e.stopTime) > endpointStopGracePeriod 106 } 107 108 // used for testing only 109 func (e *EndpointImpl) setStopTime(t time.Time) { 110 e.mutex.Lock() 111 defer e.mutex.Unlock() 112 e.stopTime = t 113 } 114 115 // allocate issues Allocate gRPC call to the resource plugin. 116 func (e *EndpointImpl) Allocate(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) { 117 if e.IsStopped() { 118 return nil, fmt.Errorf(errEndpointStopped, e) 119 } 120 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginAllocateRPCTimeoutInSecs*time.Second) 121 defer cancel() 122 return e.client.Allocate(ctx, resourceRequest) 123 } 124 125 func (e *EndpointImpl) Stop() { 126 e.mutex.Lock() 127 defer e.mutex.Unlock() 128 if e.clientConn != nil { 129 e.clientConn.Close() 130 } 131 e.stopTime = time.Now() 132 } 133 134 func (e *EndpointImpl) GetResourceAllocation(c context.Context, request *pluginapi.GetResourcesAllocationRequest) (*pluginapi.GetResourcesAllocationResponse, error) { 135 if e.IsStopped() { 136 return nil, fmt.Errorf(errEndpointStopped, e) 137 } 138 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetResourcesAllocationRPCTimeoutInSecs*time.Second) 139 defer cancel() 140 return e.client.GetResourcesAllocation(ctx, request) 141 } 142 143 func (e *EndpointImpl) RemovePod(c context.Context, removePodRequest *pluginapi.RemovePodRequest) (*pluginapi.RemovePodResponse, error) { 144 if e.IsStopped() { 145 return nil, fmt.Errorf(errEndpointStopped, e) 146 } 147 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginRemovePodRPCTimeoutInSecs*time.Second) 148 defer cancel() 149 return e.client.RemovePod(ctx, removePodRequest) 150 } 151 152 func (e *EndpointImpl) GetResourcePluginOptions(ctx context.Context, in *pluginapi.Empty, opts ...grpc.CallOption) (*pluginapi.ResourcePluginOptions, error) { 153 return e.client.GetResourcePluginOptions(ctx, in, opts...) 154 } 155 156 func (e *EndpointImpl) GetTopologyHints(c context.Context, resourceRequest *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) { 157 if e.IsStopped() { 158 return nil, fmt.Errorf(errEndpointStopped, e) 159 } 160 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyHintsRPCTimeoutInSecs*time.Second) 161 defer cancel() 162 163 return e.client.GetTopologyHints(ctx, resourceRequest) 164 } 165 166 func (e *EndpointImpl) GetTopologyAwareAllocatableResources(c context.Context, request *pluginapi.GetTopologyAwareAllocatableResourcesRequest) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) { 167 if e.IsStopped() { 168 return nil, fmt.Errorf(errEndpointStopped, e) 169 } 170 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyAwareAllocatableResourcesRPCTimeoutInSecs*time.Second) 171 defer cancel() 172 return e.client.GetTopologyAwareAllocatableResources(ctx, request) 173 } 174 175 func (e *EndpointImpl) GetTopologyAwareResources(c context.Context, request *pluginapi.GetTopologyAwareResourcesRequest) (*pluginapi.GetTopologyAwareResourcesResponse, error) { 176 if e.IsStopped() { 177 return nil, fmt.Errorf(errEndpointStopped, e) 178 } 179 ctx, cancel := context.WithTimeout(c, pluginapi.KubeletResourcePluginGetTopologyAwareResourcesRPCTimeoutInSecs*time.Second) 180 defer cancel() 181 return e.client.GetTopologyAwareResources(ctx, request) 182 } 183 184 // dial establishes the gRPC communication with the registered resource plugin. https://godoc.org/google.golang.org/grpc#Dial 185 func dial(unixSocketPath string) (pluginapi.ResourcePluginClient, *grpc.ClientConn, error) { 186 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 187 defer cancel() 188 189 c, err := grpc.DialContext(ctx, unixSocketPath, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock(), 190 grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { 191 return (&net.Dialer{}).DialContext(ctx, "unix", addr) 192 }), 193 ) 194 if err != nil { 195 return nil, nil, fmt.Errorf(errFailedToDialResourcePlugin+" %v", err) 196 } 197 198 return pluginapi.NewResourcePluginClient(c), c, nil 199 }