github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/manager/manager.go (about) 1 /* 2 Copyright 2016-2018 Mirantis 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package manager 18 19 import ( 20 "fmt" 21 "strings" 22 "time" 23 24 "github.com/golang/glog" 25 "k8s.io/client-go/tools/clientcmd" 26 27 "github.com/Mirantis/virtlet/pkg/api/virtlet.k8s/v1" 28 "github.com/Mirantis/virtlet/pkg/diag" 29 "github.com/Mirantis/virtlet/pkg/fs" 30 "github.com/Mirantis/virtlet/pkg/image" 31 "github.com/Mirantis/virtlet/pkg/imagetranslation" 32 "github.com/Mirantis/virtlet/pkg/libvirttools" 33 "github.com/Mirantis/virtlet/pkg/metadata" 34 "github.com/Mirantis/virtlet/pkg/metadata/types" 35 "github.com/Mirantis/virtlet/pkg/stream" 36 "github.com/Mirantis/virtlet/pkg/tapmanager" 37 "github.com/Mirantis/virtlet/pkg/utils" 38 "google.golang.org/grpc/health" 39 "google.golang.org/grpc/health/grpc_health_v1" 40 ) 41 42 const ( 43 tapManagerConnectInterval = 200 * time.Millisecond 44 tapManagerAttemptCount = 50 45 streamerSocketPath = "/var/lib/libvirt/streamer.sock" 46 volumePoolName = "volumes" 47 virtletSharedFsDir = "/var/lib/virtlet/fs" 48 ) 49 50 // VirtletManager wraps the Virtlet's Runtime and Image CRI services, 51 // as well as a gRPC server that provides access to them. 52 type VirtletManager struct { 53 config *v1.VirtletConfig 54 metadataStore metadata.Store 55 fdManager tapmanager.FDManager 56 diagSet *diag.Set 57 clientCfg clientcmd.ClientConfig 58 virtTool *libvirttools.VirtualizationTool 59 imageStore image.Store 60 runtimeService *VirtletRuntimeService 61 imageService *VirtletImageService 62 server *Server 63 } 64 65 // NewVirtletManager creates a new VirtletManager. 66 func NewVirtletManager(config *v1.VirtletConfig, fdManager tapmanager.FDManager, clientCfg clientcmd.ClientConfig, diagSet *diag.Set) *VirtletManager { 67 return &VirtletManager{config: config, fdManager: fdManager, diagSet: diagSet, clientCfg: clientCfg} 68 } 69 70 // Run sets up the environment for the runtime and image services and 71 // starts the gRPC listener. It doesn't return until the server is 72 // stopped or an error occurs. 73 func (v *VirtletManager) Run() error { 74 var err error 75 if v.fdManager == nil { 76 client := tapmanager.NewFDClient(*v.config.FDServerSocketPath) 77 for i := 0; i < tapManagerAttemptCount; i++ { 78 if err = client.IsRunning(); err == nil { 79 break 80 } 81 time.Sleep(tapManagerConnectInterval) 82 } 83 if err != nil { 84 return fmt.Errorf("failed to connect to tapmanager: %v", err) 85 } 86 v.fdManager = client 87 } 88 89 v.metadataStore, err = metadata.NewStore(*v.config.DatabasePath) 90 if err != nil { 91 return fmt.Errorf("failed to create metadata store: %v", err) 92 } 93 v.diagSet.RegisterDiagSource("metadata", metadata.GetMetadataDumpSource(v.metadataStore)) 94 95 downloader := image.NewDownloader(*v.config.DownloadProtocol) 96 v.imageStore = image.NewFileStore(*v.config.ImageDir, downloader, nil) 97 v.imageStore.SetRefGetter(v.metadataStore.ImagesInUse) 98 99 var translator image.Translator 100 if !*v.config.SkipImageTranslation { 101 translator = imagetranslation.GetDefaultImageTranslator(*v.config.ImageTranslationConfigsDir, *v.config.EnableRegexpImageTranslation, v.clientCfg) 102 } else { 103 translator = imagetranslation.GetEmptyImageTranslator() 104 } 105 106 conn, err := libvirttools.NewConnection(*v.config.LibvirtURI) 107 if err != nil { 108 return fmt.Errorf("error establishing libvirt connection: %v", err) 109 } 110 v.diagSet.RegisterDiagSource("libvirt-xml", libvirttools.NewLibvirtDiagSource(conn, conn)) 111 112 virtConfig := libvirttools.VirtualizationConfig{ 113 DisableKVM: *v.config.DisableKVM, 114 EnableSriov: *v.config.EnableSriov, 115 CPUModel: *v.config.CPUModel, 116 VolumePoolName: volumePoolName, 117 SharedFilesystemPath: virtletSharedFsDir, 118 KubeletRootDir: *v.config.KubeletRootDir, 119 } 120 if *v.config.RawDevices != "" { 121 virtConfig.RawDevices = strings.Split(*v.config.RawDevices, ",") 122 } 123 124 var streamServer StreamServer 125 if !*v.config.DisableLogging { 126 s, err := stream.NewServer(streamerSocketPath, v.metadataStore, *v.config.StreamPort) 127 if err != nil { 128 return fmt.Errorf("couldn't create stream server: %v", err) 129 } 130 131 err = s.Start() 132 if err != nil { 133 glog.Warningf("Could not start stream server: %v", err) 134 135 } 136 streamServer = s 137 virtConfig.StreamerSocketPath = streamerSocketPath 138 } 139 140 volSrc := libvirttools.GetDefaultVolumeSource() 141 v.virtTool = libvirttools.NewVirtualizationTool( 142 conn, conn, v.imageStore, v.metadataStore, volSrc, virtConfig, 143 fs.RealFileSystem, utils.DefaultCommander) 144 145 runtimeService := NewVirtletRuntimeService(v.virtTool, v.metadataStore, v.fdManager, streamServer, v.imageStore, nil) 146 imageService := NewVirtletImageService(v.imageStore, translator, nil) 147 148 v.server = NewServer() 149 v.server.Register(runtimeService, imageService) 150 151 healthServer := health.NewServer() 152 grpc_health_v1.RegisterHealthServer(v.server.server, healthServer) 153 154 if err := v.recoverAndGC(); err != nil { 155 // we consider recover / gc errors non-fatal 156 glog.Warning(err) 157 } 158 159 glog.V(1).Infof("Starting server on socket %s", *v.config.CRISocketPath) 160 if err = v.server.Serve(*v.config.CRISocketPath); err != nil { 161 return fmt.Errorf("serving failed: %v", err) 162 } 163 164 return nil 165 } 166 167 // Stop stops the gRPC listener of the VirtletManager, if it's active. 168 func (v *VirtletManager) Stop() { 169 if v.server != nil { 170 v.server.Stop() 171 } 172 } 173 174 // recoverAndGC performs the initial actions during VirtletManager 175 // startup, including recovering network namespaces and performing 176 // garbage collection for both libvirt and the image store. 177 func (v *VirtletManager) recoverAndGC() error { 178 var errors []string 179 180 for _, err := range v.virtTool.GarbageCollect() { 181 errors = append(errors, fmt.Sprintf("* error performing libvirt GC: %v", err)) 182 } 183 184 // recover network namespace after VM GC 185 for _, err := range v.recoverNetworkNamespaces() { 186 errors = append(errors, fmt.Sprintf("* error recovering VM network namespaces: %v", err)) 187 } 188 189 if err := v.imageStore.GC(); err != nil { 190 errors = append(errors, fmt.Sprintf("* error during image GC: %v", err)) 191 } 192 193 if len(errors) == 0 { 194 return nil 195 } 196 197 return fmt.Errorf("errors encountered during recover / GC:\n%s", strings.Join(errors, "\n")) 198 } 199 200 // recoverNetworkNamespaces recovers all the active VM network namespaces 201 // from previous Virtlet run by scanning the metadata store and starting 202 // dhcp server for each namespace that's still active 203 func (v *VirtletManager) recoverNetworkNamespaces() (allErrors []error) { 204 sandboxes, err := v.metadataStore.ListPodSandboxes(nil) 205 if err != nil { 206 allErrors = append(allErrors, err) 207 return 208 } 209 210 OUTER: 211 for _, s := range sandboxes { 212 psi, err := s.Retrieve() 213 if err != nil { 214 allErrors = append(allErrors, fmt.Errorf("can't retrieve PodSandboxInfo for sandbox id %q: %v", s.GetID(), err)) 215 continue 216 } 217 if psi == nil { 218 allErrors = append(allErrors, fmt.Errorf("inconsistent database. Found pod %q sandbox but can not retrive its metadata", s.GetID())) 219 continue 220 } 221 // Don't recover if sandbox is not ready 222 if psi.State != types.PodSandboxState_SANDBOX_READY { 223 continue 224 } 225 226 haveRunningContainers := false 227 containers, err := v.metadataStore.ListPodContainers(s.GetID()) 228 if err != nil { 229 allErrors = append(allErrors, fmt.Errorf("can't retrieve ContainerMetadata list for pod %q: %v", s.GetID(), err)) 230 continue 231 } 232 for _, c := range containers { 233 ci, err := v.virtTool.ContainerInfo(c.GetID()) 234 if err != nil { 235 allErrors = append(allErrors, fmt.Errorf("can't verify container status for container %q in pod %q: %v", c.GetID(), s.GetID(), err)) 236 continue OUTER 237 } 238 if ci.State == types.ContainerState_CONTAINER_RUNNING { 239 haveRunningContainers = true 240 } 241 } 242 243 if err := v.fdManager.Recover( 244 s.GetID(), 245 tapmanager.RecoverPayload{ 246 Description: &tapmanager.PodNetworkDesc{ 247 PodID: s.GetID(), 248 PodNs: psi.Config.Namespace, 249 PodName: psi.Config.Name, 250 }, 251 ContainerSideNetwork: psi.ContainerSideNetwork, 252 HaveRunningContainers: haveRunningContainers, 253 }, 254 ); err != nil { 255 allErrors = append(allErrors, fmt.Errorf("error recovering netns for %q pod: %v", s.GetID(), err)) 256 } 257 } 258 return 259 }