github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/manager/manager.go (about)

     1  /*
     2  Copyright 2016-2018 Mirantis
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package manager
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/golang/glog"
    25  	"k8s.io/client-go/tools/clientcmd"
    26  
    27  	"github.com/Mirantis/virtlet/pkg/api/virtlet.k8s/v1"
    28  	"github.com/Mirantis/virtlet/pkg/diag"
    29  	"github.com/Mirantis/virtlet/pkg/fs"
    30  	"github.com/Mirantis/virtlet/pkg/image"
    31  	"github.com/Mirantis/virtlet/pkg/imagetranslation"
    32  	"github.com/Mirantis/virtlet/pkg/libvirttools"
    33  	"github.com/Mirantis/virtlet/pkg/metadata"
    34  	"github.com/Mirantis/virtlet/pkg/metadata/types"
    35  	"github.com/Mirantis/virtlet/pkg/stream"
    36  	"github.com/Mirantis/virtlet/pkg/tapmanager"
    37  	"github.com/Mirantis/virtlet/pkg/utils"
    38  	"google.golang.org/grpc/health"
    39  	"google.golang.org/grpc/health/grpc_health_v1"
    40  )
    41  
    42  const (
    43  	tapManagerConnectInterval = 200 * time.Millisecond
    44  	tapManagerAttemptCount    = 50
    45  	streamerSocketPath        = "/var/lib/libvirt/streamer.sock"
    46  	volumePoolName            = "volumes"
    47  	virtletSharedFsDir        = "/var/lib/virtlet/fs"
    48  )
    49  
    50  // VirtletManager wraps the Virtlet's Runtime and Image CRI services,
    51  // as well as a gRPC server that provides access to them.
    52  type VirtletManager struct {
    53  	config         *v1.VirtletConfig
    54  	metadataStore  metadata.Store
    55  	fdManager      tapmanager.FDManager
    56  	diagSet        *diag.Set
    57  	clientCfg      clientcmd.ClientConfig
    58  	virtTool       *libvirttools.VirtualizationTool
    59  	imageStore     image.Store
    60  	runtimeService *VirtletRuntimeService
    61  	imageService   *VirtletImageService
    62  	server         *Server
    63  }
    64  
    65  // NewVirtletManager creates a new VirtletManager.
    66  func NewVirtletManager(config *v1.VirtletConfig, fdManager tapmanager.FDManager, clientCfg clientcmd.ClientConfig, diagSet *diag.Set) *VirtletManager {
    67  	return &VirtletManager{config: config, fdManager: fdManager, diagSet: diagSet, clientCfg: clientCfg}
    68  }
    69  
    70  // Run sets up the environment for the runtime and image services and
    71  // starts the gRPC listener. It doesn't return until the server is
    72  // stopped or an error occurs.
    73  func (v *VirtletManager) Run() error {
    74  	var err error
    75  	if v.fdManager == nil {
    76  		client := tapmanager.NewFDClient(*v.config.FDServerSocketPath)
    77  		for i := 0; i < tapManagerAttemptCount; i++ {
    78  			if err = client.IsRunning(); err == nil {
    79  				break
    80  			}
    81  			time.Sleep(tapManagerConnectInterval)
    82  		}
    83  		if err != nil {
    84  			return fmt.Errorf("failed to connect to tapmanager: %v", err)
    85  		}
    86  		v.fdManager = client
    87  	}
    88  
    89  	v.metadataStore, err = metadata.NewStore(*v.config.DatabasePath)
    90  	if err != nil {
    91  		return fmt.Errorf("failed to create metadata store: %v", err)
    92  	}
    93  	v.diagSet.RegisterDiagSource("metadata", metadata.GetMetadataDumpSource(v.metadataStore))
    94  
    95  	downloader := image.NewDownloader(*v.config.DownloadProtocol)
    96  	v.imageStore = image.NewFileStore(*v.config.ImageDir, downloader, nil)
    97  	v.imageStore.SetRefGetter(v.metadataStore.ImagesInUse)
    98  
    99  	var translator image.Translator
   100  	if !*v.config.SkipImageTranslation {
   101  		translator = imagetranslation.GetDefaultImageTranslator(*v.config.ImageTranslationConfigsDir, *v.config.EnableRegexpImageTranslation, v.clientCfg)
   102  	} else {
   103  		translator = imagetranslation.GetEmptyImageTranslator()
   104  	}
   105  
   106  	conn, err := libvirttools.NewConnection(*v.config.LibvirtURI)
   107  	if err != nil {
   108  		return fmt.Errorf("error establishing libvirt connection: %v", err)
   109  	}
   110  	v.diagSet.RegisterDiagSource("libvirt-xml", libvirttools.NewLibvirtDiagSource(conn, conn))
   111  
   112  	virtConfig := libvirttools.VirtualizationConfig{
   113  		DisableKVM:           *v.config.DisableKVM,
   114  		EnableSriov:          *v.config.EnableSriov,
   115  		CPUModel:             *v.config.CPUModel,
   116  		VolumePoolName:       volumePoolName,
   117  		SharedFilesystemPath: virtletSharedFsDir,
   118  		KubeletRootDir:       *v.config.KubeletRootDir,
   119  	}
   120  	if *v.config.RawDevices != "" {
   121  		virtConfig.RawDevices = strings.Split(*v.config.RawDevices, ",")
   122  	}
   123  
   124  	var streamServer StreamServer
   125  	if !*v.config.DisableLogging {
   126  		s, err := stream.NewServer(streamerSocketPath, v.metadataStore, *v.config.StreamPort)
   127  		if err != nil {
   128  			return fmt.Errorf("couldn't create stream server: %v", err)
   129  		}
   130  
   131  		err = s.Start()
   132  		if err != nil {
   133  			glog.Warningf("Could not start stream server: %v", err)
   134  
   135  		}
   136  		streamServer = s
   137  		virtConfig.StreamerSocketPath = streamerSocketPath
   138  	}
   139  
   140  	volSrc := libvirttools.GetDefaultVolumeSource()
   141  	v.virtTool = libvirttools.NewVirtualizationTool(
   142  		conn, conn, v.imageStore, v.metadataStore, volSrc, virtConfig,
   143  		fs.RealFileSystem, utils.DefaultCommander)
   144  
   145  	runtimeService := NewVirtletRuntimeService(v.virtTool, v.metadataStore, v.fdManager, streamServer, v.imageStore, nil)
   146  	imageService := NewVirtletImageService(v.imageStore, translator, nil)
   147  
   148  	v.server = NewServer()
   149  	v.server.Register(runtimeService, imageService)
   150  
   151  	healthServer := health.NewServer()
   152  	grpc_health_v1.RegisterHealthServer(v.server.server, healthServer)
   153  
   154  	if err := v.recoverAndGC(); err != nil {
   155  		// we consider recover / gc errors non-fatal
   156  		glog.Warning(err)
   157  	}
   158  
   159  	glog.V(1).Infof("Starting server on socket %s", *v.config.CRISocketPath)
   160  	if err = v.server.Serve(*v.config.CRISocketPath); err != nil {
   161  		return fmt.Errorf("serving failed: %v", err)
   162  	}
   163  
   164  	return nil
   165  }
   166  
   167  // Stop stops the gRPC listener of the VirtletManager, if it's active.
   168  func (v *VirtletManager) Stop() {
   169  	if v.server != nil {
   170  		v.server.Stop()
   171  	}
   172  }
   173  
   174  // recoverAndGC performs the initial actions during VirtletManager
   175  // startup, including recovering network namespaces and performing
   176  // garbage collection for both libvirt and the image store.
   177  func (v *VirtletManager) recoverAndGC() error {
   178  	var errors []string
   179  
   180  	for _, err := range v.virtTool.GarbageCollect() {
   181  		errors = append(errors, fmt.Sprintf("* error performing libvirt GC: %v", err))
   182  	}
   183  
   184  	// recover network namespace after VM GC
   185  	for _, err := range v.recoverNetworkNamespaces() {
   186  		errors = append(errors, fmt.Sprintf("* error recovering VM network namespaces: %v", err))
   187  	}
   188  
   189  	if err := v.imageStore.GC(); err != nil {
   190  		errors = append(errors, fmt.Sprintf("* error during image GC: %v", err))
   191  	}
   192  
   193  	if len(errors) == 0 {
   194  		return nil
   195  	}
   196  
   197  	return fmt.Errorf("errors encountered during recover / GC:\n%s", strings.Join(errors, "\n"))
   198  }
   199  
   200  // recoverNetworkNamespaces recovers all the active VM network namespaces
   201  // from previous Virtlet run by scanning the metadata store and starting
   202  // dhcp server for each namespace that's still active
   203  func (v *VirtletManager) recoverNetworkNamespaces() (allErrors []error) {
   204  	sandboxes, err := v.metadataStore.ListPodSandboxes(nil)
   205  	if err != nil {
   206  		allErrors = append(allErrors, err)
   207  		return
   208  	}
   209  
   210  OUTER:
   211  	for _, s := range sandboxes {
   212  		psi, err := s.Retrieve()
   213  		if err != nil {
   214  			allErrors = append(allErrors, fmt.Errorf("can't retrieve PodSandboxInfo for sandbox id %q: %v", s.GetID(), err))
   215  			continue
   216  		}
   217  		if psi == nil {
   218  			allErrors = append(allErrors, fmt.Errorf("inconsistent database. Found pod %q sandbox but can not retrive its metadata", s.GetID()))
   219  			continue
   220  		}
   221  		// Don't recover if sandbox is not ready
   222  		if psi.State != types.PodSandboxState_SANDBOX_READY {
   223  			continue
   224  		}
   225  
   226  		haveRunningContainers := false
   227  		containers, err := v.metadataStore.ListPodContainers(s.GetID())
   228  		if err != nil {
   229  			allErrors = append(allErrors, fmt.Errorf("can't retrieve ContainerMetadata list for pod %q: %v", s.GetID(), err))
   230  			continue
   231  		}
   232  		for _, c := range containers {
   233  			ci, err := v.virtTool.ContainerInfo(c.GetID())
   234  			if err != nil {
   235  				allErrors = append(allErrors, fmt.Errorf("can't verify container status for container %q in pod %q: %v", c.GetID(), s.GetID(), err))
   236  				continue OUTER
   237  			}
   238  			if ci.State == types.ContainerState_CONTAINER_RUNNING {
   239  				haveRunningContainers = true
   240  			}
   241  		}
   242  
   243  		if err := v.fdManager.Recover(
   244  			s.GetID(),
   245  			tapmanager.RecoverPayload{
   246  				Description: &tapmanager.PodNetworkDesc{
   247  					PodID:   s.GetID(),
   248  					PodNs:   psi.Config.Namespace,
   249  					PodName: psi.Config.Name,
   250  				},
   251  				ContainerSideNetwork:  psi.ContainerSideNetwork,
   252  				HaveRunningContainers: haveRunningContainers,
   253  			},
   254  		); err != nil {
   255  			allErrors = append(allErrors, fmt.Errorf("error recovering netns for %q pod: %v", s.GetID(), err))
   256  		}
   257  	}
   258  	return
   259  }