github.com/Cloud-Foundations/Dominator@v0.3.4/hypervisor/manager/start.go (about)

     1  package manager
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/rand"
     6  	"errors"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/Cloud-Foundations/Dominator/lib/fsutil"
    15  	"github.com/Cloud-Foundations/Dominator/lib/json"
    16  	"github.com/Cloud-Foundations/Dominator/lib/lockwatcher"
    17  	"github.com/Cloud-Foundations/Dominator/lib/log/prefixlogger"
    18  	"github.com/Cloud-Foundations/Dominator/lib/meminfo"
    19  	"github.com/Cloud-Foundations/Dominator/lib/objectserver/cachingreader"
    20  	"github.com/Cloud-Foundations/Dominator/lib/rpcclientpool"
    21  	"github.com/Cloud-Foundations/Dominator/lib/stringutil"
    22  	proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor"
    23  	"github.com/Cloud-Foundations/tricorder/go/tricorder/messages"
    24  	trimsg "github.com/Cloud-Foundations/tricorder/go/tricorder/messages"
    25  )
    26  
    27  const (
    28  	productSerialFile = "/sys/class/dmi/id/product_serial"
    29  
    30  	uuidLength = 16
    31  )
    32  
    33  func getUUID(stateDir string) (string, error) {
    34  	filename := filepath.Join(stateDir, "uuid")
    35  	if file, err := os.Open(filename); err == nil {
    36  		defer file.Close()
    37  		buffer := make([]byte, uuidLength*2)
    38  		if length, err := file.Read(buffer); err != nil {
    39  			return "", err
    40  		} else if length < len(buffer) {
    41  			return "", errors.New("unable to read enough UUID data")
    42  		} else {
    43  			return string(buffer), nil
    44  		}
    45  	}
    46  	if uuid, err := randString(uuidLength); err != nil {
    47  		return "", err
    48  	} else {
    49  		os.Remove(filename)
    50  		if file, err := os.Create(filename); err != nil {
    51  			return "", err
    52  		} else {
    53  			defer file.Close()
    54  			if _, err := fmt.Fprintln(file, uuid); err != nil {
    55  				return "", err
    56  			}
    57  			return uuid, nil
    58  		}
    59  	}
    60  }
    61  
    62  func newManager(startOptions StartOptions) (*Manager, error) {
    63  	memInfo, err := meminfo.GetMemInfo()
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  	rootCookie := make([]byte, 32)
    68  	if _, err := rand.Read(rootCookie); err != nil {
    69  		return nil, err
    70  	}
    71  	uuid, err := getUUID(startOptions.StateDir)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	manager := &Manager{
    76  		StartOptions:  startOptions,
    77  		rootCookie:    rootCookie,
    78  		memTotalInMiB: memInfo.Total >> 20,
    79  		notifiers:     make(map[<-chan proto.Update]chan<- proto.Update),
    80  		numCPUs:       uint(runtime.NumCPU()),
    81  		serialNumber:  readProductSerial(),
    82  		vms:           make(map[string]*vmInfoType),
    83  		uuid:          uuid,
    84  	}
    85  	err = fsutil.CopyToFile(manager.GetRootCookiePath(),
    86  		fsutil.PrivateFilePerms, bytes.NewReader(rootCookie), 0)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	_, err = os.Stat(filepath.Join(startOptions.StateDir, "disabled"))
    91  	if err == nil {
    92  		manager.disabled = true
    93  	}
    94  	if err := manager.setupVolumes(startOptions); err != nil {
    95  		return nil, err
    96  	}
    97  	if err := manager.checkVsockets(); err != nil {
    98  		return nil, err
    99  	}
   100  	if err := manager.loadSubnets(); err != nil {
   101  		return nil, err
   102  	}
   103  	if err := manager.loadAddressPool(); err != nil {
   104  		return nil, err
   105  	}
   106  	dirname := filepath.Join(manager.StateDir, "VMs")
   107  	dir, err := os.Open(dirname)
   108  	if err != nil {
   109  		if os.IsNotExist(err) {
   110  			if err := os.Mkdir(dirname, fsutil.DirPerms); err != nil {
   111  				return nil, errors.New(
   112  					"error making: " + dirname + ": " + err.Error())
   113  			}
   114  			dir, err = os.Open(dirname)
   115  		}
   116  	}
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	defer dir.Close()
   121  	names, err := dir.Readdirnames(-1)
   122  	if err != nil {
   123  		return nil, errors.New(
   124  			"error reading directory: " + dirname + ": " + err.Error())
   125  	}
   126  	for _, ipAddr := range names {
   127  		vmDirname := filepath.Join(dirname, ipAddr)
   128  		filename := filepath.Join(vmDirname, "info.json")
   129  		var vmInfo vmInfoType
   130  		if err := json.ReadFromFile(filename, &vmInfo); err != nil {
   131  			manager.Logger.Println(err)
   132  			if err := os.Remove(vmDirname); err != nil {
   133  				manager.Logger.Println(err)
   134  			}
   135  			continue
   136  		}
   137  		vmInfo.Address.Shrink()
   138  		vmInfo.manager = manager
   139  		vmInfo.dirname = vmDirname
   140  		vmInfo.ipAddress = ipAddr
   141  		vmInfo.ownerUsers = stringutil.ConvertListToMap(vmInfo.OwnerUsers,
   142  			false)
   143  		vmInfo.logger = prefixlogger.New(ipAddr+": ", manager.Logger)
   144  		vmInfo.metadataChannels = make(map[chan<- string]struct{})
   145  		manager.vms[ipAddr] = &vmInfo
   146  		vmInfo.setupLockWatcher()
   147  		if _, err := vmInfo.startManaging(0, false, false); err != nil {
   148  			manager.Logger.Println(err)
   149  			if ipAddr == "0.0.0.0" {
   150  				delete(manager.vms, ipAddr)
   151  				vmInfo.destroy()
   152  			}
   153  		}
   154  	}
   155  	// Check address pool for used addresses with no VM, and remove.
   156  	freeIPs := make(map[string]struct{}, len(manager.addressPool.Free))
   157  	for _, addr := range manager.addressPool.Free {
   158  		freeIPs[addr.IpAddress.String()] = struct{}{}
   159  	}
   160  	secondaryIPs := make(map[string]struct{})
   161  	for _, vm := range manager.vms {
   162  		for _, addr := range vm.SecondaryAddresses {
   163  			secondaryIPs[addr.IpAddress.String()] = struct{}{}
   164  		}
   165  	}
   166  	var addressesToKeep []proto.Address
   167  	for _, addr := range manager.addressPool.Registered {
   168  		ipAddr := addr.IpAddress.String()
   169  		if _, ok := freeIPs[ipAddr]; ok {
   170  			addressesToKeep = append(addressesToKeep, addr)
   171  			continue
   172  		}
   173  		if _, ok := manager.vms[ipAddr]; ok {
   174  			addressesToKeep = append(addressesToKeep, addr)
   175  			continue
   176  		}
   177  		if _, ok := secondaryIPs[ipAddr]; ok {
   178  			addressesToKeep = append(addressesToKeep, addr)
   179  			continue
   180  		}
   181  		manager.Logger.Printf(
   182  			"%s shown as used but no corresponding VM, removing\n", ipAddr)
   183  	}
   184  	var changedPool bool
   185  	if len(manager.addressPool.Registered) != len(addressesToKeep) {
   186  		manager.addressPool.Registered = addressesToKeep
   187  		changedPool = true
   188  	}
   189  	// Check address pool for free addresses which are not registered and remove
   190  	addressesToKeep = nil
   191  	registeredIPs := make(map[string]struct{},
   192  		len(manager.addressPool.Registered))
   193  	for _, addr := range manager.addressPool.Registered {
   194  		registeredIPs[addr.IpAddress.String()] = struct{}{}
   195  	}
   196  	for _, addr := range manager.addressPool.Free {
   197  		ipAddr := addr.IpAddress.String()
   198  		if _, ok := registeredIPs[ipAddr]; ok {
   199  			addressesToKeep = append(addressesToKeep, addr)
   200  		} else {
   201  			manager.Logger.Printf(
   202  				"%s shown as free but not registered, removing\n", ipAddr)
   203  		}
   204  	}
   205  	if len(manager.addressPool.Free) != len(addressesToKeep) {
   206  		manager.addressPool.Free = addressesToKeep
   207  		changedPool = true
   208  	}
   209  	if changedPool {
   210  		manager.writeAddressPoolWithLock(manager.addressPool, false)
   211  	}
   212  	if startOptions.ObjectCacheBytes >= 1<<20 {
   213  		dirname := filepath.Join(filepath.Dir(manager.volumeDirectories[0]),
   214  			"objectcache")
   215  		if err := os.MkdirAll(dirname, fsutil.DirPerms); err != nil {
   216  			return nil, err
   217  		}
   218  		objSrv, err := cachingreader.NewObjectServer(dirname,
   219  			startOptions.ObjectCacheBytes, startOptions.ImageServerAddress,
   220  			startOptions.Logger)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		manager.objectCache = objSrv
   225  	}
   226  	go manager.loopCheckHealthStatus()
   227  	lockCheckInterval := startOptions.LockCheckInterval
   228  	if lockCheckInterval > time.Second {
   229  		// Leveraged for dashboard, so keep it fresh.
   230  		lockCheckInterval = time.Second
   231  	}
   232  	manager.lockWatcher = lockwatcher.New(&manager.mutex,
   233  		lockwatcher.LockWatcherOptions{
   234  			CheckInterval: lockCheckInterval,
   235  			Logger:        startOptions.Logger,
   236  			LogTimeout:    startOptions.LockLogTimeout,
   237  			RFunction:     manager.updateSummaryWithMainRLock,
   238  		})
   239  	return manager, nil
   240  }
   241  
   242  func randString(length uint) (string, error) {
   243  	buffer := make([]byte, length)
   244  	if length, err := rand.Read(buffer); err != nil {
   245  		return "", err
   246  	} else if length < uuidLength {
   247  		return "", errors.New("unable to read enough random UUID data")
   248  	} else {
   249  		return fmt.Sprintf("%x", buffer), nil
   250  	}
   251  }
   252  
   253  func readProductSerial() string {
   254  	if file, err := os.Open(productSerialFile); err != nil {
   255  		return ""
   256  	} else {
   257  		defer file.Close()
   258  		buffer := make([]byte, 256)
   259  		if nRead, err := file.Read(buffer); err != nil {
   260  			return ""
   261  		} else if nRead < 1 {
   262  			return ""
   263  		} else {
   264  			serial := strings.TrimSpace(string(buffer[:nRead]))
   265  			if serial == "System Serial Number" {
   266  				serial = ""
   267  			}
   268  			return serial
   269  		}
   270  	}
   271  }
   272  
   273  func (m *Manager) loopCheckHealthStatus() {
   274  	cr := rpcclientpool.New("tcp", ":6910", true, "")
   275  	for ; ; time.Sleep(time.Second * 10) {
   276  		healthStatus := m.checkHealthStatus(cr)
   277  		m.healthStatusMutex.Lock()
   278  		if m.healthStatus == healthStatus {
   279  			m.healthStatusMutex.Unlock()
   280  			continue
   281  		}
   282  		m.healthStatus = healthStatus
   283  		m.healthStatusMutex.Unlock()
   284  		m.mutex.RLock()
   285  		numFreeAddresses, err := m.computeNumFreeAddressesMap(m.addressPool)
   286  		m.mutex.RUnlock()
   287  		if err != nil {
   288  			m.Logger.Println(err)
   289  		}
   290  		m.sendUpdate(proto.Update{
   291  			NumFreeAddresses: numFreeAddresses,
   292  		})
   293  	}
   294  }
   295  
   296  func (m *Manager) checkHealthStatus(cr *rpcclientpool.ClientResource) string {
   297  	client, err := cr.Get(nil)
   298  	if err != nil {
   299  		m.Logger.Printf("error getting health-agent client: %s", err)
   300  		return "bad health-agent"
   301  	}
   302  	defer client.Put()
   303  	var metric messages.Metric
   304  	err = client.Call("MetricsServer.GetMetric", "/sys/storage/health", &metric)
   305  	if err != nil {
   306  		if strings.Contains(err.Error(), trimsg.ErrMetricNotFound.Error()) {
   307  			return ""
   308  		}
   309  		m.Logger.Printf("error getting health-agent metrics: %s", err)
   310  		client.Close()
   311  		return "failed getting health metrics"
   312  	}
   313  	if healthStatus, ok := metric.Value.(string); !ok {
   314  		m.Logger.Println("list metric is not string")
   315  		return "bad health metric type"
   316  	} else if healthStatus == "good" {
   317  		return "healthy"
   318  	} else {
   319  		return healthStatus
   320  	}
   321  }