github.com/Cloud-Foundations/Dominator@v0.3.4/hypervisor/manager/start.go (about) 1 package manager 2 3 import ( 4 "bytes" 5 "crypto/rand" 6 "errors" 7 "fmt" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strings" 12 "time" 13 14 "github.com/Cloud-Foundations/Dominator/lib/fsutil" 15 "github.com/Cloud-Foundations/Dominator/lib/json" 16 "github.com/Cloud-Foundations/Dominator/lib/lockwatcher" 17 "github.com/Cloud-Foundations/Dominator/lib/log/prefixlogger" 18 "github.com/Cloud-Foundations/Dominator/lib/meminfo" 19 "github.com/Cloud-Foundations/Dominator/lib/objectserver/cachingreader" 20 "github.com/Cloud-Foundations/Dominator/lib/rpcclientpool" 21 "github.com/Cloud-Foundations/Dominator/lib/stringutil" 22 proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor" 23 "github.com/Cloud-Foundations/tricorder/go/tricorder/messages" 24 trimsg "github.com/Cloud-Foundations/tricorder/go/tricorder/messages" 25 ) 26 27 const ( 28 productSerialFile = "/sys/class/dmi/id/product_serial" 29 30 uuidLength = 16 31 ) 32 33 func getUUID(stateDir string) (string, error) { 34 filename := filepath.Join(stateDir, "uuid") 35 if file, err := os.Open(filename); err == nil { 36 defer file.Close() 37 buffer := make([]byte, uuidLength*2) 38 if length, err := file.Read(buffer); err != nil { 39 return "", err 40 } else if length < len(buffer) { 41 return "", errors.New("unable to read enough UUID data") 42 } else { 43 return string(buffer), nil 44 } 45 } 46 if uuid, err := randString(uuidLength); err != nil { 47 return "", err 48 } else { 49 os.Remove(filename) 50 if file, err := os.Create(filename); err != nil { 51 return "", err 52 } else { 53 defer file.Close() 54 if _, err := fmt.Fprintln(file, uuid); err != nil { 55 return "", err 56 } 57 return uuid, nil 58 } 59 } 60 } 61 62 func newManager(startOptions StartOptions) (*Manager, error) { 63 memInfo, err := meminfo.GetMemInfo() 64 if err != nil { 65 return nil, err 66 } 67 rootCookie := make([]byte, 32) 68 if _, err := rand.Read(rootCookie); err != nil { 69 return nil, err 70 } 71 uuid, err := getUUID(startOptions.StateDir) 72 if err != nil { 73 return nil, err 74 } 75 manager := &Manager{ 76 StartOptions: startOptions, 77 rootCookie: rootCookie, 78 memTotalInMiB: memInfo.Total >> 20, 79 notifiers: make(map[<-chan proto.Update]chan<- proto.Update), 80 numCPUs: uint(runtime.NumCPU()), 81 serialNumber: readProductSerial(), 82 vms: make(map[string]*vmInfoType), 83 uuid: uuid, 84 } 85 err = fsutil.CopyToFile(manager.GetRootCookiePath(), 86 fsutil.PrivateFilePerms, bytes.NewReader(rootCookie), 0) 87 if err != nil { 88 return nil, err 89 } 90 _, err = os.Stat(filepath.Join(startOptions.StateDir, "disabled")) 91 if err == nil { 92 manager.disabled = true 93 } 94 if err := manager.setupVolumes(startOptions); err != nil { 95 return nil, err 96 } 97 if err := manager.checkVsockets(); err != nil { 98 return nil, err 99 } 100 if err := manager.loadSubnets(); err != nil { 101 return nil, err 102 } 103 if err := manager.loadAddressPool(); err != nil { 104 return nil, err 105 } 106 dirname := filepath.Join(manager.StateDir, "VMs") 107 dir, err := os.Open(dirname) 108 if err != nil { 109 if os.IsNotExist(err) { 110 if err := os.Mkdir(dirname, fsutil.DirPerms); err != nil { 111 return nil, errors.New( 112 "error making: " + dirname + ": " + err.Error()) 113 } 114 dir, err = os.Open(dirname) 115 } 116 } 117 if err != nil { 118 return nil, err 119 } 120 defer dir.Close() 121 names, err := dir.Readdirnames(-1) 122 if err != nil { 123 return nil, errors.New( 124 "error reading directory: " + dirname + ": " + err.Error()) 125 } 126 for _, ipAddr := range names { 127 vmDirname := filepath.Join(dirname, ipAddr) 128 filename := filepath.Join(vmDirname, "info.json") 129 var vmInfo vmInfoType 130 if err := json.ReadFromFile(filename, &vmInfo); err != nil { 131 manager.Logger.Println(err) 132 if err := os.Remove(vmDirname); err != nil { 133 manager.Logger.Println(err) 134 } 135 continue 136 } 137 vmInfo.Address.Shrink() 138 vmInfo.manager = manager 139 vmInfo.dirname = vmDirname 140 vmInfo.ipAddress = ipAddr 141 vmInfo.ownerUsers = stringutil.ConvertListToMap(vmInfo.OwnerUsers, 142 false) 143 vmInfo.logger = prefixlogger.New(ipAddr+": ", manager.Logger) 144 vmInfo.metadataChannels = make(map[chan<- string]struct{}) 145 manager.vms[ipAddr] = &vmInfo 146 vmInfo.setupLockWatcher() 147 if _, err := vmInfo.startManaging(0, false, false); err != nil { 148 manager.Logger.Println(err) 149 if ipAddr == "0.0.0.0" { 150 delete(manager.vms, ipAddr) 151 vmInfo.destroy() 152 } 153 } 154 } 155 // Check address pool for used addresses with no VM, and remove. 156 freeIPs := make(map[string]struct{}, len(manager.addressPool.Free)) 157 for _, addr := range manager.addressPool.Free { 158 freeIPs[addr.IpAddress.String()] = struct{}{} 159 } 160 secondaryIPs := make(map[string]struct{}) 161 for _, vm := range manager.vms { 162 for _, addr := range vm.SecondaryAddresses { 163 secondaryIPs[addr.IpAddress.String()] = struct{}{} 164 } 165 } 166 var addressesToKeep []proto.Address 167 for _, addr := range manager.addressPool.Registered { 168 ipAddr := addr.IpAddress.String() 169 if _, ok := freeIPs[ipAddr]; ok { 170 addressesToKeep = append(addressesToKeep, addr) 171 continue 172 } 173 if _, ok := manager.vms[ipAddr]; ok { 174 addressesToKeep = append(addressesToKeep, addr) 175 continue 176 } 177 if _, ok := secondaryIPs[ipAddr]; ok { 178 addressesToKeep = append(addressesToKeep, addr) 179 continue 180 } 181 manager.Logger.Printf( 182 "%s shown as used but no corresponding VM, removing\n", ipAddr) 183 } 184 var changedPool bool 185 if len(manager.addressPool.Registered) != len(addressesToKeep) { 186 manager.addressPool.Registered = addressesToKeep 187 changedPool = true 188 } 189 // Check address pool for free addresses which are not registered and remove 190 addressesToKeep = nil 191 registeredIPs := make(map[string]struct{}, 192 len(manager.addressPool.Registered)) 193 for _, addr := range manager.addressPool.Registered { 194 registeredIPs[addr.IpAddress.String()] = struct{}{} 195 } 196 for _, addr := range manager.addressPool.Free { 197 ipAddr := addr.IpAddress.String() 198 if _, ok := registeredIPs[ipAddr]; ok { 199 addressesToKeep = append(addressesToKeep, addr) 200 } else { 201 manager.Logger.Printf( 202 "%s shown as free but not registered, removing\n", ipAddr) 203 } 204 } 205 if len(manager.addressPool.Free) != len(addressesToKeep) { 206 manager.addressPool.Free = addressesToKeep 207 changedPool = true 208 } 209 if changedPool { 210 manager.writeAddressPoolWithLock(manager.addressPool, false) 211 } 212 if startOptions.ObjectCacheBytes >= 1<<20 { 213 dirname := filepath.Join(filepath.Dir(manager.volumeDirectories[0]), 214 "objectcache") 215 if err := os.MkdirAll(dirname, fsutil.DirPerms); err != nil { 216 return nil, err 217 } 218 objSrv, err := cachingreader.NewObjectServer(dirname, 219 startOptions.ObjectCacheBytes, startOptions.ImageServerAddress, 220 startOptions.Logger) 221 if err != nil { 222 return nil, err 223 } 224 manager.objectCache = objSrv 225 } 226 go manager.loopCheckHealthStatus() 227 lockCheckInterval := startOptions.LockCheckInterval 228 if lockCheckInterval > time.Second { 229 // Leveraged for dashboard, so keep it fresh. 230 lockCheckInterval = time.Second 231 } 232 manager.lockWatcher = lockwatcher.New(&manager.mutex, 233 lockwatcher.LockWatcherOptions{ 234 CheckInterval: lockCheckInterval, 235 Logger: startOptions.Logger, 236 LogTimeout: startOptions.LockLogTimeout, 237 RFunction: manager.updateSummaryWithMainRLock, 238 }) 239 return manager, nil 240 } 241 242 func randString(length uint) (string, error) { 243 buffer := make([]byte, length) 244 if length, err := rand.Read(buffer); err != nil { 245 return "", err 246 } else if length < uuidLength { 247 return "", errors.New("unable to read enough random UUID data") 248 } else { 249 return fmt.Sprintf("%x", buffer), nil 250 } 251 } 252 253 func readProductSerial() string { 254 if file, err := os.Open(productSerialFile); err != nil { 255 return "" 256 } else { 257 defer file.Close() 258 buffer := make([]byte, 256) 259 if nRead, err := file.Read(buffer); err != nil { 260 return "" 261 } else if nRead < 1 { 262 return "" 263 } else { 264 serial := strings.TrimSpace(string(buffer[:nRead])) 265 if serial == "System Serial Number" { 266 serial = "" 267 } 268 return serial 269 } 270 } 271 } 272 273 func (m *Manager) loopCheckHealthStatus() { 274 cr := rpcclientpool.New("tcp", ":6910", true, "") 275 for ; ; time.Sleep(time.Second * 10) { 276 healthStatus := m.checkHealthStatus(cr) 277 m.healthStatusMutex.Lock() 278 if m.healthStatus == healthStatus { 279 m.healthStatusMutex.Unlock() 280 continue 281 } 282 m.healthStatus = healthStatus 283 m.healthStatusMutex.Unlock() 284 m.mutex.RLock() 285 numFreeAddresses, err := m.computeNumFreeAddressesMap(m.addressPool) 286 m.mutex.RUnlock() 287 if err != nil { 288 m.Logger.Println(err) 289 } 290 m.sendUpdate(proto.Update{ 291 NumFreeAddresses: numFreeAddresses, 292 }) 293 } 294 } 295 296 func (m *Manager) checkHealthStatus(cr *rpcclientpool.ClientResource) string { 297 client, err := cr.Get(nil) 298 if err != nil { 299 m.Logger.Printf("error getting health-agent client: %s", err) 300 return "bad health-agent" 301 } 302 defer client.Put() 303 var metric messages.Metric 304 err = client.Call("MetricsServer.GetMetric", "/sys/storage/health", &metric) 305 if err != nil { 306 if strings.Contains(err.Error(), trimsg.ErrMetricNotFound.Error()) { 307 return "" 308 } 309 m.Logger.Printf("error getting health-agent metrics: %s", err) 310 client.Close() 311 return "failed getting health metrics" 312 } 313 if healthStatus, ok := metric.Value.(string); !ok { 314 m.Logger.Println("list metric is not string") 315 return "bad health metric type" 316 } else if healthStatus == "good" { 317 return "healthy" 318 } else { 319 return healthStatus 320 } 321 }