github.com/Cloud-Foundations/Dominator@v0.3.4/lib/slavedriver/impl.go (about)

     1  package slavedriver
     2  
     3  import (
     4  	"container/list"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"time"
     9  
    10  	"github.com/Cloud-Foundations/Dominator/lib/backoffdelay"
    11  	"github.com/Cloud-Foundations/Dominator/lib/format"
    12  	"github.com/Cloud-Foundations/Dominator/lib/fsutil"
    13  	"github.com/Cloud-Foundations/Dominator/lib/json"
    14  	"github.com/Cloud-Foundations/Dominator/lib/log"
    15  	"github.com/Cloud-Foundations/Dominator/lib/srpc"
    16  )
    17  
    18  type jsonDatabase struct {
    19  	filename string
    20  }
    21  
    22  func dialWithRetry(network, address string,
    23  	timeout time.Duration) (*srpc.Client, error) {
    24  	stopTime := time.Now().Add(timeout)
    25  	sleeper := backoffdelay.NewExponential(100*time.Millisecond, time.Second, 1)
    26  	for ; time.Until(stopTime) >= 0; sleeper.Sleep() {
    27  		client, err := srpc.DialHTTP(network, address, time.Second)
    28  		if err != nil {
    29  			continue
    30  		}
    31  		if err := client.SetKeepAlivePeriod(time.Second * 30); err != nil {
    32  			client.Close()
    33  			return nil, err
    34  		}
    35  		return client, nil
    36  
    37  	}
    38  	return nil, fmt.Errorf("timed out connecting to: %s", address)
    39  }
    40  
    41  func listSlaves(slaves map[*Slave]struct{}) []SlaveInfo {
    42  	list := make([]SlaveInfo, 0, len(slaves))
    43  	for slave := range slaves {
    44  		list = append(list, slave.info)
    45  	}
    46  	return list
    47  }
    48  
    49  func newSlaveDriver(options SlaveDriverOptions, slaveTrader SlaveTrader,
    50  	clientDialer clientDialerFunc, databaseDriver databaseLoadSaver,
    51  	logger log.DebugLogger) (*SlaveDriver, error) {
    52  	if options.MinimumIdleSlaves < 1 {
    53  		options.MinimumIdleSlaves = 1
    54  	}
    55  	if options.MaximumIdleSlaves < 1 {
    56  		options.MaximumIdleSlaves = 1
    57  	}
    58  	if options.MaximumIdleSlaves < options.MinimumIdleSlaves {
    59  		options.MaximumIdleSlaves = options.MinimumIdleSlaves
    60  	}
    61  	destroySlaveChannel := make(chan *Slave, 1)
    62  	getSlaveChannel := make(chan requestSlaveMessage)
    63  	getSlavesChannel := make(chan chan<- slaveRoll)
    64  	releaseSlaveChannel := make(chan *Slave, 1)
    65  	replaceIdleChannel := make(chan bool)
    66  	publicDriver := &SlaveDriver{
    67  		options:             options,
    68  		destroySlaveChannel: destroySlaveChannel,
    69  		getSlaveChannel:     getSlaveChannel,
    70  		getSlavesChannel:    getSlavesChannel,
    71  		logger:              logger,
    72  		releaseSlaveChannel: releaseSlaveChannel,
    73  		replaceIdleChannel:  replaceIdleChannel,
    74  	}
    75  	driver := &slaveDriver{
    76  		options:             options,
    77  		busySlaves:          make(map[*Slave]struct{}),
    78  		clientDialer:        clientDialer,
    79  		destroySlaveChannel: destroySlaveChannel,
    80  		databaseDriver:      databaseDriver,
    81  		getSlaveChannel:     getSlaveChannel,
    82  		getSlavesChannel:    getSlavesChannel,
    83  		getterList:          list.New(),
    84  		logger:              logger,
    85  		pingResponseChannel: make(chan pingResponseMessage, 1),
    86  		publicDriver:        publicDriver,
    87  		slaveTrader:         slaveTrader,
    88  		releaseSlaveChannel: releaseSlaveChannel,
    89  		replaceIdleChannel:  replaceIdleChannel,
    90  	}
    91  	if err := driver.loadSlaves(); err != nil {
    92  		driver.slaveTrader.Close()
    93  		return nil, err
    94  	}
    95  	go driver.watchRoll()
    96  	return publicDriver, nil
    97  }
    98  
    99  func (db *jsonDatabase) load() (*slaveRoll, error) {
   100  	var slaves slaveRoll
   101  	err := json.ReadFromFile(db.filename, &slaves)
   102  	if err != nil {
   103  		if os.IsNotExist(err) {
   104  			return nil, nil
   105  		}
   106  		return nil, err
   107  	}
   108  	return &slaves, nil
   109  }
   110  
   111  func (db *jsonDatabase) save(slaves slaveRoll) error {
   112  	return json.WriteToFile(db.filename, fsutil.PublicFilePerms, "    ", slaves)
   113  }
   114  
   115  func (slave *Slave) acknowledge(logger log.DebugLogger) error {
   116  	if slave.acknowledgeChannel == nil {
   117  		return nil
   118  	}
   119  	errorChannel := make(chan error, 1)
   120  	slave.acknowledgeChannel <- errorChannel
   121  	slave.acknowledgeChannel = nil
   122  	timer := time.NewTimer(15 * time.Second)
   123  	select {
   124  	case err := <-errorChannel:
   125  		if err != nil {
   126  			return err
   127  		} else {
   128  			logger.Debugf(0, "acknowledged slave: %s\n", slave)
   129  			return nil
   130  		}
   131  	case <-timer.C:
   132  		return fmt.Errorf("timed out")
   133  	}
   134  }
   135  
   136  func (slave *Slave) getClient() *srpc.Client {
   137  	return slave.client
   138  }
   139  
   140  func (slave *Slave) ping(pingResponseChannel chan<- pingResponseMessage) {
   141  	errorChannel := make(chan error, 1)
   142  	timer := time.NewTimer(5 * time.Second)
   143  	go func() {
   144  		errorChannel <- slave.client.Ping()
   145  		slave.driver.logger.Debugf(1, "ping(%s) goroutine returning\n", slave)
   146  	}()
   147  	select {
   148  	case err := <-errorChannel:
   149  		pingResponseChannel <- pingResponseMessage{
   150  			error: err,
   151  			slave: slave,
   152  		}
   153  	case <-timer.C:
   154  		pingResponseChannel <- pingResponseMessage{
   155  			error: fmt.Errorf("timed out"),
   156  			slave: slave,
   157  		}
   158  	}
   159  }
   160  
   161  func (driver *SlaveDriver) getSlave(timeout time.Duration) (*Slave, error) {
   162  	driver.logger.Debugln(0, "getSlave() starting")
   163  	if timeout < 0 {
   164  		timeout = time.Hour
   165  	}
   166  	slaveChannel := make(chan *Slave)
   167  	driver.getSlaveChannel <- requestSlaveMessage{
   168  		slaveChannel: slaveChannel,
   169  		timeout:      time.Now().Add(timeout),
   170  	}
   171  	if slave := <-slaveChannel; slave == nil {
   172  		return nil, fmt.Errorf("timed out getting slave")
   173  	} else {
   174  		return slave, nil
   175  	}
   176  }
   177  
   178  func (driver *slaveDriver) createSlave(responseChannel chan<- *Slave) {
   179  	driver.logger.Debugln(0, "creating slave")
   180  	sleeper := backoffdelay.NewExponential(time.Second, time.Minute, 1)
   181  	for ; ; sleeper.Sleep() {
   182  		slaveInfo, acknowledgeChannel, err := driver.createSlaveMachine()
   183  		if err != nil {
   184  			driver.logger.Println(err)
   185  			continue
   186  		}
   187  		slave := &Slave{
   188  			acknowledgeChannel: acknowledgeChannel,
   189  			clientAddress: fmt.Sprintf("%s:%d", slaveInfo.IpAddress,
   190  				driver.options.PortNumber),
   191  			info:       slaveInfo,
   192  			driver:     driver.publicDriver,
   193  			timeToPing: time.Now().Add(time.Minute),
   194  		}
   195  		slave.client, err = driver.clientDialer("tcp", slave.clientAddress,
   196  			time.Minute)
   197  		if err != nil {
   198  			e := driver.slaveTrader.DestroySlave(slaveInfo.Identifier)
   199  			if e != nil {
   200  				driver.logger.Printf("error destroying: %s: %s\n",
   201  					slaveInfo.Identifier, e)
   202  			}
   203  			driver.logger.Printf("error dialing: %s: %s\n",
   204  				slave.clientAddress, err)
   205  			continue
   206  		}
   207  		driver.logger.Printf("created slave: %s\n", slaveInfo.Identifier)
   208  		responseChannel <- slave
   209  		return
   210  	}
   211  }
   212  
   213  func (driver *slaveDriver) createSlaveMachine() (SlaveInfo, chan<- chan<- error,
   214  	error) {
   215  	if creator, ok := driver.slaveTrader.(SlaveTraderAcknowledger); ok {
   216  		acknowledgeChannel := make(chan chan<- error, 1)
   217  		slaveInfo, err := creator.CreateSlaveWithAcknowledger(
   218  			acknowledgeChannel)
   219  		if err != nil {
   220  			close(acknowledgeChannel)
   221  			return SlaveInfo{}, nil, err
   222  		}
   223  		return slaveInfo, acknowledgeChannel, err
   224  	}
   225  	slaveInfo, err := driver.slaveTrader.CreateSlave()
   226  	return slaveInfo, nil, err
   227  }
   228  
   229  func (driver *slaveDriver) destroySlave(slave *Slave,
   230  	responseChannel chan<- *Slave) {
   231  	driver.logger.Printf("destroying slave: %s\n", slave.info.Identifier)
   232  	startTime := time.Now()
   233  	err := driver.slaveTrader.DestroySlave(slave.info.Identifier)
   234  	if err != nil {
   235  		driver.logger.Printf("error destroying: %s: %s\n",
   236  			slave.info.Identifier, err)
   237  		responseChannel <- nil
   238  		return
   239  	}
   240  	if duration := time.Since(startTime); duration > 5*time.Second {
   241  		driver.logger.Printf("destroyed slave: %s in %s\n",
   242  			slave.info.Identifier, format.Duration(duration))
   243  	}
   244  	responseChannel <- slave
   245  }
   246  
   247  func (driver *slaveDriver) getSlaves() slaveRoll {
   248  	return slaveRoll{
   249  		BusySlaves: listSlaves(driver.busySlaves),
   250  		IdleSlaves: listSlaves(driver.idleSlaves),
   251  		Zombies:    listSlaves(driver.zombies),
   252  	}
   253  }
   254  
   255  func (driver *slaveDriver) loadSlaves() error {
   256  	slavesFromDB, err := driver.databaseDriver.load()
   257  	if err != nil {
   258  		return err
   259  	}
   260  	if slavesFromDB == nil {
   261  		driver.idleSlaves = make(map[*Slave]struct{})
   262  		driver.zombies = make(map[*Slave]struct{})
   263  		return nil
   264  	}
   265  	slavesFromDB.BusySlaves = append(slavesFromDB.BusySlaves,
   266  		slavesFromDB.Zombies...)
   267  	driver.idleSlaves = make(map[*Slave]struct{}, len(slavesFromDB.IdleSlaves))
   268  	driver.zombies = make(map[*Slave]struct{}, len(slavesFromDB.BusySlaves))
   269  	for _, slaveInfo := range slavesFromDB.BusySlaves {
   270  		driver.zombies[&Slave{
   271  			driver: driver.publicDriver,
   272  			info:   slaveInfo,
   273  		}] = struct{}{}
   274  	}
   275  	for _, slaveInfo := range slavesFromDB.IdleSlaves {
   276  		slave := &Slave{
   277  			clientAddress: fmt.Sprintf("%s:%d", slaveInfo.IpAddress,
   278  				driver.options.PortNumber),
   279  			info:   slaveInfo,
   280  			driver: driver.publicDriver,
   281  		}
   282  		slave.client, err = driver.clientDialer("tcp", slave.clientAddress,
   283  			time.Minute)
   284  		if err != nil {
   285  			driver.logger.Printf("error dialing: %s: %s\n", slave.clientAddress,
   286  				err)
   287  			driver.zombies[slave] = struct{}{}
   288  		} else {
   289  			slave.timeToPing = time.Now().Add(time.Minute)
   290  			driver.idleSlaves[slave] = struct{}{}
   291  		}
   292  	}
   293  	return nil
   294  }
   295  
   296  // rollCall manages all the internal state. It should be called from a forever
   297  // goroutine.
   298  func (driver *slaveDriver) rollCall() {
   299  	driver.logger.Debugf(0, "rollCall(): %d idle, %d getters\n",
   300  		len(driver.idleSlaves), driver.getterList.Len())
   301  	// First: if there is an idle slave, dispatch to a getter.
   302  	if len(driver.idleSlaves) > 0 && driver.getterList.Len() > 0 {
   303  		entry := driver.getterList.Front()
   304  		request := entry.Value.(requestSlaveMessage)
   305  		driver.getterList.Remove(entry)
   306  		if time.Since(request.timeout) > 0 {
   307  			request.slaveChannel <- nil // Getter wanted to give up by now.
   308  			close(request.slaveChannel)
   309  			return
   310  		}
   311  		for slave := range driver.idleSlaves {
   312  			if time.Since(slave.timeToPing) >= 0 || slave.pinging {
   313  				continue
   314  			}
   315  			request.slaveChannel <- slave // Consumed by getter.
   316  			close(request.slaveChannel)
   317  			delete(driver.idleSlaves, slave)
   318  			driver.busySlaves[slave] = struct{}{}
   319  			driver.writeState = true
   320  			driver.logger.Debugf(0, "sent slave: %s to getter\n", slave)
   321  			return
   322  		}
   323  	}
   324  	// Clean up expired getters and set timeout on when to next check.
   325  	wakeTimeout := time.Hour
   326  	var nextEntry *list.Element
   327  	for entry := driver.getterList.Front(); entry != nil; entry = nextEntry {
   328  		nextEntry = entry.Next()
   329  		request := entry.Value.(requestSlaveMessage)
   330  		if timeout := time.Until(request.timeout); timeout <= 0 {
   331  			request.slaveChannel <- nil // Getter wanted to give up by now.
   332  			close(request.slaveChannel)
   333  			driver.getterList.Remove(entry)
   334  		} else if timeout < wakeTimeout {
   335  			wakeTimeout = timeout
   336  		}
   337  	}
   338  	if driver.getterList.Len() > 0 ||
   339  		uint(len(driver.idleSlaves)) < driver.options.MinimumIdleSlaves {
   340  		if driver.createdSlaveChannel == nil {
   341  			ch := make(chan *Slave, 1)
   342  			driver.createdSlaveChannel = ch
   343  			go driver.createSlave(ch)
   344  		}
   345  	}
   346  	if uint(len(driver.idleSlaves)) > driver.options.MaximumIdleSlaves &&
   347  		driver.getterList.Len() < 1 {
   348  		for slave := range driver.idleSlaves {
   349  			if uint(len(driver.idleSlaves)) <=
   350  				driver.options.MaximumIdleSlaves {
   351  				break
   352  			}
   353  			delete(driver.idleSlaves, slave)
   354  			driver.zombies[slave] = struct{}{}
   355  			driver.writeState = true
   356  		}
   357  	}
   358  	for slave := range driver.zombies { // Close any connections.
   359  		if slave.client != nil {
   360  			if err := slave.client.Close(); err != nil {
   361  				driver.logger.Printf("error closing Client for slave: %s: %s\n",
   362  					slave, err)
   363  			}
   364  			slave.client = nil
   365  		}
   366  	}
   367  	for slave := range driver.zombies { // Destroy one zombie at a time.
   368  		if driver.destroyedSlaveChannel == nil {
   369  			ch := make(chan *Slave, 1)
   370  			driver.destroyedSlaveChannel = ch
   371  			go driver.destroySlave(slave, ch)
   372  		}
   373  		break
   374  	}
   375  	if driver.writeState {
   376  		if err := driver.databaseDriver.save(driver.getSlaves()); err != nil {
   377  			driver.logger.Println(err)
   378  		} else {
   379  			driver.writeState = false
   380  		}
   381  	}
   382  	for slave := range driver.idleSlaves {
   383  		if slave.pinging {
   384  			continue
   385  		}
   386  		if timeToPing := slave.timeToPing; time.Since(timeToPing) >= 0 {
   387  			slave.pinging = true
   388  			go slave.ping(driver.pingResponseChannel)
   389  		} else if timeout := time.Until(timeToPing); timeout < wakeTimeout {
   390  			wakeTimeout = timeout
   391  		}
   392  	}
   393  	if wakeTimeout < 0 {
   394  		wakeTimeout = 0
   395  	}
   396  	wakeTimer := time.NewTimer(wakeTimeout)
   397  	select {
   398  	case slave := <-driver.createdSlaveChannel:
   399  		driver.createdSlaveChannel = nil
   400  		if err := slave.acknowledge(driver.logger); err != nil {
   401  			driver.logger.Printf("error acknowledging slave: %s: %s\n",
   402  				slave, err)
   403  			break
   404  		}
   405  		driver.idleSlaves[slave] = struct{}{}
   406  		// Write state now to reduce chance of forgetting about this slave.
   407  		if err := driver.databaseDriver.save(driver.getSlaves()); err != nil {
   408  			driver.logger.Println(err)
   409  			driver.writeState = true
   410  		} else {
   411  			driver.writeState = false
   412  		}
   413  		return // Return now so that new slave can be sent to a getter quickly.
   414  	case slave := <-driver.destroySlaveChannel:
   415  		if _, ok := driver.idleSlaves[slave]; ok {
   416  			panic("destroying idle slave")
   417  		}
   418  		if _, ok := driver.zombies[slave]; ok {
   419  			panic("destroying zombie")
   420  		}
   421  		if _, ok := driver.busySlaves[slave]; !ok {
   422  			panic("destroying unknown slave")
   423  		}
   424  		delete(driver.busySlaves, slave)
   425  		driver.zombies[slave] = struct{}{}
   426  		driver.writeState = true
   427  	case slave := <-driver.destroyedSlaveChannel:
   428  		driver.destroyedSlaveChannel = nil
   429  		if slave != nil {
   430  			delete(driver.zombies, slave)
   431  			driver.writeState = true
   432  		}
   433  	case slaveChannel := <-driver.getSlaveChannel:
   434  		driver.getterList.PushBack(slaveChannel)
   435  	case slavesChannel := <-driver.getSlavesChannel:
   436  		slavesChannel <- driver.getSlaves()
   437  	case pingResponse := <-driver.pingResponseChannel:
   438  		slave := pingResponse.slave
   439  		slave.pinging = false
   440  		if err := pingResponse.error; err == nil {
   441  			slave.timeToPing = time.Now().Add(time.Minute)
   442  			driver.logger.Debugf(0, "ping: %s succeeded\n", slave)
   443  		} else {
   444  			driver.logger.Printf("error pinging: %s: %s\n", slave, err)
   445  			delete(driver.idleSlaves, slave)
   446  			driver.zombies[slave] = struct{}{}
   447  			driver.writeState = true
   448  		}
   449  	case slave := <-driver.releaseSlaveChannel:
   450  		if _, ok := driver.idleSlaves[slave]; ok {
   451  			panic("releasing idle slave")
   452  		}
   453  		if _, ok := driver.zombies[slave]; ok {
   454  			panic("releasing zombie")
   455  		}
   456  		if _, ok := driver.busySlaves[slave]; !ok {
   457  			panic("releasing unknown slave")
   458  		}
   459  		delete(driver.busySlaves, slave)
   460  		driver.idleSlaves[slave] = struct{}{}
   461  		driver.writeState = true
   462  		slave.timeToPing = time.Now().Add(100 * time.Millisecond)
   463  	case createIfNeeded := <-driver.replaceIdleChannel:
   464  		for slave := range driver.idleSlaves {
   465  			delete(driver.idleSlaves, slave)
   466  			driver.zombies[slave] = struct{}{}
   467  			driver.writeState = true
   468  		}
   469  		if createIfNeeded && driver.createdSlaveChannel == nil {
   470  			ch := make(chan *Slave, 1)
   471  			driver.createdSlaveChannel = ch
   472  			go driver.createSlave(ch)
   473  		}
   474  	case <-wakeTimer.C:
   475  	}
   476  	wakeTimer.Stop()
   477  	select {
   478  	case <-wakeTimer.C:
   479  	default:
   480  	}
   481  }
   482  
   483  func (driver *slaveDriver) watchRoll() {
   484  	for {
   485  		driver.rollCall()
   486  	}
   487  }
   488  
   489  func (driver *SlaveDriver) writeHtml(writer io.Writer) {
   490  	slavesChannel := make(chan slaveRoll)
   491  	driver.getSlavesChannel <- slavesChannel
   492  	slaves := <-slavesChannel
   493  	if len(slaves.BusySlaves) < 1 && len(slaves.IdleSlaves) < 1 &&
   494  		len(slaves.Zombies) < 1 {
   495  		fmt.Fprintf(writer, "No slaves for %s<br>\n", driver.options.Purpose)
   496  		return
   497  	}
   498  	fmt.Fprintf(writer, "Slaves for %s:<br>\n", driver.options.Purpose)
   499  	for _, slave := range slaves.BusySlaves {
   500  		fmt.Fprintf(writer,
   501  			"&nbsp;&nbsp;<a href=\"http://%s:%d/\">%s</a> (busy)<br>\n",
   502  			slave.IpAddress, driver.options.PortNumber, slave)
   503  	}
   504  	for _, slave := range slaves.IdleSlaves {
   505  		fmt.Fprintf(writer,
   506  			"&nbsp;&nbsp;<a href=\"http://%s:%d/\">%s</a> (idle)<br>\n",
   507  			slave.IpAddress, driver.options.PortNumber, slave)
   508  	}
   509  	for _, slave := range slaves.Zombies {
   510  		fmt.Fprintf(writer,
   511  			"&nbsp;&nbsp;<a href=\"http://%s:%d/\">%s</a> (zombie)<br>\n",
   512  			slave.IpAddress, driver.options.PortNumber, slave)
   513  	}
   514  }