gitlab.com/jfprevost/gitlab-runner-notlscheck@v11.11.4+incompatible/executors/docker/machine/provider_test.go (about)

     1  package machine
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/stretchr/testify/assert"
    12  	"gitlab.com/gitlab-org/gitlab-runner/common"
    13  	"gitlab.com/gitlab-org/gitlab-runner/helpers/docker"
    14  )
    15  
    16  var machineDefaultConfig = &common.RunnerConfig{
    17  	RunnerSettings: common.RunnerSettings{
    18  		Machine: &common.DockerMachine{
    19  			MachineName: "%s",
    20  			IdleTime:    5,
    21  		},
    22  	},
    23  }
    24  
    25  var machineCreateFail = &common.RunnerConfig{
    26  	RunnerSettings: common.RunnerSettings{
    27  		Machine: &common.DockerMachine{
    28  			MachineName: "create-fail-%s",
    29  			IdleTime:    5,
    30  		},
    31  	},
    32  }
    33  
    34  var machineProvisionFail = &common.RunnerConfig{
    35  	RunnerSettings: common.RunnerSettings{
    36  		Machine: &common.DockerMachine{
    37  			MachineName: "provision-fail-%s",
    38  			IdleTime:    5,
    39  		},
    40  	},
    41  }
    42  
    43  var machineSecondFail = &common.RunnerConfig{
    44  	RunnerSettings: common.RunnerSettings{
    45  		Machine: &common.DockerMachine{
    46  			MachineName: "second-fail-%s",
    47  			IdleTime:    5,
    48  		},
    49  	},
    50  }
    51  
    52  var machineNoConnect = &common.RunnerConfig{
    53  	RunnerSettings: common.RunnerSettings{
    54  		Machine: &common.DockerMachine{
    55  			MachineName: "no-connect-%s",
    56  			IdleTime:    5,
    57  		},
    58  	},
    59  }
    60  
    61  func createMachineConfig(idleCount int, idleTime int) *common.RunnerConfig {
    62  	return &common.RunnerConfig{
    63  		RunnerSettings: common.RunnerSettings{
    64  			Machine: &common.DockerMachine{
    65  				MachineName: "test-machine-%s",
    66  				IdleCount:   idleCount,
    67  				IdleTime:    idleTime,
    68  			},
    69  		},
    70  	}
    71  }
    72  
    73  func createMachineOffPeakIdleConfig(offPeakPeriod string) *common.RunnerConfig {
    74  	return &common.RunnerConfig{
    75  		RunnerSettings: common.RunnerSettings{
    76  			Machine: &common.DockerMachine{
    77  				MachineName:      "test-machine-%s",
    78  				IdleCount:        2,
    79  				IdleTime:         0,
    80  				OffPeakIdleCount: 0,
    81  				OffPeakIdleTime:  0,
    82  				OffPeakPeriods:   []string{offPeakPeriod},
    83  			},
    84  		},
    85  	}
    86  }
    87  
    88  type testMachine struct {
    89  	machines []string
    90  	second   bool
    91  
    92  	Created chan bool
    93  	Removed chan bool
    94  	Stopped chan bool
    95  
    96  	mutex sync.Mutex
    97  }
    98  
    99  func (m *testMachine) Create(driver, name string, opts ...string) error {
   100  	m.mutex.Lock()
   101  	defer m.mutex.Unlock()
   102  
   103  	if strings.Contains(name, "second-fail") {
   104  		if !m.second {
   105  			m.second = true
   106  			return errors.New("Failed to create")
   107  		}
   108  	} else if strings.Contains(name, "create-fail") || strings.Contains(name, "provision-fail") {
   109  		return errors.New("Failed to create")
   110  	}
   111  	m.machines = append(m.machines, name)
   112  	m.Created <- true
   113  
   114  	return nil
   115  }
   116  
   117  func (m *testMachine) Provision(name string) error {
   118  	m.mutex.Lock()
   119  	defer m.mutex.Unlock()
   120  
   121  	if strings.Contains(name, "provision-fail") || strings.Contains(name, "second-fail") {
   122  		return errors.New("Failed to provision")
   123  	}
   124  	m.machines = append(m.machines, name)
   125  	return nil
   126  }
   127  
   128  func (m *testMachine) Stop(name string, timeout time.Duration) error {
   129  	m.Stopped <- true
   130  
   131  	return nil
   132  }
   133  
   134  func (m *testMachine) Remove(name string) error {
   135  	m.mutex.Lock()
   136  	defer m.mutex.Unlock()
   137  
   138  	if name == "remove-fail" {
   139  		return errors.New("failed to remove")
   140  	}
   141  	var machines []string
   142  	for _, machine := range m.machines {
   143  		if machine != name {
   144  			machines = append(machines, machine)
   145  		}
   146  	}
   147  	m.machines = machines
   148  	m.Removed <- true
   149  
   150  	return nil
   151  }
   152  
   153  func (m *testMachine) Exist(name string) bool {
   154  	m.mutex.Lock()
   155  	defer m.mutex.Unlock()
   156  
   157  	for _, machine := range m.machines {
   158  		if machine == name {
   159  			return true
   160  		}
   161  	}
   162  	return false
   163  }
   164  
   165  func (m *testMachine) List() (machines []string, err error) {
   166  	m.mutex.Lock()
   167  	defer m.mutex.Unlock()
   168  
   169  	return m.machines, nil
   170  }
   171  
   172  func (m *testMachine) CanConnect(name string, skipCache bool) bool {
   173  	if strings.Contains(name, "no-can-connect") {
   174  		return false
   175  	}
   176  	return true
   177  }
   178  
   179  func (m *testMachine) Credentials(name string) (dc docker_helpers.DockerCredentials, err error) {
   180  	if strings.Contains(name, "no-connect") {
   181  		err = errors.New("Failed to connect")
   182  	}
   183  	return
   184  }
   185  
   186  func countIdleMachines(p *machineProvider) (count int) {
   187  	p.lock.RLock()
   188  	defer p.lock.RUnlock()
   189  
   190  	for _, details := range p.details {
   191  		if details.State == machineStateIdle {
   192  			count++
   193  		}
   194  	}
   195  	return
   196  }
   197  
   198  func assertIdleMachines(t *testing.T, p *machineProvider, expected int, msgAndArgs ...interface{}) bool {
   199  	var idle int
   200  	for i := 0; i < 10; i++ {
   201  		idle = countIdleMachines(p)
   202  
   203  		if expected == idle {
   204  			return true
   205  		}
   206  
   207  		time.Sleep(50 * time.Microsecond)
   208  	}
   209  
   210  	result := fmt.Sprintf("should have %d idle, but has %d", expected, idle)
   211  	assert.Fail(t, result, msgAndArgs...)
   212  	return false
   213  }
   214  
   215  func countTotalMachines(p *machineProvider) (count int) {
   216  	p.lock.RLock()
   217  	defer p.lock.RUnlock()
   218  
   219  	for _, details := range p.details {
   220  		if details.State != machineStateRemoving {
   221  			count++
   222  		}
   223  	}
   224  	return
   225  }
   226  
   227  func assertTotalMachines(t *testing.T, p *machineProvider, expected int, msgAndArgs ...interface{}) bool {
   228  	var total int
   229  	for i := 0; i < 10; i++ {
   230  		total = countTotalMachines(p)
   231  
   232  		if expected == total {
   233  			return true
   234  		}
   235  
   236  		time.Sleep(50 * time.Microsecond)
   237  	}
   238  
   239  	result := fmt.Sprintf("should have %d total, but has %d", expected, total)
   240  	assert.Fail(t, result, msgAndArgs...)
   241  	return false
   242  }
   243  
   244  func testMachineProvider(machine ...string) (*machineProvider, *testMachine) {
   245  	t := &testMachine{
   246  		machines: machine,
   247  		Created:  make(chan bool, 10),
   248  		Removed:  make(chan bool, 10),
   249  		Stopped:  make(chan bool, 10),
   250  	}
   251  	p := newMachineProvider("docker+machine", "docker")
   252  	p.machine = t
   253  	return p, t
   254  }
   255  
   256  func TestMachineDetails(t *testing.T) {
   257  	p, _ := testMachineProvider()
   258  	m1 := p.machineDetails("test", false)
   259  	assert.NotNil(t, m1, "returns a new machine")
   260  	assert.Equal(t, machineStateIdle, m1.State)
   261  	assert.Equal(t, 1, m1.UsedCount)
   262  
   263  	m2 := p.machineDetails("test", false)
   264  	assert.Equal(t, m1, m2, "returns the same machine")
   265  
   266  	m3 := p.machineDetails("test", true)
   267  	assert.Equal(t, machineStateAcquired, m3.State, "acquires machine")
   268  
   269  	m4 := p.machineDetails("test", true)
   270  	assert.Nil(t, m4, "fails to return re-acquired machine")
   271  
   272  	m5 := p.machineDetails("test", false)
   273  	assert.Equal(t, m1, m5, "returns acquired machine")
   274  	assert.Equal(t, machineStateAcquired, m5.State, "machine is acquired")
   275  }
   276  
   277  func TestMachineFindFree(t *testing.T) {
   278  	p, tm := testMachineProvider("no-can-connect")
   279  	d1 := p.findFreeMachine(false)
   280  	assert.Nil(t, d1, "no machines, return nil")
   281  
   282  	d2 := p.findFreeMachine(false, "machine1")
   283  	assert.NotNil(t, d2, "acquire one machine")
   284  
   285  	d3 := p.findFreeMachine(false, "machine1")
   286  	assert.Nil(t, d3, "fail to acquire that machine")
   287  
   288  	d4 := p.findFreeMachine(false, "machine1", "machine2")
   289  	assert.NotNil(t, d4, "acquire a new machine")
   290  	assert.NotEqual(t, d2, d4, "and it's a different machine")
   291  
   292  	assert.Len(t, tm.machines, 1, "has one machine")
   293  	d5 := p.findFreeMachine(false, "machine1", "no-can-connect")
   294  	assert.Nil(t, d5, "fails to acquire machine to which he can't connect")
   295  }
   296  
   297  func TestMachineCreationAndRemoval(t *testing.T) {
   298  	provisionRetryInterval = 0
   299  
   300  	p, _ := testMachineProvider()
   301  	d, errCh := p.create(machineDefaultConfig, machineStateUsed)
   302  	assert.NotNil(t, d)
   303  	assert.NoError(t, <-errCh)
   304  	assert.Equal(t, machineStateUsed, d.State)
   305  	assert.Equal(t, 0, d.UsedCount)
   306  	assert.NotNil(t, p.details[d.Name])
   307  
   308  	d2, errCh := p.create(machineProvisionFail, machineStateUsed)
   309  	assert.NotNil(t, d2)
   310  	assert.Error(t, <-errCh, "Fails, because it fails to provision machine")
   311  	assert.Equal(t, machineStateRemoving, d2.State)
   312  
   313  	d3, errCh := p.create(machineCreateFail, machineStateUsed)
   314  	assert.NotNil(t, d3)
   315  	assert.NoError(t, <-errCh)
   316  	assert.Equal(t, machineStateUsed, d3.State)
   317  
   318  	p.remove(d.Name)
   319  	assert.Equal(t, machineStateRemoving, d.State)
   320  }
   321  
   322  func TestMachineUse(t *testing.T) {
   323  	provisionRetryInterval = 0
   324  
   325  	p, _ := testMachineProvider("machine1")
   326  
   327  	d1, err := p.useMachine(machineDefaultConfig)
   328  	assert.NotNil(t, d1)
   329  	assert.NoError(t, err)
   330  	assert.Equal(t, machineStateAcquired, d1.State)
   331  	assert.Equal(t, "machine1", d1.Name, "finds a free machine1")
   332  
   333  	d2, err := p.useMachine(machineDefaultConfig)
   334  	assert.NotNil(t, d2)
   335  	assert.NoError(t, err)
   336  	assert.Equal(t, machineStateAcquired, d2.State)
   337  	assert.NotEqual(t, "machine1", d2.Name, "creates a new machine")
   338  
   339  	_, err = p.useMachine(machineProvisionFail)
   340  	assert.Error(t, err, "fails to create a new machine")
   341  }
   342  
   343  func TestMachineTestRetry(t *testing.T) {
   344  	provisionRetryInterval = 0
   345  
   346  	p, _ := testMachineProvider()
   347  	_, err := p.useMachine(machineSecondFail)
   348  	assert.Error(t, err, "fails to create a new machine")
   349  
   350  	p, _ = testMachineProvider()
   351  	d1, err := p.retryUseMachine(machineSecondFail)
   352  	assert.NoError(t, err, "after replying the same test scenario and using retry it succeeds")
   353  	assert.Equal(t, machineStateAcquired, d1.State)
   354  }
   355  
   356  func TestMachineAcquireAndRelease(t *testing.T) {
   357  	p, _ := testMachineProvider("test-machine")
   358  
   359  	d1, err := p.Acquire(machineDefaultConfig)
   360  	assert.NoError(t, err)
   361  	assert.NotNil(t, d1, "acquires machine")
   362  
   363  	d2, _ := p.Acquire(machineDefaultConfig)
   364  	assert.Nil(t, d2, "fails to acquire a machine")
   365  
   366  	p.Release(machineDefaultConfig, d1)
   367  
   368  	d3, err := p.Acquire(machineDefaultConfig)
   369  	assert.NoError(t, err)
   370  	assert.Equal(t, d1, d3, "acquires released machine")
   371  }
   372  
   373  func TestMachineOnDemandMode(t *testing.T) {
   374  	p, _ := testMachineProvider()
   375  
   376  	config := createMachineConfig(0, 1)
   377  	_, err := p.Acquire(config)
   378  	assert.NoError(t, err)
   379  }
   380  
   381  func TestMachinePreCreateMode(t *testing.T) {
   382  	p, m := testMachineProvider()
   383  
   384  	config := createMachineConfig(1, 5)
   385  	d, err := p.Acquire(config)
   386  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   387  	assert.Nil(t, d)
   388  
   389  	<-m.Created
   390  	assertIdleMachines(t, p, 1, "it should contain exactly one machine")
   391  
   392  	d, err = p.Acquire(config)
   393  	assert.NoError(t, err, "it should be ready to process builds")
   394  	assertIdleMachines(t, p, 0, "it should acquire the free node")
   395  	p.Release(config, d)
   396  	assertIdleMachines(t, p, 1, "after releasing it should have one free node")
   397  
   398  	config = createMachineConfig(2, 5)
   399  	d, err = p.Acquire(config)
   400  	assert.NoError(t, err)
   401  	p.Release(config, d)
   402  
   403  	<-m.Created
   404  	assertIdleMachines(t, p, 2, "it should start creating a second machine")
   405  
   406  	config = createMachineConfig(1, 0)
   407  	config.Limit = 1
   408  	d, err = p.Acquire(config)
   409  	assert.NoError(t, err)
   410  	p.Release(config, d)
   411  
   412  	<-m.Stopped
   413  	<-m.Removed
   414  	assertIdleMachines(t, p, 1, "it should downscale to single machine")
   415  
   416  	d, err = p.Acquire(config)
   417  	assert.NoError(t, err, "we should acquire single machine")
   418  
   419  	_, err = p.Acquire(config)
   420  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   421  	p.Release(config, d)
   422  	assertIdleMachines(t, p, 1, "it should leave one idle")
   423  }
   424  
   425  func TestMachineLimitMax(t *testing.T) {
   426  	p, _ := testMachineProvider()
   427  
   428  	config := createMachineConfig(10, 5)
   429  	config.Limit = 5
   430  
   431  	d, err := p.Acquire(config)
   432  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   433  	assert.Nil(t, d)
   434  	assertIdleMachines(t, p, 5, "it should contain exactly a maximum of 5 nodes")
   435  
   436  	config.Limit = 8
   437  	d, err = p.Acquire(config)
   438  	p.Release(config, d)
   439  	assertIdleMachines(t, p, 8, "it should upscale to 8 nodes")
   440  
   441  	config.Limit = 2
   442  	d, err = p.Acquire(config)
   443  	p.Release(config, d)
   444  	assertIdleMachines(t, p, 2, "it should downscale to 2 nodes")
   445  }
   446  
   447  func TestMachineMaxBuildsForExistingMachines(t *testing.T) {
   448  	provisionRetryInterval = 0
   449  
   450  	p, _ := testMachineProvider("remove-fail")
   451  	config := createMachineConfig(1, 5)
   452  	config.Machine.MaxBuilds = 1
   453  	d, err := p.Acquire(config)
   454  	assert.Error(t, err)
   455  	assert.Nil(t, d)
   456  }
   457  
   458  func TestMachineMaxBuilds(t *testing.T) {
   459  	config := createMachineConfig(1, 5)
   460  	p, _ := testMachineProvider(newMachineName(config))
   461  	config.Machine.MaxBuilds = 2 // by default we set it to 1
   462  	d, err := p.Acquire(config)
   463  	assert.NoError(t, err)
   464  	assert.NotNil(t, d)
   465  
   466  	_, nd, err := p.Use(config, d)
   467  	assert.NoError(t, err)
   468  	assert.Nil(t, nd, "we passed the data, we should not get the data now")
   469  
   470  	p.Release(config, d)
   471  
   472  	dd := d.(*machineDetails)
   473  	assert.Equal(t, machineStateRemoving, dd.State, "the machine should be removed due to too many builds")
   474  	assert.Equal(t, "Too many builds", dd.Reason, "the machine should be removed due to too many builds")
   475  }
   476  
   477  func TestMachineIdleLimits(t *testing.T) {
   478  	p, _ := testMachineProvider()
   479  
   480  	config := createMachineConfig(2, 1)
   481  	d, errCh := p.create(config, machineStateIdle)
   482  	assert.NoError(t, <-errCh, "machine creation should not fail")
   483  
   484  	d2, err := p.Acquire(config)
   485  	p.Release(config, d2)
   486  	assert.NoError(t, err)
   487  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because is still in idle time")
   488  
   489  	config = createMachineConfig(2, 0)
   490  	d3, err := p.Acquire(config)
   491  	p.Release(config, d3)
   492  	assert.NoError(t, err)
   493  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because no more than two idle")
   494  
   495  	config = createMachineConfig(0, 0)
   496  	d4, err := p.Acquire(config)
   497  	p.Release(config, d4)
   498  	assert.NoError(t, err)
   499  	assert.Equal(t, machineStateRemoving, d.State, "machine should not be removed, because no more than two idle")
   500  	assert.Equal(t, "Too many idle machines", d.Reason)
   501  }
   502  
   503  func TestMachineOffPeakIdleLimits(t *testing.T) {
   504  	daysOfWeek := map[time.Weekday]string{
   505  		time.Monday:    "mon",
   506  		time.Tuesday:   "tue",
   507  		time.Wednesday: "wed",
   508  		time.Thursday:  "thu",
   509  		time.Friday:    "fri",
   510  		time.Saturday:  "sat",
   511  		time.Sunday:    "sun",
   512  	}
   513  	now := time.Now()
   514  	offPeakEnabledPeriod := fmt.Sprintf("* * * * * %s *", daysOfWeek[now.Weekday()])
   515  	offPeakDisabledPeriod := fmt.Sprintf("* * * * * %s *", daysOfWeek[now.Add(time.Hour*48).Weekday()])
   516  
   517  	p, _ := testMachineProvider()
   518  
   519  	config := createMachineOffPeakIdleConfig(offPeakDisabledPeriod)
   520  	d, errCh := p.create(config, machineStateIdle)
   521  	assert.NoError(t, <-errCh, "machine creation should not fail")
   522  
   523  	d2, err := p.Acquire(config)
   524  	assert.NoError(t, err)
   525  	p.Release(config, d2)
   526  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because not in OffPeak time mode")
   527  
   528  	config = createMachineOffPeakIdleConfig(offPeakEnabledPeriod)
   529  	d3, err := p.Acquire(config)
   530  	p.Release(config, d3)
   531  	assert.NoError(t, err)
   532  	assert.Equal(t, machineStateRemoving, d.State, "machine should be removed, because in OffPeak time mode")
   533  }
   534  
   535  func TestMachineUseOnDemand(t *testing.T) {
   536  	provisionRetryInterval = 0
   537  
   538  	p, _ := testMachineProvider()
   539  
   540  	_, nd, err := p.Use(machineDefaultConfig, nil)
   541  	assert.NoError(t, err, "it create a new machine")
   542  	assert.NotNil(t, nd)
   543  	assertTotalMachines(t, p, 1, "it creates one machine")
   544  
   545  	_, nd2, err := p.Use(machineDefaultConfig, nil)
   546  	assert.NoError(t, err, "it create a new machine")
   547  	assert.NotNil(t, nd2)
   548  	assertTotalMachines(t, p, 2, "it creates two machines")
   549  
   550  	_, _, err = p.Use(machineProvisionFail, nil)
   551  	assert.Error(t, err, "fail to create a new machine")
   552  	assertTotalMachines(t, p, 2, "it fails to create a third machine")
   553  
   554  	_, _, err = p.Use(machineNoConnect, nil)
   555  	assert.Error(t, err, "fail to create a new machine on connect")
   556  	assertTotalMachines(t, p, 3, "it fails on no-connect, but we leave the machine created")
   557  }
   558  
   559  func TestMachineReleaseIfInvalidDataArePassed(t *testing.T) {
   560  	p, _ := testMachineProvider()
   561  
   562  	_, nd, err := p.Use(machineDefaultConfig, nil)
   563  	assert.NoError(t, err, "it create a new machine")
   564  	assert.NotNil(t, nd)
   565  	assertTotalMachines(t, p, 1, "it creates one machine")
   566  
   567  	p.Release(nil, nd)
   568  }
   569  
   570  func TestMachineCreationIfFailedToConnect(t *testing.T) {
   571  	p, _ := testMachineProvider()
   572  
   573  	_, nd, err := p.Use(machineNoConnect, nil)
   574  	assert.Error(t, err, "it create a new machine")
   575  	assert.Nil(t, nd)
   576  }