github.com/secure-build/gitlab-runner@v12.5.0+incompatible/executors/docker/machine/provider_test.go (about)

     1  package machine
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/stretchr/testify/assert"
    12  
    13  	"gitlab.com/gitlab-org/gitlab-runner/common"
    14  	docker_helpers "gitlab.com/gitlab-org/gitlab-runner/helpers/docker"
    15  )
    16  
    17  var machineDefaultConfig = &common.RunnerConfig{
    18  	RunnerSettings: common.RunnerSettings{
    19  		Machine: &common.DockerMachine{
    20  			MachineName: "%s",
    21  			IdleTime:    5,
    22  		},
    23  	},
    24  }
    25  
    26  var machineCreateFail = &common.RunnerConfig{
    27  	RunnerSettings: common.RunnerSettings{
    28  		Machine: &common.DockerMachine{
    29  			MachineName: "create-fail-%s",
    30  			IdleTime:    5,
    31  		},
    32  	},
    33  }
    34  
    35  var machineProvisionFail = &common.RunnerConfig{
    36  	RunnerSettings: common.RunnerSettings{
    37  		Machine: &common.DockerMachine{
    38  			MachineName: "provision-fail-%s",
    39  			IdleTime:    5,
    40  		},
    41  	},
    42  }
    43  
    44  var machineSecondFail = &common.RunnerConfig{
    45  	RunnerSettings: common.RunnerSettings{
    46  		Machine: &common.DockerMachine{
    47  			MachineName: "second-fail-%s",
    48  			IdleTime:    5,
    49  		},
    50  	},
    51  }
    52  
    53  var machineNoConnect = &common.RunnerConfig{
    54  	RunnerSettings: common.RunnerSettings{
    55  		Machine: &common.DockerMachine{
    56  			MachineName: "no-connect-%s",
    57  			IdleTime:    5,
    58  		},
    59  	},
    60  }
    61  
    62  func createMachineConfig(idleCount int, idleTime int) *common.RunnerConfig {
    63  	return &common.RunnerConfig{
    64  		RunnerSettings: common.RunnerSettings{
    65  			Machine: &common.DockerMachine{
    66  				MachineName: "test-machine-%s",
    67  				IdleCount:   idleCount,
    68  				IdleTime:    idleTime,
    69  			},
    70  		},
    71  	}
    72  }
    73  
    74  func createMachineOffPeakIdleConfig(offPeakPeriod string) *common.RunnerConfig {
    75  	return &common.RunnerConfig{
    76  		RunnerSettings: common.RunnerSettings{
    77  			Machine: &common.DockerMachine{
    78  				MachineName:      "test-machine-%s",
    79  				IdleCount:        2,
    80  				IdleTime:         0,
    81  				OffPeakIdleCount: 0,
    82  				OffPeakIdleTime:  0,
    83  				OffPeakPeriods:   []string{offPeakPeriod},
    84  			},
    85  		},
    86  	}
    87  }
    88  
// testMachine is an in-memory fake of the machine driver used by the provider
// under test. Its methods choose between success and failure based on marker
// substrings in the machine name.
type testMachine struct {
	// machines holds the names of the machines the fake currently knows about.
	machines []string
	// second is set once the first Create call for a "second-fail" machine
	// has failed, so that later Create calls for such machines succeed.
	second bool

	// Created, Removed and Stopped each receive a value when the
	// corresponding operation succeeds, letting tests wait for it.
	Created chan bool
	Removed chan bool
	Stopped chan bool

	// mutex is held by Create, Provision, Remove, Exist and List while they
	// touch machines and second.
	mutex sync.Mutex
}
    99  
   100  func (m *testMachine) Create(driver, name string, opts ...string) error {
   101  	m.mutex.Lock()
   102  	defer m.mutex.Unlock()
   103  
   104  	if strings.Contains(name, "second-fail") {
   105  		if !m.second {
   106  			m.second = true
   107  			return errors.New("failed to create")
   108  		}
   109  	} else if strings.Contains(name, "create-fail") || strings.Contains(name, "provision-fail") {
   110  		return errors.New("failed to create")
   111  	}
   112  	m.machines = append(m.machines, name)
   113  	m.Created <- true
   114  
   115  	return nil
   116  }
   117  
   118  func (m *testMachine) Provision(name string) error {
   119  	m.mutex.Lock()
   120  	defer m.mutex.Unlock()
   121  
   122  	if strings.Contains(name, "provision-fail") || strings.Contains(name, "second-fail") {
   123  		return errors.New("failed to provision")
   124  	}
   125  	m.machines = append(m.machines, name)
   126  	return nil
   127  }
   128  
   129  func (m *testMachine) Stop(name string, timeout time.Duration) error {
   130  	m.Stopped <- true
   131  
   132  	return nil
   133  }
   134  
   135  func (m *testMachine) Remove(name string) error {
   136  	m.mutex.Lock()
   137  	defer m.mutex.Unlock()
   138  
   139  	if name == "remove-fail" {
   140  		return errors.New("failed to remove")
   141  	}
   142  	var machines []string
   143  	for _, machine := range m.machines {
   144  		if machine != name {
   145  			machines = append(machines, machine)
   146  		}
   147  	}
   148  	m.machines = machines
   149  	m.Removed <- true
   150  
   151  	return nil
   152  }
   153  
   154  func (m *testMachine) Exist(name string) bool {
   155  	m.mutex.Lock()
   156  	defer m.mutex.Unlock()
   157  
   158  	for _, machine := range m.machines {
   159  		if machine == name {
   160  			return true
   161  		}
   162  	}
   163  	return false
   164  }
   165  
   166  func (m *testMachine) List() (machines []string, err error) {
   167  	m.mutex.Lock()
   168  	defer m.mutex.Unlock()
   169  
   170  	return m.machines, nil
   171  }
   172  
   173  func (m *testMachine) CanConnect(name string, skipCache bool) bool {
   174  	if strings.Contains(name, "no-can-connect") {
   175  		return false
   176  	}
   177  	return true
   178  }
   179  
   180  func (m *testMachine) Credentials(name string) (dc docker_helpers.DockerCredentials, err error) {
   181  	if strings.Contains(name, "no-connect") {
   182  		err = errors.New("failed to connect")
   183  	}
   184  	return
   185  }
   186  
   187  func countIdleMachines(p *machineProvider) (count int) {
   188  	p.lock.RLock()
   189  	defer p.lock.RUnlock()
   190  
   191  	for _, details := range p.details {
   192  		if details.State == machineStateIdle {
   193  			count++
   194  		}
   195  	}
   196  	return
   197  }
   198  
   199  func assertIdleMachines(t *testing.T, p *machineProvider, expected int, msgAndArgs ...interface{}) bool {
   200  	var idle int
   201  	for i := 0; i < 10; i++ {
   202  		idle = countIdleMachines(p)
   203  
   204  		if expected == idle {
   205  			return true
   206  		}
   207  
   208  		time.Sleep(50 * time.Microsecond)
   209  	}
   210  
   211  	result := fmt.Sprintf("should have %d idle, but has %d", expected, idle)
   212  	assert.Fail(t, result, msgAndArgs...)
   213  	return false
   214  }
   215  
   216  func countTotalMachines(p *machineProvider) (count int) {
   217  	p.lock.RLock()
   218  	defer p.lock.RUnlock()
   219  
   220  	for _, details := range p.details {
   221  		if details.State != machineStateRemoving {
   222  			count++
   223  		}
   224  	}
   225  	return
   226  }
   227  
   228  func assertTotalMachines(t *testing.T, p *machineProvider, expected int, msgAndArgs ...interface{}) bool {
   229  	var total int
   230  	for i := 0; i < 10; i++ {
   231  		total = countTotalMachines(p)
   232  
   233  		if expected == total {
   234  			return true
   235  		}
   236  
   237  		time.Sleep(50 * time.Microsecond)
   238  	}
   239  
   240  	result := fmt.Sprintf("should have %d total, but has %d", expected, total)
   241  	assert.Fail(t, result, msgAndArgs...)
   242  	return false
   243  }
   244  
   245  func testMachineProvider(machine ...string) (*machineProvider, *testMachine) {
   246  	t := &testMachine{
   247  		machines: machine,
   248  		Created:  make(chan bool, 10),
   249  		Removed:  make(chan bool, 10),
   250  		Stopped:  make(chan bool, 10),
   251  	}
   252  	p := newMachineProvider("docker+machine", "docker")
   253  	p.machine = t
   254  	return p, t
   255  }
   256  
   257  func TestMachineDetails(t *testing.T) {
   258  	p, _ := testMachineProvider()
   259  	m1 := p.machineDetails("test", false)
   260  	assert.NotNil(t, m1, "returns a new machine")
   261  	assert.Equal(t, machineStateIdle, m1.State)
   262  	assert.Equal(t, 1, m1.UsedCount)
   263  
   264  	m2 := p.machineDetails("test", false)
   265  	assert.Equal(t, m1, m2, "returns the same machine")
   266  
   267  	m3 := p.machineDetails("test", true)
   268  	assert.Equal(t, machineStateAcquired, m3.State, "acquires machine")
   269  
   270  	m4 := p.machineDetails("test", true)
   271  	assert.Nil(t, m4, "fails to return re-acquired machine")
   272  
   273  	m5 := p.machineDetails("test", false)
   274  	assert.Equal(t, m1, m5, "returns acquired machine")
   275  	assert.Equal(t, machineStateAcquired, m5.State, "machine is acquired")
   276  }
   277  
   278  func TestMachineFindFree(t *testing.T) {
   279  	p, tm := testMachineProvider("no-can-connect")
   280  	d1 := p.findFreeMachine(false)
   281  	assert.Nil(t, d1, "no machines, return nil")
   282  
   283  	d2 := p.findFreeMachine(false, "machine1")
   284  	assert.NotNil(t, d2, "acquire one machine")
   285  
   286  	d3 := p.findFreeMachine(false, "machine1")
   287  	assert.Nil(t, d3, "fail to acquire that machine")
   288  
   289  	d4 := p.findFreeMachine(false, "machine1", "machine2")
   290  	assert.NotNil(t, d4, "acquire a new machine")
   291  	assert.NotEqual(t, d2, d4, "and it's a different machine")
   292  
   293  	assert.Len(t, tm.machines, 1, "has one machine")
   294  	d5 := p.findFreeMachine(false, "machine1", "no-can-connect")
   295  	assert.Nil(t, d5, "fails to acquire machine to which he can't connect")
   296  }
   297  
   298  func TestMachineCreationAndRemoval(t *testing.T) {
   299  	provisionRetryInterval = 0
   300  
   301  	p, _ := testMachineProvider()
   302  	d, errCh := p.create(machineDefaultConfig, machineStateUsed)
   303  	assert.NotNil(t, d)
   304  	assert.NoError(t, <-errCh)
   305  	assert.Equal(t, machineStateUsed, d.State)
   306  	assert.Equal(t, 0, d.UsedCount)
   307  	assert.NotNil(t, p.details[d.Name])
   308  
   309  	d2, errCh := p.create(machineProvisionFail, machineStateUsed)
   310  	assert.NotNil(t, d2)
   311  	assert.Error(t, <-errCh, "Fails, because it fails to provision machine")
   312  	assert.Equal(t, machineStateRemoving, d2.State)
   313  
   314  	d3, errCh := p.create(machineCreateFail, machineStateUsed)
   315  	assert.NotNil(t, d3)
   316  	assert.NoError(t, <-errCh)
   317  	assert.Equal(t, machineStateUsed, d3.State)
   318  
   319  	err := p.remove(d.Name)
   320  	assert.NoError(t, err)
   321  	assert.Equal(t, machineStateRemoving, d.State)
   322  }
   323  
   324  func TestMachineUse(t *testing.T) {
   325  	provisionRetryInterval = 0
   326  
   327  	p, _ := testMachineProvider("machine1")
   328  
   329  	d1, err := p.useMachine(machineDefaultConfig)
   330  	assert.NotNil(t, d1)
   331  	assert.NoError(t, err)
   332  	assert.Equal(t, machineStateAcquired, d1.State)
   333  	assert.Equal(t, "machine1", d1.Name, "finds a free machine1")
   334  
   335  	d2, err := p.useMachine(machineDefaultConfig)
   336  	assert.NotNil(t, d2)
   337  	assert.NoError(t, err)
   338  	assert.Equal(t, machineStateAcquired, d2.State)
   339  	assert.NotEqual(t, "machine1", d2.Name, "creates a new machine")
   340  
   341  	_, err = p.useMachine(machineProvisionFail)
   342  	assert.Error(t, err, "fails to create a new machine")
   343  }
   344  
   345  func TestMachineTestRetry(t *testing.T) {
   346  	provisionRetryInterval = 0
   347  
   348  	p, _ := testMachineProvider()
   349  	_, err := p.useMachine(machineSecondFail)
   350  	assert.Error(t, err, "fails to create a new machine")
   351  
   352  	p, _ = testMachineProvider()
   353  	d1, err := p.retryUseMachine(machineSecondFail)
   354  	assert.NoError(t, err, "after replying the same test scenario and using retry it succeeds")
   355  	assert.Equal(t, machineStateAcquired, d1.State)
   356  }
   357  
   358  func TestMachineAcquireAndRelease(t *testing.T) {
   359  	p, _ := testMachineProvider("test-machine")
   360  
   361  	d1, err := p.Acquire(machineDefaultConfig)
   362  	assert.NoError(t, err)
   363  	assert.NotNil(t, d1, "acquires machine")
   364  
   365  	d2, _ := p.Acquire(machineDefaultConfig)
   366  	assert.Nil(t, d2, "fails to acquire a machine")
   367  
   368  	p.Release(machineDefaultConfig, d1)
   369  
   370  	d3, err := p.Acquire(machineDefaultConfig)
   371  	assert.NoError(t, err)
   372  	assert.Equal(t, d1, d3, "acquires released machine")
   373  }
   374  
   375  func TestMachineOnDemandMode(t *testing.T) {
   376  	p, _ := testMachineProvider()
   377  
   378  	config := createMachineConfig(0, 1)
   379  	_, err := p.Acquire(config)
   380  	assert.NoError(t, err)
   381  }
   382  
   383  func TestMachinePreCreateMode(t *testing.T) {
   384  	p, m := testMachineProvider()
   385  
   386  	config := createMachineConfig(1, 5)
   387  	d, err := p.Acquire(config)
   388  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   389  	assert.Nil(t, d)
   390  
   391  	<-m.Created
   392  	assertIdleMachines(t, p, 1, "it should contain exactly one machine")
   393  
   394  	d, err = p.Acquire(config)
   395  	assert.NoError(t, err, "it should be ready to process builds")
   396  	assertIdleMachines(t, p, 0, "it should acquire the free node")
   397  	p.Release(config, d)
   398  	assertIdleMachines(t, p, 1, "after releasing it should have one free node")
   399  
   400  	config = createMachineConfig(2, 5)
   401  	d, err = p.Acquire(config)
   402  	assert.NoError(t, err)
   403  	p.Release(config, d)
   404  
   405  	<-m.Created
   406  	assertIdleMachines(t, p, 2, "it should start creating a second machine")
   407  
   408  	config = createMachineConfig(1, 0)
   409  	config.Limit = 1
   410  	d, err = p.Acquire(config)
   411  	assert.NoError(t, err)
   412  	p.Release(config, d)
   413  
   414  	<-m.Stopped
   415  	<-m.Removed
   416  	assertIdleMachines(t, p, 1, "it should downscale to single machine")
   417  
   418  	d, err = p.Acquire(config)
   419  	assert.NoError(t, err, "we should acquire single machine")
   420  
   421  	_, err = p.Acquire(config)
   422  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   423  	p.Release(config, d)
   424  	assertIdleMachines(t, p, 1, "it should leave one idle")
   425  }
   426  
   427  func TestMachineLimitMax(t *testing.T) {
   428  	p, _ := testMachineProvider()
   429  
   430  	config := createMachineConfig(10, 5)
   431  	config.Limit = 5
   432  
   433  	d, err := p.Acquire(config)
   434  	assert.Error(t, err, "it should fail with message that currently there's no free machines")
   435  	assert.Nil(t, d)
   436  	assertIdleMachines(t, p, 5, "it should contain exactly a maximum of 5 nodes")
   437  
   438  	config.Limit = 8
   439  	d, err = p.Acquire(config)
   440  	assert.NoError(t, err)
   441  	p.Release(config, d)
   442  	assertIdleMachines(t, p, 8, "it should upscale to 8 nodes")
   443  
   444  	config.Limit = 2
   445  	d, err = p.Acquire(config)
   446  	assert.NoError(t, err)
   447  	p.Release(config, d)
   448  	assertIdleMachines(t, p, 2, "it should downscale to 2 nodes")
   449  }
   450  
   451  func TestMachineMaxBuildsForExistingMachines(t *testing.T) {
   452  	provisionRetryInterval = 0
   453  
   454  	p, _ := testMachineProvider("remove-fail")
   455  	config := createMachineConfig(1, 5)
   456  	config.Machine.MaxBuilds = 1
   457  	d, err := p.Acquire(config)
   458  	assert.Error(t, err)
   459  	assert.Nil(t, d)
   460  }
   461  
   462  func TestMachineMaxBuilds(t *testing.T) {
   463  	config := createMachineConfig(1, 5)
   464  	p, _ := testMachineProvider(newMachineName(config))
   465  	config.Machine.MaxBuilds = 2 // by default we set it to 1
   466  	d, err := p.Acquire(config)
   467  	assert.NoError(t, err)
   468  	assert.NotNil(t, d)
   469  
   470  	_, nd, err := p.Use(config, d)
   471  	assert.NoError(t, err)
   472  	assert.Nil(t, nd, "we passed the data, we should not get the data now")
   473  
   474  	p.Release(config, d)
   475  
   476  	dd := d.(*machineDetails)
   477  	assert.Equal(t, machineStateRemoving, dd.State, "the machine should be removed due to too many builds")
   478  	assert.Equal(t, "Too many builds", dd.Reason, "the machine should be removed due to too many builds")
   479  }
   480  
   481  func TestMachineIdleLimits(t *testing.T) {
   482  	p, _ := testMachineProvider()
   483  
   484  	config := createMachineConfig(2, 1)
   485  	d, errCh := p.create(config, machineStateIdle)
   486  	assert.NoError(t, <-errCh, "machine creation should not fail")
   487  
   488  	d2, err := p.Acquire(config)
   489  	p.Release(config, d2)
   490  	assert.NoError(t, err)
   491  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because is still in idle time")
   492  
   493  	config = createMachineConfig(2, 0)
   494  	d3, err := p.Acquire(config)
   495  	p.Release(config, d3)
   496  	assert.NoError(t, err)
   497  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because no more than two idle")
   498  
   499  	config = createMachineConfig(0, 0)
   500  	d4, err := p.Acquire(config)
   501  	p.Release(config, d4)
   502  	assert.NoError(t, err)
   503  	assert.Equal(t, machineStateRemoving, d.State, "machine should not be removed, because no more than two idle")
   504  	assert.Equal(t, "too many idle machines", d.Reason)
   505  }
   506  
   507  func TestMachineOffPeakIdleLimits(t *testing.T) {
   508  	daysOfWeek := map[time.Weekday]string{
   509  		time.Monday:    "mon",
   510  		time.Tuesday:   "tue",
   511  		time.Wednesday: "wed",
   512  		time.Thursday:  "thu",
   513  		time.Friday:    "fri",
   514  		time.Saturday:  "sat",
   515  		time.Sunday:    "sun",
   516  	}
   517  	now := time.Now()
   518  	offPeakEnabledPeriod := fmt.Sprintf("* * * * * %s *", daysOfWeek[now.Weekday()])
   519  	offPeakDisabledPeriod := fmt.Sprintf("* * * * * %s *", daysOfWeek[now.Add(time.Hour*48).Weekday()])
   520  
   521  	p, _ := testMachineProvider()
   522  
   523  	config := createMachineOffPeakIdleConfig(offPeakDisabledPeriod)
   524  	d, errCh := p.create(config, machineStateIdle)
   525  	assert.NoError(t, <-errCh, "machine creation should not fail")
   526  
   527  	d2, err := p.Acquire(config)
   528  	assert.NoError(t, err)
   529  	p.Release(config, d2)
   530  	assert.Equal(t, machineStateIdle, d.State, "machine should not be removed, because not in OffPeak time mode")
   531  
   532  	config = createMachineOffPeakIdleConfig(offPeakEnabledPeriod)
   533  	d3, err := p.Acquire(config)
   534  	p.Release(config, d3)
   535  	assert.NoError(t, err)
   536  	assert.Equal(t, machineStateRemoving, d.State, "machine should be removed, because in OffPeak time mode")
   537  }
   538  
   539  func TestMachineUseOnDemand(t *testing.T) {
   540  	provisionRetryInterval = 0
   541  
   542  	p, _ := testMachineProvider()
   543  
   544  	_, nd, err := p.Use(machineDefaultConfig, nil)
   545  	assert.NoError(t, err, "it create a new machine")
   546  	assert.NotNil(t, nd)
   547  	assertTotalMachines(t, p, 1, "it creates one machine")
   548  
   549  	_, nd2, err := p.Use(machineDefaultConfig, nil)
   550  	assert.NoError(t, err, "it create a new machine")
   551  	assert.NotNil(t, nd2)
   552  	assertTotalMachines(t, p, 2, "it creates two machines")
   553  
   554  	_, _, err = p.Use(machineProvisionFail, nil)
   555  	assert.Error(t, err, "fail to create a new machine")
   556  	assertTotalMachines(t, p, 2, "it fails to create a third machine")
   557  
   558  	_, _, err = p.Use(machineNoConnect, nil)
   559  	assert.Error(t, err, "fail to create a new machine on connect")
   560  	assertTotalMachines(t, p, 3, "it fails on no-connect, but we leave the machine created")
   561  }
   562  
   563  func TestMachineReleaseIfInvalidDataArePassed(t *testing.T) {
   564  	p, _ := testMachineProvider()
   565  
   566  	_, nd, err := p.Use(machineDefaultConfig, nil)
   567  	assert.NoError(t, err, "it create a new machine")
   568  	assert.NotNil(t, nd)
   569  	assertTotalMachines(t, p, 1, "it creates one machine")
   570  
   571  	p.Release(nil, nd)
   572  }
   573  
   574  func TestMachineCreationIfFailedToConnect(t *testing.T) {
   575  	p, _ := testMachineProvider()
   576  
   577  	_, nd, err := p.Use(machineNoConnect, nil)
   578  	assert.Error(t, err, "it create a new machine")
   579  	assert.Nil(t, nd)
   580  }
   581  
   582  func TestIntermediateMachineList(t *testing.T) {
   583  	p, _ := testMachineProvider()
   584  	p.details = machinesDetails{
   585  		"machine1": &machineDetails{
   586  			Name:  "machine1",
   587  			State: machineStateIdle,
   588  		},
   589  		"machine2": &machineDetails{
   590  			Name:  "machine2",
   591  			State: machineStateCreating,
   592  		},
   593  		"machine3": &machineDetails{
   594  			Name:  "machine3",
   595  			State: machineStateCreating,
   596  		},
   597  	}
   598  
   599  	expectedIntermediateMachines := []string{"machine3"}
   600  
   601  	intermediateMachine := p.intermediateMachineList([]string{"machine1", "machine2"})
   602  	assert.Equal(t, expectedIntermediateMachines, intermediateMachine)
   603  }