github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/allocator/network.go (about)

     1  package allocator
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/docker/go-events"
     9  	"github.com/docker/swarmkit/api"
    10  	"github.com/docker/swarmkit/log"
    11  	"github.com/docker/swarmkit/manager/allocator/cnmallocator"
    12  	"github.com/docker/swarmkit/manager/allocator/networkallocator"
    13  	"github.com/docker/swarmkit/manager/state"
    14  	"github.com/docker/swarmkit/manager/state/store"
    15  	"github.com/docker/swarmkit/protobuf/ptypes"
    16  	"github.com/pkg/errors"
    17  )
    18  
    19  const (
    20  	// Network allocator Voter ID for task allocation vote.
    21  	networkVoter           = "network"
    22  	allocatedStatusMessage = "pending task scheduling"
    23  )
    24  
    25  var (
    26  	// ErrNoIngress is returned when no ingress network is found in store
    27  	ErrNoIngress = errors.New("no ingress network found")
    28  	errNoChanges = errors.New("task unchanged")
    29  
    30  	retryInterval = 5 * time.Minute
    31  )
    32  
    33  // networkContext holds the network allocation state used throughout the network allocation code.
    34  type networkContext struct {
    35  	ingressNetwork *api.Network
    36  	// Instance of the low-level network allocator which performs
    37  	// the actual network allocation.
    38  	nwkAllocator networkallocator.NetworkAllocator
    39  
    40  	// A set of tasks which are ready to be allocated as a batch. This is
    41  	// distinct from "unallocatedTasks" which are tasks that failed to
    42  	// allocate on the first try, being held for a future retry.
    43  	pendingTasks map[string]*api.Task
    44  
    45  	// A set of unallocated tasks which will be revisited if anything
    46  	// changes in system state that might help task allocation.
    47  	unallocatedTasks map[string]*api.Task
    48  
    49  	// A set of unallocated services which will be revisited if
    50  	// anything changes in system state that might help service
    51  	// allocation.
    52  	unallocatedServices map[string]*api.Service
    53  
    54  	// A set of unallocated networks which will be revisited if
    55  	// anything changes in system state that might help network
    56  	// allocation.
    57  	unallocatedNetworks map[string]*api.Network
    58  
    59  	// lastRetry is the last timestamp when unallocated
    60  	// tasks/services/networks were retried.
    61  	lastRetry time.Time
    62  
    63  	// somethingWasDeallocated indicates that we just deallocated at
    64  	// least one service/task/network, so we should retry failed
    65  	// allocations (in case we are experiencing IP exhaustion and an IP
    66  	// was released).
    67  	somethingWasDeallocated bool
    68  }
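
// As a minimal, hypothetical sketch (not part of the allocator itself) of how
// the two task sets above interact: tasks still pending once a commit has been
// processed are demoted to the unallocated set for a later retry. This helper
// only restates the bookkeeping performed at the end of doNetworkAlloc.
func demotePendingTasksSketch(nc *networkContext) {
	for _, t := range nc.pendingTasks {
		// revisit this task whenever system state changes in a way that
		// might help allocation (e.g. an IP was released).
		nc.unallocatedTasks[t.ID] = t
	}
	// reset the pending set for the next batch of watched events.
	nc.pendingTasks = make(map[string]*api.Task)
}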
    69  
    70  func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
    71  	var netConfig *cnmallocator.NetworkConfig
    72  	// There are two ways a user can invoke swarm init:
    73  	// with a default address pool & VXLAN port, or with only a VXLAN port,
    74  	// hence we need two different ways to construct the netconfig.
    75  	if a.networkConfig != nil {
    76  		if a.networkConfig.DefaultAddrPool != nil {
    77  			netConfig = &cnmallocator.NetworkConfig{
    78  				DefaultAddrPool: a.networkConfig.DefaultAddrPool,
    79  				SubnetSize:      a.networkConfig.SubnetSize,
    80  				VXLANUDPPort:    a.networkConfig.VXLANUDPPort,
    81  			}
    82  		} else if a.networkConfig.VXLANUDPPort != 0 {
    83  			netConfig = &cnmallocator.NetworkConfig{
    84  				DefaultAddrPool: nil,
    85  				SubnetSize:      0,
    86  				VXLANUDPPort:    a.networkConfig.VXLANUDPPort,
    87  			}
    88  		}
    89  	}
    90  
    91  	na, err := cnmallocator.New(a.pluginGetter, netConfig)
    92  	if err != nil {
    93  		return err
    94  	}
    95  
    96  	nc := &networkContext{
    97  		nwkAllocator:        na,
    98  		pendingTasks:        make(map[string]*api.Task),
    99  		unallocatedTasks:    make(map[string]*api.Task),
   100  		unallocatedServices: make(map[string]*api.Service),
   101  		unallocatedNetworks: make(map[string]*api.Network),
   102  		lastRetry:           time.Now(),
   103  	}
   104  	a.netCtx = nc
   105  	defer func() {
   106  		// Clear a.netCtx if initialization was unsuccessful.
   107  		if err != nil {
   108  			a.netCtx = nil
   109  		}
   110  	}()
   111  
   112  	// The ingress network is now created when the cluster is first created.
   113  	// Check if we have the ingress network. If found, make sure it is
   114  	// allocated before reading all network objects for allocation.
   115  	// If not found, it was removed by the user and there is nothing to do here.
   116  	ingressNetwork, err := GetIngressNetwork(a.store)
   117  	switch err {
   118  	case nil:
   119  		// Try to complete ingress network allocation before anything else so
   120  		// that we can get the preferred subnet for the ingress network.
   121  		nc.ingressNetwork = ingressNetwork
   122  		if !na.IsAllocated(nc.ingressNetwork) {
   123  			if err := a.allocateNetwork(ctx, nc.ingressNetwork); err != nil {
   124  				log.G(ctx).WithError(err).Error("failed allocating ingress network during init")
   125  			} else if err := a.store.Batch(func(batch *store.Batch) error {
   126  				if err := a.commitAllocatedNetwork(ctx, batch, nc.ingressNetwork); err != nil {
   127  					log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
   128  				}
   129  				return nil
   130  			}); err != nil {
   131  				log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
   132  			}
   133  		}
   134  	case ErrNoIngress:
   135  		// The ingress network is not present in the store; the user removed it
   136  		// and did not create a new one.
   137  	default:
   138  		return errors.Wrap(err, "failure while looking for ingress network during init")
   139  	}
   140  
   141  	// First, allocate (read: restore) objects like networks, nodes, services
   142  	// and tasks that were already allocated. Then go on to allocate objects
   143  	// that are in raft and were previously not allocated. The reason is that
   144  	// during restore we make sure that we populate the allocated state of
   145  	// the objects in raft onto our in-memory state.
   146  	if err := a.allocateNetworks(ctx, true); err != nil {
   147  		return err
   148  	}
   149  
   150  	if err := a.allocateNodes(ctx, true); err != nil {
   151  		return err
   152  	}
   153  
   154  	if err := a.allocateServices(ctx, true); err != nil {
   155  		return err
   156  	}
   157  	if err := a.allocateTasks(ctx, true); err != nil {
   158  		return err
   159  	}
   160  	// Now allocate objects that were not previously allocated
   161  	// but were present in the raft.
   162  	if err := a.allocateNetworks(ctx, false); err != nil {
   163  		return err
   164  	}
   165  
   166  	if err := a.allocateNodes(ctx, false); err != nil {
   167  		return err
   168  	}
   169  
   170  	if err := a.allocateServices(ctx, false); err != nil {
   171  		return err
   172  	}
   173  	return a.allocateTasks(ctx, false)
   174  }
   175  
   176  func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
   177  	nc := a.netCtx
   178  
   179  	switch v := ev.(type) {
   180  	case api.EventCreateNetwork:
   181  		n := v.Network.Copy()
   182  		if nc.nwkAllocator.IsAllocated(n) {
   183  			break
   184  		}
   185  		if IsIngressNetwork(n) && nc.ingressNetwork != nil {
   186  			log.G(ctx).Errorf("Cannot allocate ingress network %s (%s) because another ingress network is already present: %s (%s)",
   187  				n.ID, n.Spec.Annotations.Name, nc.ingressNetwork.ID, nc.ingressNetwork.Spec.Annotations.Name)
   188  			break
   189  		}
   190  
   191  		if err := a.allocateNetwork(ctx, n); err != nil {
   192  			log.G(ctx).WithError(err).Errorf("Failed allocation for network %s", n.ID)
   193  			break
   194  		}
   195  
   196  		if err := a.store.Batch(func(batch *store.Batch) error {
   197  			return a.commitAllocatedNetwork(ctx, batch, n)
   198  		}); err != nil {
   199  			log.G(ctx).WithError(err).Errorf("Failed to commit allocation for network %s", n.ID)
   200  		}
   201  		if IsIngressNetwork(n) {
   202  			nc.ingressNetwork = n
   203  		}
   204  	case api.EventDeleteNetwork:
   205  		n := v.Network.Copy()
   206  
   207  		if IsIngressNetwork(n) && nc.ingressNetwork != nil && nc.ingressNetwork.ID == n.ID {
   208  			nc.ingressNetwork = nil
   209  		}
   210  
   211  		if err := a.deallocateNodeAttachments(ctx, n.ID); err != nil {
   212  			log.G(ctx).WithError(err).Errorf("Failed to deallocate node attachments for network %s", n.ID)
   213  		}
   214  
   215  		// The assumption here is that all dependent objects
   216  		// have been cleaned up when we are here so the only
   217  		// thing that needs to happen is free the network
   218  		// resources.
   219  		if err := nc.nwkAllocator.Deallocate(n); err != nil {
   220  			log.G(ctx).WithError(err).Errorf("Failed during network free for network %s", n.ID)
   221  		} else {
   222  			nc.somethingWasDeallocated = true
   223  		}
   224  
   225  		delete(nc.unallocatedNetworks, n.ID)
   226  	case api.EventCreateService:
   227  		var s *api.Service
   228  		a.store.View(func(tx store.ReadTx) {
   229  			s = store.GetService(tx, v.Service.ID)
   230  		})
   231  
   232  		if s == nil {
   233  			break
   234  		}
   235  
   236  		if nc.nwkAllocator.IsServiceAllocated(s) {
   237  			break
   238  		}
   239  
   240  		if err := a.allocateService(ctx, s, false); err != nil {
   241  			log.G(ctx).WithError(err).Errorf("Failed allocation for service %s", s.ID)
   242  			break
   243  		}
   244  
   245  		if err := a.store.Batch(func(batch *store.Batch) error {
   246  			return a.commitAllocatedService(ctx, batch, s)
   247  		}); err != nil {
   248  			log.G(ctx).WithError(err).Errorf("Failed to commit allocation for service %s", s.ID)
   249  		}
   250  	case api.EventUpdateService:
   251  		// We may have already allocated this service. If a create or
   252  		// update event is older than the current version in the store,
   253  		// we run the risk of allocating the service a second time.
   254  		// Only operate on the latest version of the service.
   255  		var s *api.Service
   256  		a.store.View(func(tx store.ReadTx) {
   257  			s = store.GetService(tx, v.Service.ID)
   258  		})
   259  
   260  		if s == nil {
   261  			break
   262  		}
   263  
   264  		if nc.nwkAllocator.IsServiceAllocated(s) {
   265  			if !nc.nwkAllocator.HostPublishPortsNeedUpdate(s) {
   266  				break
   267  			}
   268  			updatePortsInHostPublishMode(s)
   269  		} else {
   270  			if err := a.allocateService(ctx, s, false); err != nil {
   271  				log.G(ctx).WithError(err).Errorf("Failed allocation during update of service %s", s.ID)
   272  				break
   273  			}
   274  		}
   275  
   276  		if err := a.store.Batch(func(batch *store.Batch) error {
   277  			return a.commitAllocatedService(ctx, batch, s)
   278  		}); err != nil {
   279  			log.G(ctx).WithError(err).Errorf("Failed to commit allocation during update for service %s", s.ID)
   280  			nc.unallocatedServices[s.ID] = s
   281  		} else {
   282  			delete(nc.unallocatedServices, s.ID)
   283  		}
   284  	case api.EventDeleteService:
   285  		s := v.Service.Copy()
   286  
   287  		if err := nc.nwkAllocator.DeallocateService(s); err != nil {
   288  			log.G(ctx).WithError(err).Errorf("Failed deallocation during delete of service %s", s.ID)
   289  		} else {
   290  			nc.somethingWasDeallocated = true
   291  		}
   292  
   293  		// Remove it from unallocatedServices just in case
   294  		// it's still there.
   295  		delete(nc.unallocatedServices, s.ID)
   296  	case api.EventCreateNode, api.EventUpdateNode, api.EventDeleteNode:
   297  		a.doNodeAlloc(ctx, ev)
   298  	case api.EventCreateTask, api.EventUpdateTask, api.EventDeleteTask:
   299  		a.doTaskAlloc(ctx, ev)
   300  	case state.EventCommit:
   301  		a.procTasksNetwork(ctx, false)
   302  
   303  		if time.Since(nc.lastRetry) > retryInterval || nc.somethingWasDeallocated {
   304  			a.procUnallocatedNetworks(ctx)
   305  			a.procUnallocatedServices(ctx)
   306  			a.procTasksNetwork(ctx, true)
   307  			nc.lastRetry = time.Now()
   308  			nc.somethingWasDeallocated = false
   309  		}
   310  
   311  		// Any leftover tasks are moved to the unallocated set
   312  		for _, t := range nc.pendingTasks {
   313  			nc.unallocatedTasks[t.ID] = t
   314  		}
   315  		nc.pendingTasks = make(map[string]*api.Task)
   316  	}
   317  }
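
// A hypothetical driver loop for doNetworkAlloc, assuming a channel of watch
// events (watchCh) obtained elsewhere. Allocation work is event-driven; the
// batch processing above only runs when a state.EventCommit arrives.
func (a *Allocator) runNetworkEventsSketch(ctx context.Context, watchCh <-chan events.Event) {
	for {
		select {
		case ev, ok := <-watchCh:
			if !ok {
				return
			}
			a.doNetworkAlloc(ctx, ev)
		case <-ctx.Done():
			return
		}
	}
}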
   318  
   319  func (a *Allocator) doNodeAlloc(ctx context.Context, ev events.Event) {
   320  	var (
   321  		isDelete bool
   322  		node     *api.Node
   323  	)
   324  
   325  	// We may have already allocated this node. If a create or update
   326  	// event is older than the current version in the store, we run the
   327  	// risk of allocating the node a second time. Only operate on the
   328  	// latest version of the node.
   329  	switch v := ev.(type) {
   330  	case api.EventCreateNode:
   331  		a.store.View(func(tx store.ReadTx) {
   332  			node = store.GetNode(tx, v.Node.ID)
   333  		})
   334  	case api.EventUpdateNode:
   335  		a.store.View(func(tx store.ReadTx) {
   336  			node = store.GetNode(tx, v.Node.ID)
   337  		})
   338  	case api.EventDeleteNode:
   339  		isDelete = true
   340  		node = v.Node.Copy()
   341  	}
   342  
   343  	if node == nil {
   344  		return
   345  	}
   346  
   347  	nc := a.netCtx
   348  
   349  	if isDelete {
   350  		if err := a.deallocateNode(node); err != nil {
   351  			log.G(ctx).WithError(err).Errorf("Failed freeing network resources for node %s", node.ID)
   352  		} else {
   353  			nc.somethingWasDeallocated = true
   354  		}
   355  	} else {
   356  		// if this isn't a delete, we should try reallocating the node. if this
   357  		// is a creation, then the node will be allocated only for ingress.
   358  		if err := a.reallocateNode(ctx, node.ID); err != nil {
   359  			log.G(ctx).WithError(err).Errorf(
   360  				"error reallocating network resources for node %v", node.ID,
   361  			)
   362  		}
   363  	}
   364  }
   365  
   366  func isOverlayNetwork(n *api.Network) bool {
   367  	if n.DriverState != nil && n.DriverState.Name == "overlay" {
   368  		return true
   369  	}
   370  
   371  	if n.Spec.DriverConfig != nil && n.Spec.DriverConfig.Name == "overlay" {
   372  		return true
   373  	}
   374  
   375  	return false
   376  }
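
// A small usage sketch for isOverlayNetwork. The driver name may live either
// in the allocated DriverState or, before allocation, in the spec's
// DriverConfig; both hypothetical networks below are recognized as overlay.
func isOverlayNetworkSketch() {
	allocated := &api.Network{
		DriverState: &api.Driver{Name: "overlay"},
	}
	unallocated := &api.Network{
		Spec: api.NetworkSpec{
			DriverConfig: &api.Driver{Name: "overlay"},
		},
	}
	_ = isOverlayNetwork(allocated)   // true: DriverState is checked first
	_ = isOverlayNetwork(unallocated) // true: falls back to the spec
}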
   377  
   378  func (a *Allocator) getAllocatedNetworks() ([]*api.Network, error) {
   379  	var (
   380  		err               error
   381  		nc                = a.netCtx
   382  		na                = nc.nwkAllocator
   383  		allocatedNetworks []*api.Network
   384  	)
   385  
   386  	// Find allocated networks
   387  	var networks []*api.Network
   388  	a.store.View(func(tx store.ReadTx) {
   389  		networks, err = store.FindNetworks(tx, store.All)
   390  	})
   391  
   392  	if err != nil {
   393  		return nil, errors.Wrap(err, "error listing all networks in store while trying to allocate during init")
   394  	}
   395  
   396  	for _, n := range networks {
   397  
   398  		if isOverlayNetwork(n) && na.IsAllocated(n) {
   399  			allocatedNetworks = append(allocatedNetworks, n)
   400  		}
   401  	}
   402  
   403  	return allocatedNetworks, nil
   404  }
   405  
   406  // getNodeNetworks returns all networks that should be allocated for a node
   407  func (a *Allocator) getNodeNetworks(nodeID string) ([]*api.Network, error) {
   408  	var (
   409  		// no need to initialize networks. we only append to it, and appending
   410  		// to a nil slice is valid. this has the added bonus of making this nil
   411  		// if we return an error
   412  		networks []*api.Network
   413  		err      error
   414  	)
   415  	a.store.View(func(tx store.ReadTx) {
   416  		// get all tasks currently assigned to this node. it's no big deal if
   417  		// the tasks change in the meantime, there's no race to clean up
   418  		// unneeded network attachments on a node.
   419  		var tasks []*api.Task
   420  		tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID))
   421  		if err != nil {
   422  			return
   423  		}
   424  		// we need to keep track of network IDs that we've already added to the
   425  		// list of networks we're going to return. we could do
   426  		// map[string]*api.Network and then convert to []*api.Network and
   427  		// return that, but it seems cleaner to have a separate set and list.
   428  		networkIDs := map[string]struct{}{}
   429  		for _, task := range tasks {
   430  			// we don't need to check if a task is before the Assigned state.
   431  			// the only way we have a task with a NodeID that isn't yet in
   432  			// Assigned is if it's a global service task. this check is not
   433  			// necessary:
   434  			// if task.Status.State < api.TaskStateAssigned {
   435  			//     continue
   436  			// }
   437  			if task.Status.State > api.TaskStateRunning {
   438  				// we don't need to have network attachments for a task that's
   439  				// already in a terminal state
   440  				continue
   441  			}
   442  
   443  			// now go through the task's network attachments and find all of
   444  			// the networks
   445  			for _, attachment := range task.Networks {
   446  				// if the network is an overlay network, and the network ID is
   447  				// not yet in the set of network IDs, then add it to the set
   448  				// and add the network to the list of networks we'll be
   449  				// returning
   450  				if _, ok := networkIDs[attachment.Network.ID]; isOverlayNetwork(attachment.Network) && !ok {
   451  					networkIDs[attachment.Network.ID] = struct{}{}
   452  					// we don't need to worry about retrieving the network from
   453  					// the store, because the network in the attachment is an
   454  					// identical copy of the network in the store.
   455  					networks = append(networks, attachment.Network)
   456  				}
   457  			}
   458  		}
   459  	})
   460  
   461  	// finally, we need the ingress network if one exists.
   462  	if a.netCtx != nil && a.netCtx.ingressNetwork != nil {
   463  		networks = append(networks, a.netCtx.ingressNetwork)
   464  	}
   465  
   466  	return networks, err
   467  }
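
// The set-plus-slice deduplication used in getNodeNetworks is a common
// pattern; this hypothetical standalone version mirrors the loop above,
// assuming tasks whose attachments carry non-nil networks.
func dedupOverlayNetworksSketch(tasks []*api.Task) []*api.Network {
	var networks []*api.Network
	seen := map[string]struct{}{}
	for _, task := range tasks {
		for _, attachment := range task.Networks {
			// add each overlay network exactly once, keyed by ID
			if _, ok := seen[attachment.Network.ID]; isOverlayNetwork(attachment.Network) && !ok {
				seen[attachment.Network.ID] = struct{}{}
				networks = append(networks, attachment.Network)
			}
		}
	}
	return networks
}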
   468  
   469  func (a *Allocator) allocateNodes(ctx context.Context, existingAddressesOnly bool) error {
   470  	// Allocate the nodes currently in the store before we process watched events.
   471  	var (
   472  		allocatedNodes []*api.Node
   473  		nodes          []*api.Node
   474  		err            error
   475  	)
   476  
   477  	a.store.View(func(tx store.ReadTx) {
   478  		nodes, err = store.FindNodes(tx, store.All)
   479  	})
   480  	if err != nil {
   481  		return errors.Wrap(err, "error listing all nodes in store while trying to allocate network resources")
   482  	}
   483  
   484  	for _, node := range nodes {
   485  		networks, err := a.getNodeNetworks(node.ID)
   486  		if err != nil {
   487  			return errors.Wrap(err, "error getting all networks needed by node")
   488  		}
   489  		isAllocated := a.allocateNode(ctx, node, existingAddressesOnly, networks)
   490  		if isAllocated {
   491  			allocatedNodes = append(allocatedNodes, node)
   492  		}
   493  	}
   494  
   495  	if err := a.store.Batch(func(batch *store.Batch) error {
   496  		for _, node := range allocatedNodes {
   497  			if err := a.commitAllocatedNode(ctx, batch, node); err != nil {
   498  				log.G(ctx).WithError(err).Errorf("Failed to commit allocation of network resources for node %s", node.ID)
   499  			}
   500  		}
   501  		return nil
   502  	}); err != nil {
   503  		log.G(ctx).WithError(err).Error("Failed to commit allocation of network resources for nodes")
   504  	}
   505  
   506  	return nil
   507  }
   508  
   509  func (a *Allocator) deallocateNodes(ctx context.Context) error {
   510  	var (
   511  		nodes []*api.Node
   512  		nc    = a.netCtx
   513  		err   error
   514  	)
   515  
   516  	a.store.View(func(tx store.ReadTx) {
   517  		nodes, err = store.FindNodes(tx, store.All)
   518  	})
   519  	if err != nil {
   520  		return errors.New("error listing all nodes in store while trying to free network resources")
   521  	}
   522  
   523  	for _, node := range nodes {
   524  		if err := a.deallocateNode(node); err != nil {
   525  			log.G(ctx).WithError(err).Errorf("Failed freeing network resources for node %s", node.ID)
   526  		} else {
   527  			nc.somethingWasDeallocated = true
   528  		}
   529  		if err := a.store.Batch(func(batch *store.Batch) error {
   530  			return a.commitAllocatedNode(ctx, batch, node)
   531  		}); err != nil {
   532  			log.G(ctx).WithError(err).Errorf("Failed to commit deallocation of network resources for node %s", node.ID)
   533  		}
   534  	}
   535  
   536  	return nil
   537  }
   538  
   539  func (a *Allocator) deallocateNodeAttachments(ctx context.Context, nid string) error {
   540  	var (
   541  		nodes []*api.Node
   542  		nc    = a.netCtx
   543  		err   error
   544  	)
   545  
   546  	a.store.View(func(tx store.ReadTx) {
   547  		nodes, err = store.FindNodes(tx, store.All)
   548  	})
   549  	if err != nil {
   550  		return errors.New("error listing all nodes in store while trying to free network resources")
   551  	}
   552  
   553  	for _, node := range nodes {
   554  
   555  		var networkAttachment *api.NetworkAttachment
   556  		var naIndex int
   557  		for index, na := range node.Attachments {
   558  			if na.Network.ID == nid {
   559  				networkAttachment = na
   560  				naIndex = index
   561  				break
   562  			}
   563  		}
   564  
   565  		if networkAttachment == nil {
   566  			log.G(ctx).Errorf("Failed to find network %s on node %s", nid, node.ID)
   567  			continue
   568  		}
   569  
   570  		if nc.nwkAllocator.IsAttachmentAllocated(node, networkAttachment) {
   571  			if err := nc.nwkAllocator.DeallocateAttachment(node, networkAttachment); err != nil {
   572  				log.G(ctx).WithError(err).Errorf("Failed to commit deallocation of network resources for node %s", node.ID)
   573  			} else {
   574  
   575  				// Delete the lb attachment: swap in the last element and truncate the slice.
   576  				node.Attachments[naIndex] = node.Attachments[len(node.Attachments)-1]
   577  				node.Attachments[len(node.Attachments)-1] = nil
   578  				node.Attachments = node.Attachments[:len(node.Attachments)-1]
   579  
   580  				if err := a.store.Batch(func(batch *store.Batch) error {
   581  					return a.commitAllocatedNode(ctx, batch, node)
   582  				}); err != nil {
   583  					log.G(ctx).WithError(err).Errorf("Failed to commit deallocation of network resources for node %s", node.ID)
   584  				}
   585  
   586  			}
   587  		}
   588  
   589  	}
   590  	return nil
   591  }
   592  
   593  func (a *Allocator) deallocateNode(node *api.Node) error {
   594  	var (
   595  		nc = a.netCtx
   596  	)
   597  
   598  	for _, na := range node.Attachments {
   599  		if nc.nwkAllocator.IsAttachmentAllocated(node, na) {
   600  			if err := nc.nwkAllocator.DeallocateAttachment(node, na); err != nil {
   601  				return err
   602  			}
   603  		}
   604  	}
   605  
   606  	node.Attachments = nil
   607  
   608  	return nil
   609  }
   610  
   611  // allocateNetworks allocates (restores) the networks currently in the store before
   612  // we process watched events. The existingOnly flag is set to true to specify that
   613  // only already-allocated networks need to be restored.
   614  func (a *Allocator) allocateNetworks(ctx context.Context, existingOnly bool) error {
   615  	var (
   616  		nc       = a.netCtx
   617  		networks []*api.Network
   618  		err      error
   619  	)
   620  	a.store.View(func(tx store.ReadTx) {
   621  		networks, err = store.FindNetworks(tx, store.All)
   622  	})
   623  	if err != nil {
   624  		return errors.Wrap(err, "error listing all networks in store while trying to allocate during init")
   625  	}
   626  
   627  	var allocatedNetworks []*api.Network
   628  	for _, n := range networks {
   629  		if nc.nwkAllocator.IsAllocated(n) {
   630  			continue
   631  		}
   632  		// Network is considered allocated only if the DriverState and IPAM are NOT nil.
   633  		// During initial restore (existingOnly being true), check the network state in
   634  		// raft store. If it is allocated, then restore the same in the in memory allocator
   635  		// state. If it is not allocated, then skip allocating the network at this step.
   636  		// This is to avoid allocating an in-use network IP, subnet pool or VXLAN ID to
   637  		// another network.
   638  		if existingOnly &&
   639  			(n.DriverState == nil ||
   640  				n.IPAM == nil) {
   641  			continue
   642  		}
   643  
   644  		if err := a.allocateNetwork(ctx, n); err != nil {
   645  			log.G(ctx).WithField("existingOnly", existingOnly).WithError(err).Errorf("failed allocating network %s during init", n.ID)
   646  			continue
   647  		}
   648  		allocatedNetworks = append(allocatedNetworks, n)
   649  	}
   650  
   651  	if err := a.store.Batch(func(batch *store.Batch) error {
   652  		for _, n := range allocatedNetworks {
   653  			if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
   654  				log.G(ctx).WithError(err).Errorf("failed committing allocation of network %s during init", n.ID)
   655  			}
   656  		}
   657  		return nil
   658  	}); err != nil {
   659  		log.G(ctx).WithError(err).Error("failed committing allocation of networks during init")
   660  	}
   661  
   662  	return nil
   663  }
   664  
   665  // allocateServices allocates the services currently in the store before we
   666  // process watched events.
   667  func (a *Allocator) allocateServices(ctx context.Context, existingAddressesOnly bool) error {
   668  	var (
   669  		nc       = a.netCtx
   670  		services []*api.Service
   671  		err      error
   672  	)
   673  	a.store.View(func(tx store.ReadTx) {
   674  		services, err = store.FindServices(tx, store.All)
   675  	})
   676  	if err != nil {
   677  		return errors.Wrap(err, "error listing all services in store while trying to allocate during init")
   678  	}
   679  
   680  	var allocatedServices []*api.Service
   681  	for _, s := range services {
   682  		if nc.nwkAllocator.IsServiceAllocated(s, networkallocator.OnInit) {
   683  			continue
   684  		}
   685  		if existingAddressesOnly &&
   686  			(s.Endpoint == nil ||
   687  				len(s.Endpoint.VirtualIPs) == 0) {
   688  			continue
   689  		}
   690  
   691  		if err := a.allocateService(ctx, s, existingAddressesOnly); err != nil {
   692  			log.G(ctx).WithField("existingAddressesOnly", existingAddressesOnly).WithError(err).Errorf("failed allocating service %s during init", s.ID)
   693  			continue
   694  		}
   695  		allocatedServices = append(allocatedServices, s)
   696  	}
   697  
   698  	if err := a.store.Batch(func(batch *store.Batch) error {
   699  		for _, s := range allocatedServices {
   700  			if err := a.commitAllocatedService(ctx, batch, s); err != nil {
   701  				log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID)
   702  			}
   703  		}
   704  		return nil
   705  	}); err != nil {
   706  		for _, s := range allocatedServices {
   707  			log.G(ctx).WithError(err).Errorf("failed committing allocation of service %v during init", s.GetID())
   708  		}
   709  	}
   710  
   711  	return nil
   712  }
   713  
   714  // allocateTasks allocates the tasks currently in the store before we start watching events.
   715  func (a *Allocator) allocateTasks(ctx context.Context, existingAddressesOnly bool) error {
   716  	var (
   717  		nc             = a.netCtx
   718  		tasks          []*api.Task
   719  		allocatedTasks []*api.Task
   720  		err            error
   721  	)
   722  	a.store.View(func(tx store.ReadTx) {
   723  		tasks, err = store.FindTasks(tx, store.All)
   724  	})
   725  	if err != nil {
   726  		return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init")
   727  	}
   728  
   729  	logger := log.G(ctx).WithField("method", "(*Allocator).allocateTasks")
   730  
   731  	for _, t := range tasks {
   732  		if t.Status.State > api.TaskStateRunning {
   733  			logger.Debugf("task %v is in allocated state: %v", t.GetID(), t.Status.State)
   734  			continue
   735  		}
   736  
   737  		if existingAddressesOnly {
   738  			hasAddresses := false
   739  			for _, nAttach := range t.Networks {
   740  				if len(nAttach.Addresses) != 0 {
   741  					hasAddresses = true
   742  					break
   743  				}
   744  			}
   745  			if !hasAddresses {
   746  				logger.Debugf("task %v has no attached addresses", t.GetID())
   747  				continue
   748  			}
   749  		}
   750  
   751  		var s *api.Service
   752  		if t.ServiceID != "" {
   753  			a.store.View(func(tx store.ReadTx) {
   754  				s = store.GetService(tx, t.ServiceID)
   755  			})
   756  		}
   757  
   758  		// Populate network attachments in the task
   759  		// based on service spec.
   760  		a.taskCreateNetworkAttachments(t, s)
   761  
   762  		if taskReadyForNetworkVote(t, s, nc) {
   763  			if t.Status.State >= api.TaskStatePending {
   764  				logger.Debugf("task %v is in allocated state: %v", t.GetID(), t.Status.State)
   765  				continue
   766  			}
   767  
   768  			if a.taskAllocateVote(networkVoter, t.ID) {
   769  				// If the task is not attached to any network, the network
   770  				// allocator's job is done. Immediately cast a vote so
   771  				// that the task can be moved to the PENDING state as
   772  				// soon as possible.
   773  				updateTaskStatus(t, api.TaskStatePending, allocatedStatusMessage)
   774  				allocatedTasks = append(allocatedTasks, t)
   775  				logger.Debugf("allocated task %v, state update %v", t.GetID(), api.TaskStatePending)
   776  			}
   777  			continue
   778  		}
   779  
   780  		err := a.allocateTask(ctx, t)
   781  		if err == nil {
   782  			allocatedTasks = append(allocatedTasks, t)
   783  		} else if err != errNoChanges {
   784  			logger.WithError(err).Errorf("failed allocating task %s during init", t.ID)
   785  			nc.unallocatedTasks[t.ID] = t
   786  		}
   787  	}
   788  
   789  	if err := a.store.Batch(func(batch *store.Batch) error {
   790  		for _, t := range allocatedTasks {
   791  			if err := a.commitAllocatedTask(ctx, batch, t); err != nil {
   792  				logger.WithError(err).Errorf("failed committing allocation of task %s during init", t.ID)
   793  			}
   794  		}
   795  
   796  		return nil
   797  	}); err != nil {
   798  		for _, t := range allocatedTasks {
   799  			logger.WithError(err).Errorf("failed committing allocation of task %v during init", t.GetID())
   800  		}
   801  	}
   802  
   803  	return nil
   804  }
   805  
   806  // taskReadyForNetworkVote checks if the task is ready for a network
   807  // vote to move it to PENDING state.
   808  func taskReadyForNetworkVote(t *api.Task, s *api.Service, nc *networkContext) bool {
   809  	// Task is ready for a vote if the following is true:
   810  	//
   811  	// The task has no networks attached, or all of its attached
   812  	// networks are allocated, AND the task's service has no endpoint
   813  	// or networks configured, or the service endpoints have been
   814  	// allocated.
   815  	return (len(t.Networks) == 0 || nc.nwkAllocator.IsTaskAllocated(t)) &&
   816  		(s == nil || nc.nwkAllocator.IsServiceAllocated(s))
   817  }
   818  
   819  func taskUpdateNetworks(t *api.Task, networks []*api.NetworkAttachment) {
   820  	networksCopy := make([]*api.NetworkAttachment, 0, len(networks))
   821  	for _, n := range networks {
   822  		networksCopy = append(networksCopy, n.Copy())
   823  	}
   824  
   825  	t.Networks = networksCopy
   826  }
   827  
   828  func taskUpdateEndpoint(t *api.Task, endpoint *api.Endpoint) {
   829  	t.Endpoint = endpoint.Copy()
   830  }
   831  
   832  // IsIngressNetworkNeeded checks whether the service requires the routing-mesh
   833  func IsIngressNetworkNeeded(s *api.Service) bool {
   834  	return networkallocator.IsIngressNetworkNeeded(s)
   835  }
   836  
   837  func (a *Allocator) taskCreateNetworkAttachments(t *api.Task, s *api.Service) {
   838  	// If the task's network attachments have already been filled in,
   839  	// there is no need to do anything else.
   840  	if len(t.Networks) != 0 {
   841  		return
   842  	}
   843  
   844  	var networks []*api.NetworkAttachment
   845  	if IsIngressNetworkNeeded(s) && a.netCtx.ingressNetwork != nil {
   846  		networks = append(networks, &api.NetworkAttachment{Network: a.netCtx.ingressNetwork})
   847  	}
   848  
   849  	a.store.View(func(tx store.ReadTx) {
   850  		// Always prefer NetworkAttachmentConfig in the TaskSpec
   851  		specNetworks := t.Spec.Networks
   852  		if len(specNetworks) == 0 && s != nil && len(s.Spec.Networks) != 0 {
   853  			specNetworks = s.Spec.Networks
   854  		}
   855  
   856  		for _, na := range specNetworks {
   857  			n := store.GetNetwork(tx, na.Target)
   858  			if n == nil {
   859  				continue
   860  			}
   861  
   862  			attachment := api.NetworkAttachment{Network: n}
   863  			attachment.Aliases = append(attachment.Aliases, na.Aliases...)
   864  			attachment.Addresses = append(attachment.Addresses, na.Addresses...)
   865  			attachment.DriverAttachmentOpts = na.DriverAttachmentOpts
   866  			networks = append(networks, &attachment)
   867  		}
   868  	})
   869  
   870  	taskUpdateNetworks(t, networks)
   871  }
   872  
   873  func (a *Allocator) doTaskAlloc(ctx context.Context, ev events.Event) {
   874  	var (
   875  		isDelete bool
   876  		t        *api.Task
   877  	)
   878  
   879  	logger := log.G(ctx).WithField("method", "(*Allocator).doTaskAlloc")
   880  
   881  	// We may have already allocated this task. If a create or update
   882  	// event is older than the current version in the store, we run the
   883  	// risk of allocating the task a second time. Only operate on the
   884  	// latest version of the task.
   885  	switch v := ev.(type) {
   886  	case api.EventCreateTask:
   887  		a.store.View(func(tx store.ReadTx) {
   888  			t = store.GetTask(tx, v.Task.ID)
   889  		})
   890  	case api.EventUpdateTask:
   891  		a.store.View(func(tx store.ReadTx) {
   892  			t = store.GetTask(tx, v.Task.ID)
   893  		})
   894  	case api.EventDeleteTask:
   895  		isDelete = true
   896  		t = v.Task.Copy()
   897  	}
   898  
   899  	if t == nil {
   900  		return
   901  	}
   902  
   903  	nc := a.netCtx
   904  
   905  	// If the task has stopped running then we should free the network
   906  	// resources associated with the task right away.
   907  	if t.Status.State > api.TaskStateRunning || isDelete {
   908  		if nc.nwkAllocator.IsTaskAllocated(t) {
   909  			if err := nc.nwkAllocator.DeallocateTask(t); err != nil {
   910  				logger.WithError(err).Errorf("Failed freeing network resources for task %s", t.ID)
   911  			} else {
   912  				nc.somethingWasDeallocated = true
   913  			}
   914  		}
   915  
   916  		// if we're deallocating the task, we also might need to deallocate the
   917  		// node's network attachment, if this is the last task on the node that
   918  		// needs it. we can do that by doing the same dance to reallocate a
   919  		// node
   920  		if err := a.reallocateNode(ctx, t.NodeID); err != nil {
   921  			logger.WithError(err).Errorf("error reallocating node %v", t.NodeID)
   922  		}
   923  
   924  		// Cleanup any task references that might exist
   925  		delete(nc.pendingTasks, t.ID)
   926  		delete(nc.unallocatedTasks, t.ID)
   927  
   928  		return
   929  	}
   930  
   931  	// if the task has a node ID, we should allocate an attachment for the node
   932  	// this happens if the task is in any non-terminal state.
   933  	if t.NodeID != "" && t.Status.State <= api.TaskStateRunning {
   934  		if err := a.reallocateNode(ctx, t.NodeID); err != nil {
   935  			// TODO(dperny): not entirely sure what the error handling flow here
   936  			// should be... for now, just log and keep going
   937  			logger.WithError(err).Errorf("error reallocating node %v", t.NodeID)
   938  		}
   939  	}
   940  
   941  	// If we are already in allocated state, there is
   942  	// absolutely nothing else to do.
   943  	if t.Status.State >= api.TaskStatePending {
   944  		logger.Debugf("Task %s is already in allocated state %v", t.ID, t.Status.State)
   945  		delete(nc.pendingTasks, t.ID)
   946  		delete(nc.unallocatedTasks, t.ID)
   947  		return
   948  	}
   949  
   950  	var s *api.Service
   951  	if t.ServiceID != "" {
   952  		a.store.View(func(tx store.ReadTx) {
   953  			s = store.GetService(tx, t.ServiceID)
   954  		})
   955  		if s == nil {
   956  			// If the task is running, it is not normal
   957  			// to be unable to find the associated
   958  			// service. If the task is not running (the
   959  			// task is either dead or its desired state
   960  			// is set to dead), then the service may not
   961  			// be available in the store. But we still
   962  			// need to clean up the network resources
   963  			// associated with the task.
   964  			if t.Status.State <= api.TaskStateRunning && !isDelete {
   965  				log.G(ctx).Errorf("Event %T: Failed to get service %s for task %s state %s: could not find service %s", ev, t.ServiceID, t.ID, t.Status.State, t.ServiceID)
   966  				return
   967  			}
   968  		}
   969  	}
   970  
   971  	// Populate network attachments in the task
   972  	// based on service spec.
   973  	a.taskCreateNetworkAttachments(t, s)
   974  
   975  	nc.pendingTasks[t.ID] = t
   976  	log.G(ctx).Debugf("task %v was marked pending allocation", t.ID)
   977  }
   978  
   979  // allocateNode takes a context, a node, whether or not new allocations should
   980  // be made, and the networks to allocate. it then makes sure an attachment is
   981  // allocated for every network in the provided networks, allocating new
   982  // attachments if existingAddressesOnly is false. it returns true if something
   983  // new was allocated or something was removed, or false otherwise.
   984  //
   985  // additionally, allocateNode will remove and free any attachments for networks
   986  // not in the set of networks passed in.
   987  func (a *Allocator) allocateNode(ctx context.Context, node *api.Node, existingAddressesOnly bool, networks []*api.Network) bool {
   988  	var allocated bool
   989  
   990  	nc := a.netCtx
   991  
   992  	var nwIDs = make(map[string]struct{}, len(networks))
   993  
   994  	// go through all of the networks we've passed in
   995  	for _, network := range networks {
   996  		nwIDs[network.ID] = struct{}{}
   997  
   998  		// for each one, create space for an attachment. then, search through
   999  		// all of the attachments already on the node. if the attachment
  1000  		// exists, reuse it. if not, we'll allocate one below.
  1001  		var lbAttachment *api.NetworkAttachment
  1002  		for _, na := range node.Attachments {
  1003  			if na.Network != nil && na.Network.ID == network.ID {
  1004  				lbAttachment = na
  1005  				break
  1006  			}
  1007  		}
  1008  
  1009  		if lbAttachment != nil {
  1010  			if nc.nwkAllocator.IsAttachmentAllocated(node, lbAttachment) {
  1011  				continue
  1012  			}
  1013  		}
  1014  
  1015  		if lbAttachment == nil {
  1016  			// if we're restoring state, we should not add an attachment here.
  1017  			if existingAddressesOnly {
  1018  				continue
  1019  			}
  1020  			lbAttachment = &api.NetworkAttachment{}
  1021  			node.Attachments = append(node.Attachments, lbAttachment)
  1022  		}
  1023  
  1024  		if existingAddressesOnly && len(lbAttachment.Addresses) == 0 {
  1025  			continue
  1026  		}
  1027  
  1028  		lbAttachment.Network = network.Copy()
  1029  		if err := a.netCtx.nwkAllocator.AllocateAttachment(node, lbAttachment); err != nil {
  1030  			log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s", node.ID)
  1031  			// TODO: Should we add an unallocatedNode set and retry allocating resources like we do for networks, tasks and services?
  1032  			// right now, we will only retry allocating network resources for the node when the node is updated.
  1033  			continue
  1034  		}
  1035  
  1036  		allocated = true
  1037  	}
  1038  
  1039  	// if we're only initializing existing addresses, we should stop here and
  1040  	// not deallocate anything
  1041  	if existingAddressesOnly {
  1042  		return allocated
  1043  	}
  1044  
  1045  	// now that we've allocated everything new, we have to remove things that
  1046  	// do not belong. we have to do this last because we can easily roll back
  1047  	// attachments we've allocated if something goes wrong by freeing them, but
  1048  	// we can't roll back deallocating attachments by reacquiring them.
  1049  
  1050  	// we're using a trick to filter without allocating see the official go
  1051  	// wiki on github:
  1052  	// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
  1053  	attachments := node.Attachments[:0]
  1054  	for _, attach := range node.Attachments {
  1055  		if _, ok := nwIDs[attach.Network.ID]; ok {
  1056  			// attachment belongs to one of the networks, so keep it
  1057  			attachments = append(attachments, attach)
  1058  		} else {
  1059  			// free the attachment and remove it from the node's attachments by
  1060  			// re-slicing
  1061  			if err := a.netCtx.nwkAllocator.DeallocateAttachment(node, attach); err != nil {
  1062  				// if deallocation fails, there's nothing we can do besides log
  1063  				// an error and keep going
  1064  				log.G(ctx).WithError(err).Errorf(
  1065  					"error deallocating attachment for network %v on node %v",
  1066  					attach.Network.ID, node.ID,
  1067  				)
  1068  			}
  1069  			// strictly speaking, nothing was allocated, but something was
  1070  			// deallocated and that counts.
  1071  			allocated = true
  1072  			// also, set the somethingWasDeallocated flag so the allocator
  1073  			// knows that it can now try again.
  1074  			a.netCtx.somethingWasDeallocated = true
  1075  		}
  1076  	}
  1077  	node.Attachments = attachments
  1078  
  1079  	return allocated
  1080  }
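
// The in-place filter used at the end of allocateNode is worth isolating.
// This hypothetical helper shows the same zero-allocation slice trick on its
// own: filtered shares its backing array with attachments, so kept elements
// are compacted toward the front without a new allocation.
func keepAttachmentsSketch(attachments []*api.NetworkAttachment, keep map[string]struct{}) []*api.NetworkAttachment {
	filtered := attachments[:0]
	for _, attach := range attachments {
		// keep only attachments whose network ID is in the keep set
		if _, ok := keep[attach.Network.ID]; ok {
			filtered = append(filtered, attach)
		}
	}
	return filtered
}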
  1081  
  1082  func (a *Allocator) reallocateNode(ctx context.Context, nodeID string) error {
  1083  	var (
  1084  		node *api.Node
  1085  	)
  1086  	a.store.View(func(tx store.ReadTx) {
  1087  		node = store.GetNode(tx, nodeID)
  1088  	})
  1089  	if node == nil {
  1090  		return errors.Errorf("node %v cannot be found", nodeID)
  1091  	}
  1092  
  1093  	networks, err := a.getNodeNetworks(node.ID)
  1094  	if err != nil {
  1095  		return errors.Wrapf(err, "error getting networks for node %v", nodeID)
  1096  	}
  1097  	if a.allocateNode(ctx, node, false, networks) {
  1098  		// if something was allocated, commit the node
  1099  		if err := a.store.Batch(func(batch *store.Batch) error {
  1100  			return a.commitAllocatedNode(ctx, batch, node)
  1101  		}); err != nil {
  1102  			return errors.Wrapf(err, "error committing allocation for node %v", nodeID)
  1103  		}
  1104  	}
  1105  	return nil
  1106  }
  1107  
  1108  func (a *Allocator) commitAllocatedNode(ctx context.Context, batch *store.Batch, node *api.Node) error {
  1109  	if err := batch.Update(func(tx store.Tx) error {
  1110  		err := store.UpdateNode(tx, node)
  1111  
  1112  		if err == store.ErrSequenceConflict {
  1113  			storeNode := store.GetNode(tx, node.ID)
  1114  			storeNode.Attachments = node.Attachments
  1115  			err = store.UpdateNode(tx, storeNode)
  1116  		}
  1117  
  1118  		return errors.Wrapf(err, "failed updating state in store transaction for node %s", node.ID)
  1119  	}); err != nil {
  1120  		if err := a.deallocateNode(node); err != nil {
  1121  			log.G(ctx).WithError(err).Errorf("failed rolling back allocation of node %s", node.ID)
  1122  		}
  1123  
  1124  		return err
  1125  	}
  1126  
  1127  	return nil
  1128  }
  1129  
  1130  // updatePortsInHostPublishMode prepares the service object for update when the change
  1131  // concerns host-mode published ports: it resets the runtime state ports (s.Endpoint.Ports)
  1132  // to the current ingress-mode runtime state ports plus the newly configured host-mode ports,
  1133  // so that the service allocation invoked on this new service object will trigger deallocation
  1134  // of any old host-mode port and allocation of any new one.
  1135  func updatePortsInHostPublishMode(s *api.Service) {
  1136  	// First, remove all host-mode ports from s.Endpoint.Ports
  1137  	if s.Endpoint != nil {
  1138  		var portConfigs []*api.PortConfig
  1139  		for _, portConfig := range s.Endpoint.Ports {
  1140  			if portConfig.PublishMode != api.PublishModeHost {
  1141  				portConfigs = append(portConfigs, portConfig)
  1142  			}
  1143  		}
  1144  		s.Endpoint.Ports = portConfigs
  1145  	}
  1146  
  1147  	// Add back all host-mode ports
  1148  	if s.Spec.Endpoint != nil {
  1149  		if s.Endpoint == nil {
  1150  			s.Endpoint = &api.Endpoint{}
  1151  		}
  1152  		for _, portConfig := range s.Spec.Endpoint.Ports {
  1153  			if portConfig.PublishMode == api.PublishModeHost {
  1154  				s.Endpoint.Ports = append(s.Endpoint.Ports, portConfig.Copy())
  1155  			}
  1156  		}
  1157  	}
  1158  	s.Endpoint.Spec = s.Spec.Endpoint.Copy()
  1159  }
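
// A hypothetical before/after sketch of the reset performed above: the
// ingress-mode runtime port is kept, the stale host-mode runtime port is
// dropped, and the host-mode port currently in the spec is re-added. All
// port numbers here are invented for illustration.
func updatePortsInHostPublishModeSketch() {
	s := &api.Service{
		Spec: api.ServiceSpec{
			Endpoint: &api.EndpointSpec{
				Ports: []*api.PortConfig{
					{TargetPort: 80, PublishedPort: 8080, PublishMode: api.PublishModeHost},
				},
			},
		},
		Endpoint: &api.Endpoint{
			Ports: []*api.PortConfig{
				{TargetPort: 80, PublishedPort: 30000, PublishMode: api.PublishModeIngress},
				{TargetPort: 80, PublishedPort: 9090, PublishMode: api.PublishModeHost}, // stale
			},
		},
	}
	updatePortsInHostPublishMode(s)
	// s.Endpoint.Ports now holds the ingress port (30000) plus the spec's
	// host-mode port (8080); the stale host-mode port (9090) is gone.
}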
  1160  
  1161  // allocateService takes care to align the desired state with the passed spec.
  1162  // The last parameter is true only during restart, when the data is read from raft
  1163  // and used to build the internal state.
  1164  func (a *Allocator) allocateService(ctx context.Context, s *api.Service, existingAddressesOnly bool) error {
  1165  	nc := a.netCtx
  1166  
  1167  	if s.Spec.Endpoint != nil {
  1168  		// service has user-defined endpoint
  1169  		if s.Endpoint == nil {
  1170  			// service currently has no allocated endpoint; allocate one.
  1171  			s.Endpoint = &api.Endpoint{
  1172  				Spec: s.Spec.Endpoint.Copy(),
  1173  			}
  1174  		}
  1175  
  1176  		// The service is trying to expose ports to the external
  1177  		// world. Automatically attach the service to the ingress
  1178  		// network only if it is not already done.
  1179  		if IsIngressNetworkNeeded(s) {
  1180  			if nc.ingressNetwork == nil {
  1181  				return fmt.Errorf("ingress network is missing")
  1182  			}
  1183  			var found bool
  1184  			for _, vip := range s.Endpoint.VirtualIPs {
  1185  				if vip.NetworkID == nc.ingressNetwork.ID {
  1186  					found = true
  1187  					break
  1188  				}
  1189  			}
  1190  
  1191  			if !found {
  1192  				s.Endpoint.VirtualIPs = append(s.Endpoint.VirtualIPs,
  1193  					&api.Endpoint_VirtualIP{NetworkID: nc.ingressNetwork.ID})
  1194  			}
  1195  		}
  1196  	} else if s.Endpoint != nil && !existingAddressesOnly {
  1197  		// if we are in the restart phase there is no reason to try to deallocate
  1198  		// anything, because the state is not there.
  1199  		// The service has no user-defined endpoint but has already allocated
  1200  		// network resources, which need to be deallocated.
  1201  		if err := nc.nwkAllocator.DeallocateService(s); err != nil {
  1202  			return err
  1203  		}
  1204  		nc.somethingWasDeallocated = true
  1205  	}
  1206  
  1207  	if err := nc.nwkAllocator.AllocateService(s); err != nil {
  1208  		nc.unallocatedServices[s.ID] = s
  1209  		return err
  1210  	}
  1211  
  1212  	// If the service doesn't expose ports any more and if we have
  1213  	// any lingering virtual IP references for ingress network
  1214  	// clean them up here.
  1215  	if !IsIngressNetworkNeeded(s) && nc.ingressNetwork != nil {
  1216  		if s.Endpoint != nil {
  1217  			for i, vip := range s.Endpoint.VirtualIPs {
  1218  				if vip.NetworkID == nc.ingressNetwork.ID {
  1219  					n := len(s.Endpoint.VirtualIPs)
  1220  					s.Endpoint.VirtualIPs[i], s.Endpoint.VirtualIPs[n-1] = s.Endpoint.VirtualIPs[n-1], nil
  1221  					s.Endpoint.VirtualIPs = s.Endpoint.VirtualIPs[:n-1]
  1222  					break
  1223  				}
  1224  			}
  1225  		}
  1226  	}
  1227  	return nil
  1228  }
  1229  
  1230  func (a *Allocator) commitAllocatedService(ctx context.Context, batch *store.Batch, s *api.Service) error {
  1231  	if err := batch.Update(func(tx store.Tx) error {
  1232  		err := store.UpdateService(tx, s)
  1233  
  1234  		if err == store.ErrSequenceConflict {
  1235  			storeService := store.GetService(tx, s.ID)
  1236  			storeService.Endpoint = s.Endpoint
  1237  			err = store.UpdateService(tx, storeService)
  1238  		}
  1239  
  1240  		return errors.Wrapf(err, "failed updating state in store transaction for service %s", s.ID)
  1241  	}); err != nil {
  1242  		if err := a.netCtx.nwkAllocator.DeallocateService(s); err != nil {
  1243  			log.G(ctx).WithError(err).Errorf("failed rolling back allocation of service %s", s.ID)
  1244  		}
  1245  
  1246  		return err
  1247  	}
  1248  
  1249  	return nil
  1250  }
  1251  
  1252  func (a *Allocator) allocateNetwork(ctx context.Context, n *api.Network) error {
  1253  	nc := a.netCtx
  1254  
  1255  	if err := nc.nwkAllocator.Allocate(n); err != nil {
  1256  		nc.unallocatedNetworks[n.ID] = n
  1257  		return err
  1258  	}
  1259  
  1260  	return nil
  1261  }
  1262  
  1263  func (a *Allocator) commitAllocatedNetwork(ctx context.Context, batch *store.Batch, n *api.Network) error {
  1264  	if err := batch.Update(func(tx store.Tx) error {
  1265  		if err := store.UpdateNetwork(tx, n); err != nil {
  1266  			return errors.Wrapf(err, "failed updating state in store transaction for network %s", n.ID)
  1267  		}
  1268  		return nil
  1269  	}); err != nil {
  1270  		if err := a.netCtx.nwkAllocator.Deallocate(n); err != nil {
  1271  			log.G(ctx).WithError(err).Errorf("failed rolling back allocation of network %s", n.ID)
  1272  		}
  1273  
  1274  		return err
  1275  	}
  1276  
  1277  	return nil
  1278  }
  1279  
  1280  func (a *Allocator) allocateTask(ctx context.Context, t *api.Task) (err error) {
  1281  	taskUpdated := false
  1282  	nc := a.netCtx
  1283  
  1284  	logger := log.G(ctx).WithField("method", "(*Allocator).allocateTask")
  1285  
  1286  	// We might be here even if a task allocation has already
  1287  	// happened but wasn't successfully committed to the store. In
  1288  	// such cases, skip allocation and go straight to updating the
  1289  	// store.
  1290  	if !nc.nwkAllocator.IsTaskAllocated(t) {
  1291  		a.store.View(func(tx store.ReadTx) {
  1292  			if t.ServiceID != "" {
  1293  				s := store.GetService(tx, t.ServiceID)
  1294  				if s == nil {
  1295  					err = fmt.Errorf("could not find service %s for task %s", t.ServiceID, t.GetID())
  1296  					return
  1297  				}
  1298  
  1299  				if !nc.nwkAllocator.IsServiceAllocated(s) {
  1300  					err = fmt.Errorf("service %s to which task %s belongs has pending allocations", s.ID, t.ID)
  1301  					return
  1302  				}
  1303  
  1304  				if s.Endpoint != nil {
  1305  					taskUpdateEndpoint(t, s.Endpoint)
  1306  					taskUpdated = true
  1307  				}
  1308  			}
  1309  
  1310  			for _, na := range t.Networks {
  1311  				n := store.GetNetwork(tx, na.Network.ID)
  1312  				if n == nil {
  1313  					err = fmt.Errorf("failed to retrieve network %s while allocating task %s", na.Network.ID, t.ID)
  1314  					return
  1315  				}
  1316  
  1317  				if !nc.nwkAllocator.IsAllocated(n) {
  1318  					err = fmt.Errorf("network %s attached to task %s not allocated yet", n.ID, t.ID)
  1319  					return
  1320  				}
  1321  
  1322  				na.Network = n
  1323  			}
  1324  
  1325  			if err = nc.nwkAllocator.AllocateTask(t); err != nil {
  1326  				return
  1327  			}
  1328  			if nc.nwkAllocator.IsTaskAllocated(t) {
  1329  				taskUpdated = true
  1330  			}
  1331  		})
  1332  
  1333  		if err != nil {
  1334  			return err
  1335  		}
  1336  	}
  1337  
  1338  	// Update the network allocations and move to the
  1339  	// PENDING state on top of the latest store state.
  1340  	if a.taskAllocateVote(networkVoter, t.ID) {
  1341  		if t.Status.State < api.TaskStatePending {
  1342  			updateTaskStatus(t, api.TaskStatePending, allocatedStatusMessage)
  1343  			logger.Debugf("allocated task %v, state update %v", t.GetID(), api.TaskStatePending)
  1344  			taskUpdated = true
  1345  		} else {
  1346  			logger.Debugf("task %v, already in allocated state %v", t.GetID(), t.Status.State)
  1347  		}
  1348  	}
  1349  
  1350  	if !taskUpdated {
  1351  		return errNoChanges
  1352  	}
  1353  
  1354  	return nil
  1355  }
  1356  
  1357  func (a *Allocator) commitAllocatedTask(ctx context.Context, batch *store.Batch, t *api.Task) error {
  1358  	retError := batch.Update(func(tx store.Tx) error {
  1359  		err := store.UpdateTask(tx, t)
  1360  
  1361  		if err == store.ErrSequenceConflict {
  1362  			storeTask := store.GetTask(tx, t.ID)
  1363  			taskUpdateNetworks(storeTask, t.Networks)
  1364  			taskUpdateEndpoint(storeTask, t.Endpoint)
  1365  			if storeTask.Status.State < api.TaskStatePending {
  1366  				storeTask.Status = t.Status
  1367  			}
  1368  			err = store.UpdateTask(tx, storeTask)
  1369  		}
  1370  
  1371  		return errors.Wrapf(err, "failed updating state in store transaction for task %s", t.ID)
  1372  	})
  1373  
  1374  	if retError == nil {
  1375  		log.G(ctx).Debugf("committed allocated task %v, state update %v", t.GetID(), t.Status)
  1376  	}
  1377  
  1378  	return retError
  1379  }
  1380  
  1381  func (a *Allocator) procUnallocatedNetworks(ctx context.Context) {
  1382  	nc := a.netCtx
  1383  	var allocatedNetworks []*api.Network
  1384  	for _, n := range nc.unallocatedNetworks {
  1385  		if !nc.nwkAllocator.IsAllocated(n) {
  1386  			if err := a.allocateNetwork(ctx, n); err != nil {
  1387  				log.G(ctx).WithError(err).Debugf("Failed allocation of unallocated network %s", n.ID)
  1388  				continue
  1389  			}
  1390  			allocatedNetworks = append(allocatedNetworks, n)
  1391  		}
  1392  	}
  1393  
  1394  	if len(allocatedNetworks) == 0 {
  1395  		return
  1396  	}
  1397  
  1398  	err := a.store.Batch(func(batch *store.Batch) error {
  1399  		for _, n := range allocatedNetworks {
  1400  			if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
  1401  				log.G(ctx).WithError(err).Debugf("Failed to commit allocation of unallocated network %s", n.ID)
  1402  				continue
  1403  			}
  1404  			delete(nc.unallocatedNetworks, n.ID)
  1405  		}
  1406  		return nil
  1407  	})
  1408  
  1409  	if err != nil {
  1410  		log.G(ctx).WithError(err).Error("Failed to commit allocation of unallocated networks")
  1411  		// We optimistically removed these from nc.unallocatedNetworks
  1412  		// above in anticipation of successfully committing the batch,
  1413  		// but since the transaction has failed, we requeue them here.
  1414  		for _, n := range allocatedNetworks {
  1415  			nc.unallocatedNetworks[n.ID] = n
  1416  		}
  1417  	}
  1418  }
  1419  
  1420  func (a *Allocator) procUnallocatedServices(ctx context.Context) {
  1421  	nc := a.netCtx
  1422  	var allocatedServices []*api.Service
  1423  	for _, s := range nc.unallocatedServices {
  1424  		if !nc.nwkAllocator.IsServiceAllocated(s) {
  1425  			if err := a.allocateService(ctx, s, false); err != nil {
  1426  				log.G(ctx).WithError(err).Debugf("Failed allocation of unallocated service %s", s.ID)
  1427  				continue
  1428  			}
  1429  			allocatedServices = append(allocatedServices, s)
  1430  		}
  1431  	}
  1432  
  1433  	if len(allocatedServices) == 0 {
  1434  		return
  1435  	}
  1436  
  1437  	err := a.store.Batch(func(batch *store.Batch) error {
  1438  		for _, s := range allocatedServices {
  1439  			if err := a.commitAllocatedService(ctx, batch, s); err != nil {
  1440  				log.G(ctx).WithError(err).Debugf("Failed to commit allocation of unallocated service %s", s.ID)
  1441  				continue
  1442  			}
  1443  			delete(nc.unallocatedServices, s.ID)
  1444  		}
  1445  		return nil
  1446  	})
  1447  
  1448  	if err != nil {
  1449  		log.G(ctx).WithError(err).Error("Failed to commit allocation of unallocated services")
  1450  		// We optimistically removed these from nc.unallocatedServices
  1451  		// above in anticipation of successfully committing the batch,
  1452  		// but since the transaction has failed, we requeue them here.
  1453  		for _, s := range allocatedServices {
  1454  			nc.unallocatedServices[s.ID] = s
  1455  		}
  1456  	}
  1457  }
  1458  
  1459  func (a *Allocator) procTasksNetwork(ctx context.Context, onRetry bool) {
  1460  	nc := a.netCtx
  1461  	quiet := false
  1462  	toAllocate := nc.pendingTasks
  1463  	if onRetry {
  1464  		toAllocate = nc.unallocatedTasks
  1465  		quiet = true
  1466  	}
  1467  	allocatedTasks := make([]*api.Task, 0, len(toAllocate))
  1468  
  1469  	for _, t := range toAllocate {
  1470  
  1471  		if err := a.allocateTask(ctx, t); err == nil {
  1472  			allocatedTasks = append(allocatedTasks, t)
  1473  		} else if err != errNoChanges {
  1474  			if quiet {
  1475  				log.G(ctx).WithError(err).Debug("task allocation failure")
  1476  			} else {
  1477  				log.G(ctx).WithError(err).Error("task allocation failure")
  1478  			}
  1479  		}
  1480  	}
  1481  
  1482  	if len(allocatedTasks) == 0 {
  1483  		return
  1484  	}
  1485  
  1486  	err := a.store.Batch(func(batch *store.Batch) error {
  1487  		for _, t := range allocatedTasks {
  1488  			err := a.commitAllocatedTask(ctx, batch, t)
  1489  			if err != nil {
  1490  				log.G(ctx).WithField("method", "(*Allocator).procTasksNetwork").WithError(err).Errorf("allocation commit failure for task %s", t.GetID())
  1491  				continue
  1492  			}
  1493  			delete(toAllocate, t.ID)
  1494  		}
  1495  
  1496  		return nil
  1497  	})
  1498  
  1499  	if err != nil {
  1500  		log.G(ctx).WithError(err).Error("failed a store batch operation while processing tasks")
  1501  		// We optimistically removed these from toAllocate above in
  1502  		// anticipation of successfully committing the batch, but since
  1503  		// the transaction has failed, we requeue them here.
  1504  		for _, t := range allocatedTasks {
  1505  			toAllocate[t.ID] = t
  1506  		}
  1507  	}
  1508  }
  1509  
  1510  // IsBuiltInNetworkDriver returns whether the passed driver is an internal network driver
  1511  func IsBuiltInNetworkDriver(name string) bool {
  1512  	return cnmallocator.IsBuiltInDriver(name)
  1513  }
  1514  
  1515  // PredefinedNetworks returns the list of predefined network structures for a given network model
  1516  func PredefinedNetworks() []networkallocator.PredefinedNetworkData {
  1517  	return cnmallocator.PredefinedNetworks()
  1518  }
  1519  
  1520  // updateTaskStatus sets TaskStatus and updates timestamp.
  1521  func updateTaskStatus(t *api.Task, newStatus api.TaskState, message string) {
  1522  	t.Status = api.TaskStatus{
  1523  		State:     newStatus,
  1524  		Message:   message,
  1525  		Timestamp: ptypes.MustTimestampProto(time.Now()),
  1526  	}
  1527  }
  1528  
  1529  // IsIngressNetwork returns whether the passed network is an ingress network.
  1530  func IsIngressNetwork(nw *api.Network) bool {
  1531  	return networkallocator.IsIngressNetwork(nw)
  1532  }
  1533  
  1534  // GetIngressNetwork fetches the ingress network from the store.
  1535  // ErrNoIngress will be returned if the ingress network is not present;
  1536  // otherwise the error is nil. In case of any other failure in accessing
  1537  // the store, the respective error will be reported as is.
  1538  func GetIngressNetwork(s *store.MemoryStore) (*api.Network, error) {
  1539  	var (
  1540  		networks []*api.Network
  1541  		err      error
  1542  	)
  1543  	s.View(func(tx store.ReadTx) {
  1544  		networks, err = store.FindNetworks(tx, store.All)
  1545  	})
  1546  	if err != nil {
  1547  		return nil, err
  1548  	}
  1549  	for _, n := range networks {
  1550  		if IsIngressNetwork(n) {
  1551  			return n, nil
  1552  		}
  1553  	}
  1554  	return nil, ErrNoIngress
  1555  }
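
// A minimal usage sketch for GetIngressNetwork, mirroring the three-way
// handling in doNetworkInit; the memory store s is assumed to already exist.
func getIngressNetworkSketch(s *store.MemoryStore) {
	ingress, err := GetIngressNetwork(s)
	switch err {
	case nil:
		// The ingress network is present and identified by ingress.ID.
		_ = ingress
	case ErrNoIngress:
		// The user removed the ingress network; this is not a failure.
	default:
		// Any other error indicates a store access failure.
	}
}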