github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/firewaller/firewaller.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package firewaller
     5  
     6  import (
     7  	"io"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/EvilSuperstars/go-cidrman"
    12  	"github.com/juju/clock"
    13  	"github.com/juju/collections/set"
    14  	"github.com/juju/errors"
    15  	"gopkg.in/juju/charm.v6"
    16  	"gopkg.in/juju/names.v2"
    17  	"gopkg.in/juju/worker.v1"
    18  	"gopkg.in/juju/worker.v1/catacomb"
    19  	"gopkg.in/macaroon.v2-unstable"
    20  
    21  	"github.com/juju/juju/api"
    22  	"github.com/juju/juju/api/firewaller"
    23  	"github.com/juju/juju/api/remoterelations"
    24  	"github.com/juju/juju/apiserver/params"
    25  	"github.com/juju/juju/core/instance"
    26  	corenetwork "github.com/juju/juju/core/network"
    27  	"github.com/juju/juju/core/relation"
    28  	"github.com/juju/juju/core/watcher"
    29  	"github.com/juju/juju/environs"
    30  	"github.com/juju/juju/environs/config"
    31  	"github.com/juju/juju/environs/context"
    32  	"github.com/juju/juju/environs/instances"
    33  	"github.com/juju/juju/network"
    34  	"github.com/juju/juju/worker/common"
    35  )
    36  
// FirewallerAPI exposes functionality off the firewaller API facade to a worker.
type FirewallerAPI interface {
	// WatchModelMachines returns a watcher reporting ids of machines
	// whose lifecycle has changed in the model.
	WatchModelMachines() (watcher.StringsWatcher, error)
	// WatchOpenedPorts returns a watcher reporting opened-port change
	// keys; each key encodes a machine and optionally a subnet
	// (decoded by parsePortsKey in the main loop).
	WatchOpenedPorts() (watcher.StringsWatcher, error)
	// Machine returns the API representation of the tagged machine.
	Machine(tag names.MachineTag) (*firewaller.Machine, error)
	// Unit returns the API representation of the tagged unit.
	Unit(tag names.UnitTag) (*firewaller.Unit, error)
	// Relation returns the API representation of the tagged relation.
	Relation(tag names.RelationTag) (*firewaller.Relation, error)
	// WatchEgressAddressesForRelation watches for changes to the egress
	// addresses of the tagged relation.
	WatchEgressAddressesForRelation(tag names.RelationTag) (watcher.StringsWatcher, error)
	// WatchIngressAddressesForRelation watches for changes to the
	// ingress addresses of the tagged relation.
	WatchIngressAddressesForRelation(tag names.RelationTag) (watcher.StringsWatcher, error)
	// ControllerAPIInfoForModel returns connection details for the
	// controller hosting the given model.
	ControllerAPIInfoForModel(modelUUID string) (*api.Info, error)
	// MacaroonForRelation returns the macaroon stored for the relation
	// with the given key.
	MacaroonForRelation(relationKey string) (*macaroon.Macaroon, error)
	// SetRelationStatus records the status of the relation with the
	// given key.
	SetRelationStatus(relationKey string, status relation.Status, message string) error
	// FirewallRules returns any firewall rules configured for the
	// given well-known application names.
	FirewallRules(applicationNames ...string) ([]params.FirewallRule, error)
}
    51  
// CrossModelFirewallerFacade exposes firewaller functionality on the
// remote offering model to a worker.
type CrossModelFirewallerFacade interface {
	// PublishIngressNetworkChange notifies the offering model that the
	// ingress networks for a relation have changed.
	PublishIngressNetworkChange(params.IngressNetworksChangeEvent) error
	// WatchEgressAddressesForRelation watches the egress addresses of
	// the identified remote relation.
	WatchEgressAddressesForRelation(details params.RemoteEntityArg) (watcher.StringsWatcher, error)
}
    58  
// CrossModelFirewallerFacadeCloser implements CrossModelFirewallerFacade
// and adds a Close() method.
type CrossModelFirewallerFacadeCloser interface {
	io.Closer
	CrossModelFirewallerFacade
}
    65  
// EnvironFirewaller defines methods to allow the worker to perform
// firewall operations (open/close ports) on a Juju cloud environment.
type EnvironFirewaller interface {
	environs.Firewaller
}

// EnvironInstances defines methods to allow the worker to perform
// operations on instances in a Juju cloud environment.
type EnvironInstances interface {
	// Instances returns the instances with the given ids.
	Instances(ctx context.ProviderCallContext, ids []instance.Id) ([]instances.Instance, error)
}

// newCrossModelFacadeFunc creates a cross-model firewaller facade
// from controller API connection info; used to talk to the offering
// model for cross-model relations.
type newCrossModelFacadeFunc func(*api.Info) (CrossModelFirewallerFacadeCloser, error)
    79  
// Config defines the operation of a Worker.
type Config struct {
	// ModelUUID is the UUID of the model this worker operates on.
	ModelUUID string
	// Mode is the firewall mode: config.FwInstance or config.FwGlobal
	// (any other value is rejected by NewFirewaller).
	Mode string
	// FirewallerAPI is the firewaller facade; required.
	FirewallerAPI FirewallerAPI
	// RemoteRelationsApi is the remote relations facade; required.
	RemoteRelationsApi *remoterelations.Client
	// EnvironFirewaller is required when Mode is config.FwGlobal.
	EnvironFirewaller EnvironFirewaller
	// EnvironInstances looks up instances for per-instance firewalling;
	// required.
	EnvironInstances EnvironInstances

	// NewCrossModelFacadeFunc creates facades on remote offering
	// models for cross-model relations; required.
	NewCrossModelFacadeFunc newCrossModelFacadeFunc

	// Clock is optional; the wall clock is used when nil.
	Clock clock.Clock

	// CredentialAPI is used to build the cloud call context; required.
	CredentialAPI common.CredentialAPI
}
    95  
    96  // Validate returns an error if cfg cannot drive a Worker.
    97  func (cfg Config) Validate() error {
    98  	if cfg.ModelUUID == "" {
    99  		return errors.NotValidf("empty model uuid")
   100  	}
   101  	if cfg.FirewallerAPI == nil {
   102  		return errors.NotValidf("nil Firewaller Facade")
   103  	}
   104  	if cfg.RemoteRelationsApi == nil {
   105  		return errors.NotValidf("nil RemoteRelations Facade")
   106  	}
   107  	if cfg.Mode == config.FwGlobal && cfg.EnvironFirewaller == nil {
   108  		return errors.NotValidf("nil EnvironFirewaller")
   109  	}
   110  	if cfg.EnvironInstances == nil {
   111  		return errors.NotValidf("nil EnvironInstances")
   112  	}
   113  	if cfg.NewCrossModelFacadeFunc == nil {
   114  		return errors.NotValidf("nil Cross Model Facade func")
   115  	}
   116  	if cfg.CredentialAPI == nil {
   117  		return errors.NotValidf("nil Credential Facade")
   118  	}
   119  	return nil
   120  }
   121  
   122  type portRanges map[corenetwork.PortRange]bool
   123  
// Firewaller watches the state for port ranges opened or closed on
// machines and reflects those changes onto the backing environment.
// Uses Firewaller API V1.
type Firewaller struct {
	catacomb           catacomb.Catacomb
	firewallerApi      FirewallerAPI
	remoteRelationsApi *remoterelations.Client
	environFirewaller  EnvironFirewaller
	environInstances   EnvironInstances

	// Watchers created in setUp; registered with the catacomb so their
	// lifetimes are tied to the worker's.
	machinesWatcher watcher.StringsWatcher
	portsWatcher    watcher.StringsWatcher
	// machineds holds per-machine tracking state.
	machineds map[names.MachineTag]*machineData
	// unitsChange delivers unit-assignment changes to the main loop.
	unitsChange chan *unitsChange
	// unitds holds per-unit tracking state.
	unitds map[names.UnitTag]*unitData
	// applicationids holds per-application tracking state.
	applicationids map[names.ApplicationTag]*applicationData
	// exposedChange delivers application exposure changes to the main loop.
	exposedChange chan *exposedChange
	// globalMode is true when the model's firewall-mode is global.
	globalMode bool
	// globalIngressRuleRef is only populated in global mode.
	globalIngressRuleRef map[string]int // map of rule names to count of occurrences

	modelUUID                  string
	newRemoteFirewallerAPIFunc newCrossModelFacadeFunc
	// remoteRelationsWatcher reports remote relation changes.
	remoteRelationsWatcher watcher.StringsWatcher
	// localRelationsChange delivers egress-network changes from remote
	// (consuming) models to the main loop.
	localRelationsChange chan *remoteRelationNetworkChange
	// relationIngress holds per-relation cross-model ingress state.
	relationIngress map[names.RelationTag]*remoteRelationData
	// relationWorkerRunner manages per-relation workers; their failures
	// are not fatal to the firewaller itself.
	relationWorkerRunner *worker.Runner
	pollClock            clock.Clock

	cloudCallContext context.ProviderCallContext
}
   154  
   155  // NewFirewaller returns a new Firewaller.
   156  func NewFirewaller(cfg Config) (worker.Worker, error) {
   157  	if err := cfg.Validate(); err != nil {
   158  		return nil, errors.Trace(err)
   159  	}
   160  	clk := cfg.Clock
   161  	if clk == nil {
   162  		clk = clock.WallClock
   163  	}
   164  
   165  	fw := &Firewaller{
   166  		firewallerApi:              cfg.FirewallerAPI,
   167  		remoteRelationsApi:         cfg.RemoteRelationsApi,
   168  		environFirewaller:          cfg.EnvironFirewaller,
   169  		environInstances:           cfg.EnvironInstances,
   170  		newRemoteFirewallerAPIFunc: cfg.NewCrossModelFacadeFunc,
   171  		modelUUID:                  cfg.ModelUUID,
   172  		machineds:                  make(map[names.MachineTag]*machineData),
   173  		unitsChange:                make(chan *unitsChange),
   174  		unitds:                     make(map[names.UnitTag]*unitData),
   175  		applicationids:             make(map[names.ApplicationTag]*applicationData),
   176  		exposedChange:              make(chan *exposedChange),
   177  		relationIngress:            make(map[names.RelationTag]*remoteRelationData),
   178  		localRelationsChange:       make(chan *remoteRelationNetworkChange),
   179  		pollClock:                  clk,
   180  		relationWorkerRunner: worker.NewRunner(worker.RunnerParams{
   181  			Clock: clk,
   182  
   183  			// One of the remote relation workers failing should not
   184  			// prevent the others from running.
   185  			IsFatal: func(error) bool { return false },
   186  
   187  			// For any failures, try again in 1 minute.
   188  			RestartDelay: time.Minute,
   189  		}),
   190  		cloudCallContext: common.NewCloudCallContext(cfg.CredentialAPI, nil),
   191  	}
   192  
   193  	switch cfg.Mode {
   194  	case config.FwInstance:
   195  	case config.FwGlobal:
   196  		fw.globalMode = true
   197  		fw.globalIngressRuleRef = make(map[string]int)
   198  	default:
   199  		return nil, errors.Errorf("invalid firewall-mode %q", cfg.Mode)
   200  	}
   201  
   202  	err := catacomb.Invoke(catacomb.Plan{
   203  		Site: &fw.catacomb,
   204  		Work: fw.loop,
   205  		Init: []worker.Worker{fw.relationWorkerRunner},
   206  	})
   207  	if err != nil {
   208  		return nil, errors.Trace(err)
   209  	}
   210  	return fw, nil
   211  }
   212  
   213  func (fw *Firewaller) setUp() error {
   214  	var err error
   215  	fw.machinesWatcher, err = fw.firewallerApi.WatchModelMachines()
   216  	if err != nil {
   217  		return errors.Trace(err)
   218  	}
   219  	if err := fw.catacomb.Add(fw.machinesWatcher); err != nil {
   220  		return errors.Trace(err)
   221  	}
   222  
   223  	fw.portsWatcher, err = fw.firewallerApi.WatchOpenedPorts()
   224  	if err != nil {
   225  		return errors.Annotatef(err, "failed to start ports watcher")
   226  	}
   227  	if err := fw.catacomb.Add(fw.portsWatcher); err != nil {
   228  		return errors.Trace(err)
   229  	}
   230  
   231  	fw.remoteRelationsWatcher, err = fw.remoteRelationsApi.WatchRemoteRelations()
   232  	if err != nil {
   233  		return errors.Trace(err)
   234  	}
   235  	if err := fw.catacomb.Add(fw.remoteRelationsWatcher); err != nil {
   236  		return errors.Trace(err)
   237  	}
   238  
   239  	logger.Debugf("started watching opened port ranges for the model")
   240  	return nil
   241  }
   242  
// loop is the worker's main event loop. It wires up the watchers
// created in setUp and dispatches each kind of change notification to
// its handler until the catacomb is killed.
func (fw *Firewaller) loop() error {
	if err := fw.setUp(); err != nil {
		return errors.Trace(err)
	}
	// reconciled records whether the one-off reconciliation of the
	// environment against the desired state has run; it is deferred
	// until after the first machines event so all machines are known.
	var reconciled bool
	portsChange := fw.portsWatcher.Changes()
	for {
		select {
		case <-fw.catacomb.Dying():
			return fw.catacomb.ErrDying()
		case change, ok := <-fw.machinesWatcher.Changes():
			if !ok {
				return errors.New("machines watcher closed")
			}
			for _, machineId := range change {
				if err := fw.machineLifeChanged(names.NewMachineTag(machineId)); err != nil {
					return err
				}
			}
			if !reconciled {
				reconciled = true
				var err error
				if fw.globalMode {
					err = fw.reconcileGlobal()
				} else {
					err = fw.reconcileInstances()
				}
				if err != nil {
					return errors.Trace(err)
				}
			}
		case change, ok := <-portsChange:
			if !ok {
				return errors.New("ports watcher closed")
			}
			for _, portsGlobalKey := range change {
				// Each key encodes a machine and (optionally) a subnet.
				machineTag, subnetTag, err := parsePortsKey(portsGlobalKey)
				if err != nil {
					return errors.Trace(err)
				}
				if err := fw.openedPortsChanged(machineTag, subnetTag); err != nil {
					return errors.Trace(err)
				}
			}
		case change, ok := <-fw.remoteRelationsWatcher.Changes():
			if !ok {
				return errors.New("remote relations watcher closed")
			}
			for _, relationKey := range change {
				if err := fw.relationLifeChanged(names.NewRelationTag(relationKey)); err != nil {
					return err
				}
			}
		case change := <-fw.localRelationsChange:
			// We have a notification that the remote (consuming) model
			// has changed egress networks so need to update the local
			// model to allow those networks through the firewall.
			if err := fw.relationIngressChanged(change); err != nil {
				return errors.Trace(err)
			}
		case change := <-fw.unitsChange:
			if err := fw.unitsChanged(change); err != nil {
				return errors.Trace(err)
			}
		case change := <-fw.exposedChange:
			// An application's exposed flag flipped: reflush all of its
			// units so their rules reflect the new exposure.
			change.applicationd.exposed = change.exposed
			unitds := []*unitData{}
			for _, unitd := range change.applicationd.unitds {
				unitds = append(unitds, unitd)
			}
			if err := fw.flushUnits(unitds); err != nil {
				return errors.Annotate(err, "cannot change firewall ports")
			}
		}
	}
}
   319  
   320  func (fw *Firewaller) relationIngressChanged(change *remoteRelationNetworkChange) error {
   321  	logger.Debugf("process remote relation ingress change for %v", change.relationTag)
   322  	relData, ok := fw.relationIngress[change.relationTag]
   323  	if ok {
   324  		relData.networks = change.networks
   325  		relData.ingressRequired = change.ingressRequired
   326  	}
   327  	appData, ok := fw.applicationids[change.localApplicationTag]
   328  	if !ok {
   329  		logger.Debugf("ignoring unknown application: %v", change.localApplicationTag)
   330  		return nil
   331  	}
   332  	unitds := []*unitData{}
   333  	for _, unitd := range appData.unitds {
   334  		unitds = append(unitds, unitd)
   335  	}
   336  	if err := fw.flushUnits(unitds); err != nil {
   337  		return errors.Annotate(err, "cannot change firewall ports")
   338  	}
   339  	return nil
   340  }
   341  
// startMachine creates a new data value for tracking details of the
// machine and starts watching the machine for units added or removed.
func (fw *Firewaller) startMachine(tag names.MachineTag) error {
	machined := &machineData{
		fw:           fw,
		tag:          tag,
		unitds:       make(map[names.UnitTag]*unitData),
		ingressRules: make([]network.IngressRule, 0),
		definedPorts: make(map[names.UnitTag]portRanges),
	}
	m, err := machined.machine()
	if params.IsCodeNotFound(err) {
		// Machine is already gone; nothing to track.
		logger.Debugf("not watching %q", tag)
		return nil
	} else if err != nil {
		return errors.Annotate(err, "cannot watch machine units")
	}
	manual, err := m.IsManual()
	if err != nil {
		return errors.Trace(err)
	}
	if manual {
		// Don't track manual machines, we can't change their ports.
		logger.Debugf("not watching manual %q", tag)
		return nil
	}
	unitw, err := m.WatchUnits()
	if err != nil {
		return errors.Trace(err)
	}
	// XXX(fwereade): this is the best of a bunch of bad options. We've started
	// the watch, so we're responsible for it; but we (probably?) need to do this
	// little dance below to update the machined data on the fw loop goroutine,
	// whence it's usually accessed, before we start the machined watchLoop
	// below. That catacomb *should* be the only one responsible -- and it *is*
	// responsible -- but having it in the main fw catacomb as well does no harm,
	// and greatly simplifies the code below (which would otherwise have to
	// manage unitw lifetime and errors manually).
	if err := fw.catacomb.Add(unitw); err != nil {
		return errors.Trace(err)
	}
	// Consume the watcher's initial event here, on the firewaller's own
	// goroutine, so machineds/unitds are consistent before machined's
	// watchLoop starts.
	select {
	case <-fw.catacomb.Dying():
		return fw.catacomb.ErrDying()
	case change, ok := <-unitw.Changes():
		if !ok {
			return errors.New("machine units watcher closed")
		}
		fw.machineds[tag] = machined
		err = fw.unitsChanged(&unitsChange{machined, change})
		if err != nil {
			// Roll back the registration on failure.
			delete(fw.machineds, tag)
			return errors.Annotatef(err, "cannot respond to units changes for %q", tag)
		}
	}

	err = catacomb.Invoke(catacomb.Plan{
		Site: &machined.catacomb,
		Work: func() error {
			return machined.watchLoop(unitw)
		},
	})
	if err != nil {
		delete(fw.machineds, tag)
		return errors.Trace(err)
	}

	// register the machined with the firewaller's catacomb.
	err = fw.catacomb.Add(machined)
	if err == nil {
		logger.Debugf("started watching %q", tag)
	}
	return err
}
   416  
   417  // startUnit creates a new data value for tracking details of the unit
   418  // The provided machineTag must be the tag for the machine the unit was last
   419  // observed to be assigned to.
   420  func (fw *Firewaller) startUnit(unit *firewaller.Unit, machineTag names.MachineTag) error {
   421  	application, err := unit.Application()
   422  	if err != nil {
   423  		return err
   424  	}
   425  	applicationTag := application.Tag()
   426  	unitTag := unit.Tag()
   427  	if err != nil {
   428  		return err
   429  	}
   430  	unitd := &unitData{
   431  		fw:   fw,
   432  		unit: unit,
   433  		tag:  unitTag,
   434  	}
   435  	fw.unitds[unitTag] = unitd
   436  
   437  	unitd.machined = fw.machineds[machineTag]
   438  	unitd.machined.unitds[unitTag] = unitd
   439  	if fw.applicationids[applicationTag] == nil {
   440  		err := fw.startApplication(application)
   441  		if err != nil {
   442  			delete(fw.unitds, unitTag)
   443  			delete(unitd.machined.unitds, unitTag)
   444  			return err
   445  		}
   446  	}
   447  	unitd.applicationd = fw.applicationids[applicationTag]
   448  	unitd.applicationd.unitds[unitTag] = unitd
   449  
   450  	m, err := unitd.machined.machine()
   451  	if err != nil {
   452  		return err
   453  	}
   454  
   455  	// check if the machine has ports open on any subnets
   456  	subnetTags, err := m.ActiveSubnets()
   457  	if err != nil {
   458  		return errors.Annotatef(err, "failed getting %q active subnets", machineTag)
   459  	}
   460  	for _, subnetTag := range subnetTags {
   461  		err := fw.openedPortsChanged(machineTag, subnetTag)
   462  		if err != nil {
   463  			return err
   464  		}
   465  	}
   466  
   467  	return nil
   468  }
   469  
   470  // startApplication creates a new data value for tracking details of the
   471  // application and starts watching the application for exposure changes.
   472  func (fw *Firewaller) startApplication(app *firewaller.Application) error {
   473  	exposed, err := app.IsExposed()
   474  	if err != nil {
   475  		return err
   476  	}
   477  	applicationd := &applicationData{
   478  		fw:          fw,
   479  		application: app,
   480  		exposed:     exposed,
   481  		unitds:      make(map[names.UnitTag]*unitData),
   482  	}
   483  	fw.applicationids[app.Tag()] = applicationd
   484  
   485  	err = catacomb.Invoke(catacomb.Plan{
   486  		Site: &applicationd.catacomb,
   487  		Work: func() error {
   488  			return applicationd.watchLoop(exposed)
   489  		},
   490  	})
   491  	if err != nil {
   492  		return errors.Trace(err)
   493  	}
   494  	if err := fw.catacomb.Add(applicationd); err != nil {
   495  		return errors.Trace(err)
   496  	}
   497  	return nil
   498  }
   499  
   500  // reconcileGlobal compares the initially started watcher for machines,
   501  // units and applications with the opened and closed ports globally and
   502  // opens and closes the appropriate ports for the whole environment.
   503  func (fw *Firewaller) reconcileGlobal() error {
   504  	var machines []*machineData
   505  	for _, machined := range fw.machineds {
   506  		machines = append(machines, machined)
   507  	}
   508  	want, err := fw.gatherIngressRules(machines...)
   509  	initialPortRanges, err := fw.environFirewaller.IngressRules(fw.cloudCallContext)
   510  	if err != nil {
   511  		return err
   512  	}
   513  
   514  	// Check which ports to open or to close.
   515  	toOpen, toClose := diffRanges(initialPortRanges, want)
   516  	if len(toOpen) > 0 {
   517  		logger.Infof("opening global ports %v", toOpen)
   518  		if err := fw.environFirewaller.OpenPorts(fw.cloudCallContext, toOpen); err != nil {
   519  			return err
   520  		}
   521  	}
   522  	if len(toClose) > 0 {
   523  		logger.Infof("closing global ports %v", toClose)
   524  		if err := fw.environFirewaller.ClosePorts(fw.cloudCallContext, toClose); err != nil {
   525  			return err
   526  		}
   527  	}
   528  	return nil
   529  }
   530  
// reconcileInstances compares the initially started watcher for machines,
// units and applications with the opened and closed ports of the instances and
// opens and closes the appropriate ports for each instance.
func (fw *Firewaller) reconcileInstances() error {
	for _, machined := range fw.machineds {
		m, err := machined.machine()
		if params.IsCodeNotFound(err) {
			// The machine is gone; stop tracking it.
			if err := fw.forgetMachine(machined); err != nil {
				return err
			}
			continue
		}
		if err != nil {
			return err
		}
		instanceId, err := m.InstanceId()
		if errors.IsNotProvisioned(err) {
			// Nothing to reconcile for an unprovisioned machine.
			logger.Errorf("Machine not yet provisioned: %v", err)
			continue
		}
		if err != nil {
			return err
		}
		envInstances, err := fw.environInstances.Instances(fw.cloudCallContext, []instance.Id{instanceId})
		if err == environs.ErrNoInstances {
			// NOTE(review): this returns early and skips reconciling
			// any remaining machines rather than just this one —
			// confirm whether `continue` was intended.
			return nil
		}
		if err != nil {
			return err
		}
		machineId := machined.tag.Id()

		fwInstance, ok := envInstances[0].(instances.InstanceFirewaller)
		if !ok {
			// NOTE(review): an instance without per-instance firewall
			// support ends reconciliation for all machines, not just
			// this one — confirm intent.
			return nil
		}

		initialRules, err := fwInstance.IngressRules(fw.cloudCallContext, machineId)
		if err != nil {
			return err
		}

		// Check which ports to open or to close.
		toOpen, toClose := diffRanges(initialRules, machined.ingressRules)
		if len(toOpen) > 0 {
			logger.Infof("opening instance port ranges %v for %q",
				toOpen, machined.tag)
			if err := fwInstance.OpenPorts(fw.cloudCallContext, machineId, toOpen); err != nil {
				// TODO(mue) Add local retry logic.
				return err
			}
		}
		if len(toClose) > 0 {
			logger.Infof("closing instance port ranges %v for %q",
				toClose, machined.tag)
			if err := fwInstance.ClosePorts(fw.cloudCallContext, machineId, toClose); err != nil {
				// TODO(mue) Add local retry logic.
				return err
			}
		}
	}
	return nil
}
   594  
// unitsChanged responds to changes to the assigned units: it starts
// tracking newly assigned live units, forgets dead/removed/reassigned
// ones, and reflushes firewall rules for every unit that changed.
func (fw *Firewaller) unitsChanged(change *unitsChange) error {
	changed := []*unitData{}
	for _, name := range change.units {
		unitTag := names.NewUnitTag(name)
		unit, err := fw.firewallerApi.Unit(unitTag)
		if err != nil && !params.IsCodeNotFound(err) {
			return err
		}
		// unit is nil when it no longer exists (NotFound above).
		var machineTag names.MachineTag
		if unit != nil {
			machineTag, err = unit.AssignedMachine()
			if params.IsCodeNotFound(err) {
				continue
			} else if err != nil && !params.IsCodeNotAssigned(err) {
				return err
			}
		}
		if unitd, known := fw.unitds[unitTag]; known {
			knownMachineTag := fw.unitds[unitTag].machined.tag
			// Forget units that are gone, dead, or moved to another
			// machine; their rules will be flushed away below.
			if unit == nil || unit.Life() == params.Dead || machineTag != knownMachineTag {
				fw.forgetUnit(unitd)
				changed = append(changed, unitd)
				logger.Debugf("stopped watching unit %s", name)
			}
			// TODO(dfc) fw.machineds should be map[names.Tag]
		} else if unit != nil && unit.Life() != params.Dead && fw.machineds[machineTag] != nil {
			// A live unit on a machine we track: start tracking it.
			err = fw.startUnit(unit, machineTag)
			if params.IsCodeNotFound(err) {
				continue
			}
			if err != nil {
				return err
			}
			changed = append(changed, fw.unitds[unitTag])
			logger.Debugf("started watching %q", unitTag)
		}
	}
	if err := fw.flushUnits(changed); err != nil {
		return errors.Annotate(err, "cannot change firewall ports")
	}
	return nil
}
   638  
// openedPortsChanged handles port change notifications for a machine
// (and subnet): it rebuilds the machine's per-unit port-range map from
// the current state and flushes the machine if anything changed.
// NotFound errors are swallowed (see the deferred handler) since the
// machine or ports may legitimately disappear underneath us.
func (fw *Firewaller) openedPortsChanged(machineTag names.MachineTag, subnetTag names.SubnetTag) (err error) {
	defer func() {
		if params.IsCodeNotFound(err) {
			err = nil
		}
	}()
	machined, ok := fw.machineds[machineTag]
	if !ok {
		// It is common to receive a port change notification before
		// registering the machine, so if a machine is not found in
		// firewaller's list, just skip the change.  Look up will also
		// fail if it's a manual machine.
		logger.Debugf("failed to lookup %q, skipping port change", machineTag)
		return nil
	}

	m, err := machined.machine()
	if err != nil {
		return err
	}

	ports, err := m.OpenedPorts(subnetTag)
	if err != nil {
		return err
	}

	newPortRanges := make(map[names.UnitTag]portRanges)
	for portRange, unitTag := range ports {
		unitd, ok := machined.unitds[unitTag]
		if !ok {
			// It is common to receive port change notification before
			// registering a unit. Skip handling the port change - it will
			// be handled when the unit is registered.
			// (Note this abandons the whole update, not just this unit.)
			logger.Debugf("failed to lookup %q, skipping port change", unitTag)
			return nil
		}
		ranges, ok := newPortRanges[unitd.tag]
		if !ok {
			ranges = make(portRanges)
			newPortRanges[unitd.tag] = ranges
		}
		ranges[portRange] = true
	}

	// Only flush when the desired per-unit ranges actually changed.
	if !unitPortsEqual(machined.definedPorts, newPortRanges) {
		machined.definedPorts = newPortRanges
		return fw.flushMachine(machined)
	}
	return nil
}
   690  
   691  func unitPortsEqual(a, b map[names.UnitTag]portRanges) bool {
   692  	if len(a) != len(b) {
   693  		return false
   694  	}
   695  	for key, valueA := range a {
   696  		valueB, exists := b[key]
   697  		if !exists {
   698  			return false
   699  		}
   700  		if !portRangesEqual(valueA, valueB) {
   701  			return false
   702  		}
   703  	}
   704  	return true
   705  }
   706  
   707  func portRangesEqual(a, b portRanges) bool {
   708  	if len(a) != len(b) {
   709  		return false
   710  	}
   711  	for key, valueA := range a {
   712  		valueB, exists := b[key]
   713  		if !exists {
   714  			return false
   715  		}
   716  		if valueA != valueB {
   717  			return false
   718  		}
   719  	}
   720  	return true
   721  }
   722  
   723  // flushUnits opens and closes ports for the passed unit data.
   724  func (fw *Firewaller) flushUnits(unitds []*unitData) error {
   725  	machineds := map[names.MachineTag]*machineData{}
   726  	for _, unitd := range unitds {
   727  		machineds[unitd.machined.tag] = unitd.machined
   728  	}
   729  	for _, machined := range machineds {
   730  		if err := fw.flushMachine(machined); err != nil {
   731  			return err
   732  		}
   733  	}
   734  	return nil
   735  }
   736  
// flushMachine opens and closes ports for the passed machine.
func (fw *Firewaller) flushMachine(machined *machineData) error {
	// Compute the full rule set the machine should have now...
	want, err := fw.gatherIngressRules(machined)
	if err != nil {
		return errors.Trace(err)
	}
	// ...diff it against what we last applied, record the new state,
	// then push only the delta to the environment.
	toOpen, toClose := diffRanges(machined.ingressRules, want)
	machined.ingressRules = want
	if fw.globalMode {
		return fw.flushGlobalPorts(toOpen, toClose)
	}
	return fw.flushInstancePorts(machined, toOpen, toClose)
}
   750  
// gatherIngressRules returns the ingress rules to open and close
// for the specified machines, derived from each machine's defined
// per-unit port ranges and the exposure / cross-model-ingress state of
// the owning applications.
func (fw *Firewaller) gatherIngressRules(machines ...*machineData) ([]network.IngressRule, error) {
	var want []network.IngressRule
	for _, machined := range machines {
		for unitTag, portRanges := range machined.definedPorts {
			unitd, known := machined.unitds[unitTag]
			if !known {
				logger.Debugf("no ingress rules for unknown %v on %v", unitTag, machined.tag)
				continue
			}

			cidrs := set.NewStrings()
			// If the unit is exposed, allow access from everywhere.
			if unitd.applicationd.exposed {
				cidrs.Add("0.0.0.0/0")
			} else {
				// Not exposed, so add any ingress rules required by remote relations.
				if err := fw.updateForRemoteRelationIngress(unitd.applicationd.application.Tag(), cidrs); err != nil {
					return nil, errors.Trace(err)
				}
				logger.Debugf("CIDRS for %v: %v", unitTag, cidrs.Values())
			}
			// No source CIDRs means no rule at all for this port range.
			if cidrs.Size() > 0 {
				for portRange := range portRanges {
					sourceCidrs := cidrs.SortedValues()
					rule, err := network.NewIngressRule(portRange.Protocol, portRange.FromPort, portRange.ToPort, sourceCidrs...)
					if err != nil {
						return nil, errors.Trace(err)
					}
					want = append(want, rule)
				}
			}
		}
	}
	return want, nil
}
   788  
// maxAllowedCIDRS is the largest number of distinct ingress source
// CIDRs tolerated per application before they are merged, and failing
// that, replaced by a whitelist or 0.0.0.0/0.
// TODO(wallyworld) - consider making this configurable.
const maxAllowedCIDRS = 20
   791  
// updateForRemoteRelationIngress adds to cidrs the ingress networks
// required by all cross-model relations the given application takes
// part in. If the set of networks grows beyond maxAllowedCIDRS it is
// first merged; if still too large, a configured firewall whitelist is
// used instead, and failing that access is opened to the world.
func (fw *Firewaller) updateForRemoteRelationIngress(appTag names.ApplicationTag, cidrs set.Strings) error {
	logger.Debugf("finding egress rules for %v", appTag)
	// Now create the rules for any remote relations of which the
	// unit's application is a part.
	newCidrs := make(set.Strings)
	for _, data := range fw.relationIngress {
		if data.localApplicationTag != appTag {
			continue
		}
		if !data.ingressRequired {
			continue
		}
		for _, cidr := range data.networks.Values() {
			newCidrs.Add(cidr)
		}
	}
	// If we have too many CIDRs to create a rule for, consolidate.
	// If a firewall rule with a whitelist of CIDRs has been set up,
	// use that, else open to the world.
	if newCidrs.Size() > maxAllowedCIDRS {
		// First, try and merge the cidrs.
		merged, err := cidrman.MergeCIDRs(newCidrs.Values())
		if err != nil {
			return errors.Trace(err)
		}
		newCidrs = set.NewStrings(merged...)
	}

	// If there's still too many after merging, look for any firewall whitelist.
	if newCidrs.Size() > maxAllowedCIDRS {
		newCidrs = make(set.Strings)
		rules, err := fw.firewallerApi.FirewallRules("juju-application-offer")
		if err != nil {
			return errors.Trace(err)
		}
		if len(rules) > 0 {
			rule := rules[0]
			if len(rule.WhitelistCIDRS) > 0 {
				for _, cidr := range rule.WhitelistCIDRS {
					newCidrs.Add(cidr)
				}
			}
		}
		// No relevant firewall rule exists, so go public.
		if newCidrs.Size() == 0 {
			newCidrs.Add("0.0.0.0/0")
		}
	}
	// Fold the computed networks into the caller's set.
	for _, cidr := range newCidrs.Values() {
		cidrs.Add(cidr)
	}
	return nil
}
   845  
   846  // flushGlobalPorts opens and closes global ports in the environment.
   847  // It keeps a reference count for ports so that only 0-to-1 and 1-to-0 events
   848  // modify the environment.
   849  func (fw *Firewaller) flushGlobalPorts(rawOpen, rawClose []network.IngressRule) error {
   850  	// Filter which ports are really to open or close.
   851  	var toOpen, toClose []network.IngressRule
   852  	for _, rule := range rawOpen {
   853  		ruleName := rule.String()
   854  		if fw.globalIngressRuleRef[ruleName] == 0 {
   855  			toOpen = append(toOpen, rule)
   856  		}
   857  		fw.globalIngressRuleRef[ruleName]++
   858  	}
   859  	for _, rule := range rawClose {
   860  		ruleName := rule.String()
   861  		fw.globalIngressRuleRef[ruleName]--
   862  		if fw.globalIngressRuleRef[ruleName] == 0 {
   863  			toClose = append(toClose, rule)
   864  			delete(fw.globalIngressRuleRef, ruleName)
   865  		}
   866  	}
   867  	// Open and close the ports.
   868  	if len(toOpen) > 0 {
   869  		if err := fw.environFirewaller.OpenPorts(fw.cloudCallContext, toOpen); err != nil {
   870  			// TODO(mue) Add local retry logic.
   871  			return err
   872  		}
   873  		network.SortIngressRules(toOpen)
   874  		logger.Infof("opened port ranges %v in environment", toOpen)
   875  	}
   876  	if len(toClose) > 0 {
   877  		if err := fw.environFirewaller.ClosePorts(fw.cloudCallContext, toClose); err != nil {
   878  			// TODO(mue) Add local retry logic.
   879  			return err
   880  		}
   881  		network.SortIngressRules(toClose)
   882  		logger.Infof("closed port ranges %v in environment", toClose)
   883  	}
   884  	return nil
   885  }
   886  
// flushInstancePorts opens and closes ports on an individual machine's
// instance. NotFound errors (machine already removed) are swallowed by
// the deferred handler and reported as success.
func (fw *Firewaller) flushInstancePorts(machined *machineData, toOpen, toClose []network.IngressRule) (err error) {
	defer func() {
		// If the machine has gone away, there is nothing left to flush.
		if params.IsCodeNotFound(err) {
			err = nil
		}
	}()

	// If there's nothing to do, do nothing.
	// This is important because when a machine is first created,
	// it will have no instance id but also no open ports -
	// InstanceId will fail but we don't care.
	logger.Debugf("flush instance ports: to open %v, to close %v", toOpen, toClose)
	if len(toOpen) == 0 && len(toClose) == 0 {
		return nil
	}
	m, err := machined.machine()
	if err != nil {
		return err
	}
	machineId := machined.tag.Id()
	instanceId, err := m.InstanceId()
	if params.IsCodeNotProvisioned(err) {
		// Not provisioned yet, so nothing to do for this instance
		return nil
	}
	if err != nil {
		return err
	}
	envInstances, err := fw.environInstances.Instances(fw.cloudCallContext, []instance.Id{instanceId})
	if err != nil {
		return err
	}
	// Per-instance firewalling is optional in providers; skip quietly
	// when this instance type doesn't support it.
	fwInstance, ok := envInstances[0].(instances.InstanceFirewaller)
	if !ok {
		logger.Infof("flushInstancePorts called on an instance of type %T which doesn't support firewall.", envInstances[0])
		return nil
	}

	// Open and close the ports.
	if len(toOpen) > 0 {
		if err := fwInstance.OpenPorts(fw.cloudCallContext, machineId, toOpen); err != nil {
			// TODO(mue) Add local retry logic.
			return err
		}
		network.SortIngressRules(toOpen)
		logger.Infof("opened port ranges %v on %q", toOpen, machined.tag)
	}
	if len(toClose) > 0 {
		if err := fwInstance.ClosePorts(fw.cloudCallContext, machineId, toClose); err != nil {
			// TODO(mue) Add local retry logic.
			return err
		}
		network.SortIngressRules(toClose)
		logger.Infof("closed port ranges %v on %q", toClose, machined.tag)
	}
	return nil
}
   945  
   946  // machineLifeChanged starts watching new machines when the firewaller
   947  // is starting, or when new machines come to life, and stops watching
   948  // machines that are dying.
   949  func (fw *Firewaller) machineLifeChanged(tag names.MachineTag) error {
   950  	m, err := fw.firewallerApi.Machine(tag)
   951  	found := !params.IsCodeNotFound(err)
   952  	if found && err != nil {
   953  		return err
   954  	}
   955  	dead := !found || m.Life() == params.Dead
   956  	machined, known := fw.machineds[tag]
   957  	if known && dead {
   958  		return fw.forgetMachine(machined)
   959  	}
   960  	if !known && !dead {
   961  		err := fw.startMachine(tag)
   962  		if err != nil {
   963  			return err
   964  		}
   965  	}
   966  	return nil
   967  }
   968  
// forgetMachine cleans the machine data after the machine is removed.
// All units tracked on the machine are forgotten first, then the
// machine's remaining rules are flushed before its watcher is stopped.
func (fw *Firewaller) forgetMachine(machined *machineData) error {
	for _, unitd := range machined.unitds {
		fw.forgetUnit(unitd)
	}
	if err := fw.flushMachine(machined); err != nil {
		return errors.Trace(err)
	}

	// Unusually, it's fine to ignore this error, because we know the machined
	// is being tracked in fw.catacomb. But we do still want to wait until the
	// watch loop has stopped before we nuke the last data and return.
	_ = worker.Stop(machined)
	delete(fw.machineds, machined.tag)
	logger.Debugf("stopped watching %q", machined.tag)
	return nil
}
   986  
   987  // forgetUnit cleans the unit data after the unit is removed.
   988  func (fw *Firewaller) forgetUnit(unitd *unitData) {
   989  	applicationd := unitd.applicationd
   990  	machined := unitd.machined
   991  
   992  	// If it's the last unit in the application, we'll need to stop the applicationd.
   993  	stoppedApplication := false
   994  	if len(applicationd.unitds) == 1 {
   995  		if _, found := applicationd.unitds[unitd.tag]; found {
   996  			// Unusually, it's fine to ignore this error, because we know the
   997  			// applicationd is being tracked in fw.catacomb. But we do still want
   998  			// to wait until the watch loop has stopped before we nuke the last
   999  			// data and return.
  1000  			_ = worker.Stop(applicationd)
  1001  			stoppedApplication = true
  1002  		}
  1003  	}
  1004  
  1005  	// Clean up after stopping.
  1006  	delete(fw.unitds, unitd.tag)
  1007  	delete(machined.unitds, unitd.tag)
  1008  	delete(applicationd.unitds, unitd.tag)
  1009  	logger.Debugf("stopped watching %q", unitd.tag)
  1010  	if stoppedApplication {
  1011  		applicationTag := applicationd.application.Tag()
  1012  		delete(fw.applicationids, applicationTag)
  1013  		logger.Debugf("stopped watching %q", applicationTag)
  1014  	}
  1015  }
  1016  
// Kill is part of the worker.Worker interface.
// It asks the firewaller's catacomb to shut down everything it tracks.
func (fw *Firewaller) Kill() {
	fw.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
// It blocks until the catacomb (and so the worker) has terminated.
func (fw *Firewaller) Wait() error {
	return fw.catacomb.Wait()
}
  1026  
// unitsChange contains the changed units for one specific machine,
// as reported by that machine's units watcher.
type unitsChange struct {
	machined *machineData
	// units holds the changed unit names from the StringsWatcher.
	units    []string
}
  1032  
// machineData holds machine details and watches units added or removed.
type machineData struct {
	catacomb     catacomb.Catacomb
	fw           *Firewaller
	tag          names.MachineTag
	// unitds maps each unit assigned to this machine to its tracked state.
	unitds       map[names.UnitTag]*unitData
	// ingressRules holds this machine's ingress rules.
	// NOTE(review): appears to cache the rules last flushed for the
	// machine - confirm against flushMachine (outside this view).
	ingressRules []network.IngressRule
	// ports defined by units on this machine
	definedPorts map[names.UnitTag]portRanges
}
  1043  
// machine returns the current firewaller-facade representation of the
// machine this machineData tracks.
func (md *machineData) machine() (*firewaller.Machine, error) {
	return md.fw.firewallerApi.Machine(md.tag)
}
  1047  
// watchLoop watches the machine for units added or removed.
// Each batch of changes is forwarded to the firewaller's unitsChange
// channel; the loop ends when the catacomb dies or the watcher closes.
func (md *machineData) watchLoop(unitw watcher.StringsWatcher) error {
	if err := md.catacomb.Add(unitw); err != nil {
		return errors.Trace(err)
	}
	for {
		select {
		case <-md.catacomb.Dying():
			return md.catacomb.ErrDying()
		case change, ok := <-unitw.Changes():
			if !ok {
				return errors.New("machine units watcher closed")
			}
			// Guard the forwarding send with Dying too, so shutdown
			// can never be blocked by a stalled receiver.
			select {
			case <-md.catacomb.Dying():
				return md.catacomb.ErrDying()
			case md.fw.unitsChange <- &unitsChange{md, change}:
			}
		}
	}
}
  1069  
// Kill is part of the worker.Worker interface.
// It signals the machine watcher's catacomb to shut down.
func (md *machineData) Kill() {
	md.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
// It blocks until the machine's watch loop has terminated.
func (md *machineData) Wait() error {
	return md.catacomb.Wait()
}
  1079  
// unitData holds unit details: the unit's facade handle plus links to
// the application and machine state the firewaller tracks for it.
type unitData struct {
	fw           *Firewaller
	tag          names.UnitTag
	unit         *firewaller.Unit
	applicationd *applicationData
	machined     *machineData
}
  1088  
// exposedChange contains the changed exposed flag for one specific application.
type exposedChange struct {
	applicationd *applicationData
	// exposed is the new value of the application's exposed flag.
	exposed      bool
}
  1094  
// applicationData holds application details and watches exposure changes.
type applicationData struct {
	catacomb    catacomb.Catacomb
	fw          *Firewaller
	application *firewaller.Application
	// exposed is the last known value of the application's exposed flag.
	exposed     bool
	// unitds maps the application's units to their tracked state.
	unitds      map[names.UnitTag]*unitData
}
  1103  
// watchLoop watches the application's exposed flag for changes.
// The exposed argument seeds the last known value; only genuine
// transitions are forwarded to the firewaller's exposedChange channel.
func (ad *applicationData) watchLoop(exposed bool) error {
	appWatcher, err := ad.application.Watch()
	if err != nil {
		// Application is already gone; nothing to watch.
		if params.IsCodeNotFound(err) {
			return nil
		}
		return errors.Trace(err)
	}
	if err := ad.catacomb.Add(appWatcher); err != nil {
		return errors.Trace(err)
	}
	for {
		select {
		case <-ad.catacomb.Dying():
			return ad.catacomb.ErrDying()
		case _, ok := <-appWatcher.Changes():
			if !ok {
				return errors.New("application watcher closed")
			}
			change, err := ad.application.IsExposed()
			if err != nil {
				// The application disappearing mid-watch is a clean exit.
				if errors.IsNotFound(err) {
					logger.Debugf("application(%q).IsExposed() returned NotFound: %v", ad.application.Name(), err)
					return nil
				}
				return errors.Trace(err)
			}
			// Skip notifications when the flag hasn't actually changed.
			if change == exposed {
				logger.Tracef("application(%q).IsExposed() == %v (unchanged)", ad.application.Name(), exposed)
				continue
			}
			logger.Tracef("application(%q).IsExposed() changed %v => %v", ad.application.Name(), exposed, change)

			exposed = change
			// Guard the send with Dying so shutdown is never blocked.
			select {
			case <-ad.catacomb.Dying():
				return ad.catacomb.ErrDying()
			case ad.fw.exposedChange <- &exposedChange{ad, change}:
			}
		}
	}
}
  1147  
// Kill is part of the worker.Worker interface.
// It signals the application watcher's catacomb to shut down.
func (ad *applicationData) Kill() {
	ad.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
// It blocks until the application's watch loop has terminated.
func (ad *applicationData) Wait() error {
	return ad.catacomb.Wait()
}
  1157  
  1158  // parsePortsKey parses a ports document global key coming from the ports
  1159  // watcher (e.g. "42:0.1.2.0/24") and returns the machine and subnet tags from
  1160  // its components (in the last example "machine-42" and "subnet-0.1.2.0/24").
  1161  func parsePortsKey(change string) (machineTag names.MachineTag, subnetTag names.SubnetTag, err error) {
  1162  	defer errors.DeferredAnnotatef(&err, "invalid ports change %q", change)
  1163  
  1164  	parts := strings.SplitN(change, ":", 2)
  1165  	if len(parts) != 2 {
  1166  		return names.MachineTag{}, names.SubnetTag{}, errors.Errorf("unexpected format")
  1167  	}
  1168  	machineID, subnetID := parts[0], parts[1]
  1169  
  1170  	machineTag = names.NewMachineTag(machineID)
  1171  	if subnetID != "" {
  1172  		subnetTag = names.NewSubnetTag(subnetID)
  1173  	}
  1174  	return machineTag, subnetTag, nil
  1175  }
  1176  
  1177  func diffRanges(currentRules, wantedRules []network.IngressRule) (toOpen, toClose []network.IngressRule) {
  1178  	portCidrs := func(rules []network.IngressRule) map[corenetwork.PortRange]set.Strings {
  1179  		result := make(map[corenetwork.PortRange]set.Strings)
  1180  		for _, rule := range rules {
  1181  			cidrs, ok := result[rule.PortRange]
  1182  			if !ok {
  1183  				cidrs = set.NewStrings()
  1184  				result[rule.PortRange] = cidrs
  1185  			}
  1186  			ruleCidrs := rule.SourceCIDRs
  1187  			if len(ruleCidrs) == 0 {
  1188  				ruleCidrs = []string{"0.0.0.0/0"}
  1189  			}
  1190  			for _, cidr := range ruleCidrs {
  1191  				cidrs.Add(cidr)
  1192  			}
  1193  		}
  1194  		return result
  1195  	}
  1196  
  1197  	currentPortCidrs := portCidrs(currentRules)
  1198  	wantedPortCidrs := portCidrs(wantedRules)
  1199  	for portRange, wantedCidrs := range wantedPortCidrs {
  1200  		existingCidrs, ok := currentPortCidrs[portRange]
  1201  
  1202  		// If the wanted port range doesn't exist at all, the entire rule is to be opened.
  1203  		if !ok {
  1204  			rule := network.IngressRule{PortRange: portRange, SourceCIDRs: wantedCidrs.SortedValues()}
  1205  			toOpen = append(toOpen, rule)
  1206  			continue
  1207  		}
  1208  
  1209  		// Figure out the difference between CIDRs to get the rules to open/close.
  1210  		toOpenCidrs := wantedCidrs.Difference(existingCidrs)
  1211  		if toOpenCidrs.Size() > 0 {
  1212  			rule := network.IngressRule{PortRange: portRange, SourceCIDRs: toOpenCidrs.SortedValues()}
  1213  			toOpen = append(toOpen, rule)
  1214  		}
  1215  		toCloseCidrs := existingCidrs.Difference(wantedCidrs)
  1216  		if toCloseCidrs.Size() > 0 {
  1217  			rule := network.IngressRule{PortRange: portRange, SourceCIDRs: toCloseCidrs.SortedValues()}
  1218  			toClose = append(toClose, rule)
  1219  		}
  1220  	}
  1221  
  1222  	for portRange, currentCidrs := range currentPortCidrs {
  1223  		// If a current port range doesn't exist at all in the wanted set, the entire rule is to be closed.
  1224  		if _, ok := wantedPortCidrs[portRange]; !ok {
  1225  			rule := network.IngressRule{PortRange: portRange, SourceCIDRs: currentCidrs.SortedValues()}
  1226  			toClose = append(toClose, rule)
  1227  		}
  1228  	}
  1229  	network.SortIngressRules(toOpen)
  1230  	network.SortIngressRules(toClose)
  1231  	return toOpen, toClose
  1232  }
  1233  
  1234  // relationLifeChanged manages the workers to process ingress changes for
  1235  // the specified relation.
  1236  func (fw *Firewaller) relationLifeChanged(tag names.RelationTag) error {
  1237  	results, err := fw.remoteRelationsApi.Relations([]string{tag.Id()})
  1238  	if err != nil {
  1239  		return errors.Trace(err)
  1240  	}
  1241  	relErr := results[0].Error
  1242  	notfound := relErr != nil && params.IsCodeNotFound(relErr)
  1243  	if relErr != nil && !notfound {
  1244  		return err
  1245  	}
  1246  	rel := results[0].Result
  1247  
  1248  	gone := notfound || rel.Life == params.Dead || rel.Suspended
  1249  	data, known := fw.relationIngress[tag]
  1250  	if known && gone {
  1251  		logger.Debugf("relation %v was known but has died or been suspended", tag.Id())
  1252  		// If relation is suspended, shut off ingress immediately.
  1253  		// Units will also eventually leave scope which would cause
  1254  		// ingress to be shut off, but best to do it up front.
  1255  		if rel != nil && rel.Suspended {
  1256  			change := &remoteRelationNetworkChange{
  1257  				relationTag:         tag,
  1258  				localApplicationTag: data.localApplicationTag,
  1259  				ingressRequired:     false,
  1260  			}
  1261  			if err := fw.relationIngressChanged(change); err != nil {
  1262  				return errors.Trace(err)
  1263  			}
  1264  		}
  1265  		return fw.forgetRelation(data)
  1266  	}
  1267  	if !known && !gone {
  1268  		err := fw.startRelation(rel, rel.Endpoint.Role)
  1269  		if err != nil {
  1270  			return err
  1271  		}
  1272  	}
  1273  	return nil
  1274  }
  1275  
// remoteRelationInfo holds the tokens by which a remote relation and
// its application are known once registered in both models.
type remoteRelationInfo struct {
	relationToken    string
	applicationToken string
}
  1280  
// remoteRelationData holds the per-relation state used to manage
// ingress/egress for one cross-model relation, and runs its watch loop.
type remoteRelationData struct {
	catacomb      catacomb.Catacomb
	fw            *Firewaller
	// relationReady receives the relation/application tokens once the
	// relation is registered in both models (fed by the relation poller).
	relationReady chan remoteRelationInfo

	tag                 names.RelationTag
	localApplicationTag names.ApplicationTag
	relationToken       string
	applicationToken    string
	remoteModelUUID     string
	endpointRole        charm.RelationRole
	// isOffer is true when the remote application is a consumer proxy,
	// i.e. this model is the offering side of the relation.
	isOffer             bool

	// crossModelFirewallerFacade, when non-nil, is the facade opened to
	// the remote model; it is closed when the watch loop exits.
	crossModelFirewallerFacade CrossModelFirewallerFacadeCloser

	// These values are updated when ingress information on the
	// relation changes in the model.
	ingressRequired bool
	networks        set.Strings
}
  1301  
  1302  // startRelation creates a new data value for tracking details of the
  1303  // relation and starts watching the related models for subnets added or removed.
  1304  func (fw *Firewaller) startRelation(rel *params.RemoteRelation, role charm.RelationRole) error {
  1305  	remoteApps, err := fw.remoteRelationsApi.RemoteApplications([]string{rel.RemoteApplicationName})
  1306  	if err != nil {
  1307  		return errors.Trace(err)
  1308  	}
  1309  	remoteAppResult := remoteApps[0]
  1310  	if remoteAppResult.Error != nil {
  1311  		return errors.Trace(err)
  1312  	}
  1313  
  1314  	tag := names.NewRelationTag(rel.Key)
  1315  	data := &remoteRelationData{
  1316  		fw:                  fw,
  1317  		tag:                 tag,
  1318  		remoteModelUUID:     rel.SourceModelUUID,
  1319  		localApplicationTag: names.NewApplicationTag(rel.ApplicationName),
  1320  		endpointRole:        role,
  1321  		relationReady:       make(chan remoteRelationInfo),
  1322  	}
  1323  
  1324  	// Start the worker which will watch the remote relation for things like new networks.
  1325  	if err := fw.relationWorkerRunner.StartWorker(tag.Id(), func() (worker.Worker, error) {
  1326  		if err := catacomb.Invoke(catacomb.Plan{
  1327  			Site: &data.catacomb,
  1328  			Work: data.watchLoop,
  1329  		}); err != nil {
  1330  			return nil, errors.Trace(err)
  1331  		}
  1332  		return data, nil
  1333  	}); err != nil {
  1334  		return errors.Annotate(err, "error starting remote relation worker")
  1335  	}
  1336  	fw.relationIngress[tag] = data
  1337  
  1338  	data.isOffer = remoteAppResult.Result.IsConsumerProxy
  1339  	return fw.startRelationPoller(rel.Key, rel.RemoteApplicationName, data.relationReady)
  1340  }
  1341  
// watchLoop watches the relation for networks added or removed.
// It first blocks until the relation is registered in both models
// (signalled on relationReady), then runs the loop appropriate to
// the local endpoint's role.
func (rd *remoteRelationData) watchLoop() error {
	defer func() {
		// Close any cross-model facade opened by ingressAddressWatcher.
		if rd.crossModelFirewallerFacade != nil {
			rd.crossModelFirewallerFacade.Close()
		}
	}()

	// First, wait for relation to become ready.
	for rd.relationToken == "" {
		select {
		case <-rd.catacomb.Dying():
			return rd.catacomb.ErrDying()
		case remoteRelationInfo := <-rd.relationReady:
			rd.relationToken = remoteRelationInfo.relationToken
			rd.applicationToken = remoteRelationInfo.applicationToken
			logger.Debugf(
				"relation %v for remote app %v in model %v is ready",
				rd.relationToken, rd.applicationToken, rd.remoteModelUUID)
		}
	}

	if rd.endpointRole == charm.RoleRequirer {
		return rd.requirerEndpointLoop()
	}
	return rd.providerEndpointLoop()
}
  1369  
// requirerEndpointLoop watches the relation's egress addresses on the
// consuming model and publishes changes to the offering model so the
// matching firewall ingress can be allowed there.
func (rd *remoteRelationData) requirerEndpointLoop() error {
	// If the requirer end of the relation is on the offering model,
	// there's nothing to do here because the provider end on the
	// consuming model will be watching for changes.
	// TODO(wallyworld) - this will change if we want to allow bidirectional traffic.
	if rd.isOffer {
		return nil
	}

	logger.Debugf("starting requirer endpoint loop for %v on %v ", rd.tag.Id(), rd.localApplicationTag.Id())
	// Now watch for updates to egress addresses so we can inform the offering
	// model what firewall ingress to allow.
	egressAddressWatcher, err := rd.fw.firewallerApi.WatchEgressAddressesForRelation(rd.tag)
	if err != nil {
		if !params.IsCodeNotFound(err) && !params.IsCodeNotSupported(err) {
			return errors.Trace(err)
		}
		// NotFound/NotSupported both mean no egress is needed here.
		logger.Infof("no egress required for %v", rd.localApplicationTag)
		rd.ingressRequired = false
		return nil
	}
	if err := rd.catacomb.Add(egressAddressWatcher); err != nil {
		return errors.Trace(err)
	}
	for {
		select {
		case <-rd.catacomb.Dying():
			return rd.catacomb.ErrDying()
		case cidrs := <-egressAddressWatcher.Changes():
			logger.Debugf("relation egress addresses for %v changed in model %v: %v", rd.tag, rd.fw.modelUUID, cidrs)
			if err := rd.updateProviderModel(cidrs); err != nil {
				return errors.Trace(err)
			}
		}
	}
}
  1406  
// providerEndpointLoop watches the ingress addresses requested for the
// relation and forwards each change to the firewaller's main loop.
func (rd *remoteRelationData) providerEndpointLoop() error {
	logger.Debugf("starting provider endpoint loop for %v on %v ", rd.tag.Id(), rd.localApplicationTag.Id())
	// Watch for ingress changes requested by the consuming model.
	ingressAddressWatcher, err := rd.ingressAddressWatcher()
	if err != nil {
		if !params.IsCodeNotFound(err) && !params.IsCodeNotSupported(err) {
			return errors.Trace(err)
		}
		// NotFound/NotSupported both mean no ingress is needed here.
		logger.Infof("no ingress required for %v", rd.localApplicationTag)
		rd.ingressRequired = false
		return nil
	}
	if err := rd.catacomb.Add(ingressAddressWatcher); err != nil {
		return errors.Trace(err)
	}
	for {
		select {
		case <-rd.catacomb.Dying():
			return rd.catacomb.ErrDying()
		case cidrs := <-ingressAddressWatcher.Changes():
			logger.Debugf("relation ingress addresses for %v changed in model %v: %v", rd.tag, rd.fw.modelUUID, cidrs)
			if err := rd.updateIngressNetworks(cidrs); err != nil {
				return errors.Trace(err)
			}
		}
	}
}
  1434  
  1435  func (rd *remoteRelationData) ingressAddressWatcher() (watcher.StringsWatcher, error) {
  1436  	if rd.isOffer {
  1437  		// On the offering side we watch the local model for ingress changes
  1438  		// which will have been published from the consuming model.
  1439  		return rd.fw.firewallerApi.WatchIngressAddressesForRelation(rd.tag)
  1440  	} else {
  1441  		// On the consuming side, if this is the provider end of the relation,
  1442  		// we watch the remote model's egress changes to get our ingress changes.
  1443  		apiInfo, err := rd.fw.firewallerApi.ControllerAPIInfoForModel(rd.remoteModelUUID)
  1444  		if err != nil {
  1445  			return nil, errors.Annotatef(err, "cannot get api info for model %v", rd.remoteModelUUID)
  1446  		}
  1447  		rd.crossModelFirewallerFacade, err = rd.fw.newRemoteFirewallerAPIFunc(apiInfo)
  1448  		if err != nil {
  1449  			return nil, errors.Annotate(err, "cannot open facade to remote model to watch ingress addresses")
  1450  		}
  1451  
  1452  		mac, err := rd.fw.firewallerApi.MacaroonForRelation(rd.tag.Id())
  1453  		if err != nil {
  1454  			return nil, errors.Annotatef(err, "cannot get macaroon for %v", rd.tag.Id())
  1455  		}
  1456  		arg := params.RemoteEntityArg{
  1457  			Token:     rd.relationToken,
  1458  			Macaroons: macaroon.Slice{mac},
  1459  		}
  1460  		return rd.crossModelFirewallerFacade.WatchEgressAddressesForRelation(arg)
  1461  	}
  1462  }
  1463  
// remoteRelationNetworkChange describes a change to the ingress
// networks required for one relation of a local application.
type remoteRelationNetworkChange struct {
	relationTag         names.RelationTag
	localApplicationTag names.ApplicationTag
	// networks holds the CIDRs requiring ingress; it may be left unset
	// when ingressRequired is false.
	networks            set.Strings
	ingressRequired     bool
}
  1470  
// updateProviderModel gathers the ingress CIDRs for the relation and notifies
// that a change has occurred.
// The change is published directly to the remote (offering) model over a
// freshly opened cross-model facade, authenticated with the relation's
// macaroon. A vanished relation is treated as a no-op; a Forbidden
// response marks the relation as in error rather than failing the worker.
func (rd *remoteRelationData) updateProviderModel(cidrs []string) error {
	logger.Debugf("ingress cidrs for %v: %+v", rd.tag, cidrs)
	change := &remoteRelationNetworkChange{
		relationTag:         rd.tag,
		localApplicationTag: rd.localApplicationTag,
		networks:            set.NewStrings(cidrs...),
		ingressRequired:     len(cidrs) > 0,
	}

	apiInfo, err := rd.fw.firewallerApi.ControllerAPIInfoForModel(rd.remoteModelUUID)
	if err != nil {
		return errors.Annotatef(err, "cannot get api info for model %v", rd.remoteModelUUID)
	}
	mac, err := rd.fw.firewallerApi.MacaroonForRelation(rd.tag.Id())
	if params.IsCodeNotFound(err) {
		// Relation has gone, nothing to do.
		return nil
	}
	if err != nil {
		return errors.Annotatef(err, "cannot get macaroon for %v", rd.tag.Id())
	}
	remoteModelAPI, err := rd.fw.newRemoteFirewallerAPIFunc(apiInfo)
	if err != nil {
		return errors.Annotate(err, "cannot open facade to remote model to publish network change")
	}
	defer remoteModelAPI.Close()
	event := params.IngressNetworksChangeEvent{
		RelationToken:    rd.relationToken,
		ApplicationToken: rd.applicationToken,
		Networks:         change.networks.Values(),
		IngressRequired:  change.ingressRequired,
		Macaroons:        macaroon.Slice{mac},
	}
	err = remoteModelAPI.PublishIngressNetworkChange(event)
	if errors.IsNotFound(err) {
		logger.Debugf("relation id not found publishing %+v", event)
		return nil
	}

	// If the requested ingress is not permitted on the offering side,
	// mark the relation as in error. It's not an error that requires a
	// worker restart though.
	if params.IsCodeForbidden(err) {
		return rd.fw.firewallerApi.SetRelationStatus(rd.tag.Id(), relation.Error, err.Error())
	}
	// Annotate returns nil when err is nil, so this is the success path too.
	return errors.Annotate(err, "cannot publish ingress network change")
}
  1520  
  1521  // updateIngressNetworks processes the changed ingress networks on the relation.
  1522  func (rd *remoteRelationData) updateIngressNetworks(cidrs []string) error {
  1523  	logger.Debugf("ingress cidrs for %v: %+v", rd.tag, cidrs)
  1524  	change := &remoteRelationNetworkChange{
  1525  		relationTag:         rd.tag,
  1526  		localApplicationTag: rd.localApplicationTag,
  1527  		networks:            set.NewStrings(cidrs...),
  1528  		ingressRequired:     len(cidrs) > 0,
  1529  	}
  1530  	select {
  1531  	case <-rd.catacomb.Dying():
  1532  		return rd.catacomb.ErrDying()
  1533  	case rd.fw.localRelationsChange <- change:
  1534  	}
  1535  	return nil
  1536  }
  1537  
// Kill is part of the worker.Worker interface.
// It signals the relation watcher's catacomb to shut down.
func (rd *remoteRelationData) Kill() {
	rd.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
// It blocks until the relation's watch loop has terminated.
func (rd *remoteRelationData) Wait() error {
	return rd.catacomb.Wait()
}
  1547  
  1548  // forgetRelation cleans the relation data after the relation is removed.
  1549  func (fw *Firewaller) forgetRelation(data *remoteRelationData) error {
  1550  	logger.Debugf("forget relation %v", data.tag.Id())
  1551  	delete(fw.relationIngress, data.tag)
  1552  	// There's not much we can do if there's an error stopping the remote
  1553  	// relation worker, so just log it.
  1554  	if err := fw.relationWorkerRunner.StopWorker(data.tag.Id()); err != nil {
  1555  		logger.Errorf("error stopping remote relation worker for %s: %v", data.tag, err)
  1556  	}
  1557  	logger.Debugf("stopped watching %q", data.tag)
  1558  	return nil
  1559  }
  1560  
// remoteRelationPoller polls until a remote relation and its application
// have been assigned tokens in both models, then signals readiness on
// relationReady.
type remoteRelationPoller struct {
	catacomb       catacomb.Catacomb
	fw             *Firewaller
	relationTag    names.RelationTag
	applicationTag names.ApplicationTag
	relationReady  chan remoteRelationInfo
}
  1568  
  1569  // startRelationPoller creates a new worker which waits until a remote
  1570  // relation is registered in both models.
  1571  func (fw *Firewaller) startRelationPoller(relationKey, remoteAppName string, relationReady chan remoteRelationInfo) error {
  1572  	poller := &remoteRelationPoller{
  1573  		fw:             fw,
  1574  		relationTag:    names.NewRelationTag(relationKey),
  1575  		applicationTag: names.NewApplicationTag(remoteAppName),
  1576  		relationReady:  relationReady,
  1577  	}
  1578  
  1579  	err := catacomb.Invoke(catacomb.Plan{
  1580  		Site: &poller.catacomb,
  1581  		Work: poller.pollLoop,
  1582  	})
  1583  	if err != nil {
  1584  		return errors.Trace(err)
  1585  	}
  1586  
  1587  	// register poller with the firewaller's catacomb.
  1588  	return fw.catacomb.Add(poller)
  1589  }
  1590  
// pollLoop waits for a remote relation to be registered.
// It does this by waiting for the relation and app tokens to be created.
// GetToken errors are deliberately swallowed: a missing token just means
// the other model hasn't registered yet, so we retry on the next tick.
func (p *remoteRelationPoller) pollLoop() error {
	logger.Debugf("polling for relation %v on %v to be ready", p.relationTag, p.applicationTag)
	for {
		select {
		case <-p.catacomb.Dying():
			return p.catacomb.ErrDying()
		case <-p.fw.pollClock.After(3 * time.Second):
			// Relation is exported with the consuming model UUID.
			relToken, err := p.fw.remoteRelationsApi.GetToken(p.relationTag)
			if err != nil {
				continue
			}
			logger.Debugf("token %v for relation id: %v in model %v", relToken, p.relationTag.Id(), p.fw.modelUUID)

			// Application is exported with the offering model UUID.
			appToken, err := p.fw.remoteRelationsApi.GetToken(p.applicationTag)
			if err != nil {
				continue
			}
			logger.Debugf("token %v for application id: %v", appToken, p.applicationTag.Id())

			// relation and application are ready.
			relationInfo := remoteRelationInfo{
				relationToken:    relToken,
				applicationToken: appToken,
			}
			// Guard the send with Dying so shutdown is never blocked.
			select {
			case <-p.catacomb.Dying():
				return p.catacomb.ErrDying()
			case p.relationReady <- relationInfo:
			}
			return nil
		}
	}
}
  1628  
// Kill is part of the worker.Worker interface.
// It signals the poller's catacomb to shut down.
func (p *remoteRelationPoller) Kill() {
	p.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
// It blocks until the poll loop has terminated.
func (p *remoteRelationPoller) Wait() error {
	return p.catacomb.Wait()
}