github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1beta1/network.go (about)

     1  package v1beta1
     2  
     3  import (
     4  	"fmt"
     5  	"net"
     6  	"regexp"
     7  	"strings"
     8  
     9  	"go.uber.org/multierr"
    10  	"golang.org/x/exp/slices"
    11  )
    12  
//go:generate stringer -type=Network --trimprefix=Network

// Network identifies the kind of network access a job requires. Values are
// converted to and from text via the String method produced by the stringer
// directive above (see MarshalText and ParseNetwork).
type Network int
    15  
const (
	// NetworkNone specifies that the job does not require networking. It is
	// the zero value of Network.
	NetworkNone Network = iota

	// NetworkFull specifies that the job requires unfiltered raw IP networking.
	NetworkFull

	// NetworkHTTP specifies that the job requires HTTP networking to certain domains.
	//
	// The model is: the job specifier submits a job with the domain(s) it will
	// need to communicate with, the compute provider uses this to make some
	// decision about the risk of the job and bids accordingly, and then at run
	// time the traffic is limited to only the domain(s) specified.
	//
	// As a command, something like:
	//
	//	bacalhau docker run --network=http --domain=crates.io --domain=github.com -v Qmy1234myd4t4:/code rust/compile
	//
	// The "risk" for the compute provider is that the job does something that
	// violates its terms, the terms of its hosting provider or ISP, or even the
	// law in its jurisdiction (e.g. accessing and spreading illegal content,
	// performing cyberattacks). So the same sort of risk as operating a Tor
	// exit node.
	//
	// The risk for the job specifier is that we are operating in an environment
	// they are paying for, so there is an incentive to hijack that environment
	// (e.g. via a compromised package download that runs a crypto miner on
	// install, and uses up all the paid-for job time). Having the traffic
	// enforced to only domains specified makes those sorts of attacks much
	// trickier and less valuable.
	//
	// The compute provider might well enforce its limits by other means, but
	// having the domains specified up front allows it to skip bidding on jobs
	// it knows will fail in its executor. So this is hopefully a better UX for
	// job specifiers who can have their job picked up only by someone who will
	// run it successfully.
	NetworkHTTP
)
    54  
// domainRegex recognizes text shaped like a domain name: one or more
// dot-terminated labels made of lowercase letters and digits (optionally
// joined by single hyphens), followed by a final label of at least two
// lowercase letters, with a word boundary on either side.
//
// The pattern has no ^ or $ anchors, so MatchString reports true whenever any
// substring of its input has this shape, not only when the whole input does.
var domainRegex = regexp.MustCompile(`\b([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\b`)
    56  
    57  func ParseNetwork(s string) (Network, error) {
    58  	for typ := NetworkNone; typ <= NetworkHTTP; typ++ {
    59  		if equal(typ.String(), s) {
    60  			return typ, nil
    61  		}
    62  	}
    63  
    64  	return NetworkNone, fmt.Errorf("%T: unknown type '%s'", NetworkNone, s)
    65  }
    66  
    67  func (n Network) MarshalText() ([]byte, error) {
    68  	return []byte(n.String()), nil
    69  }
    70  
    71  func (n *Network) UnmarshalText(text []byte) (err error) {
    72  	name := string(text)
    73  	*n, err = ParseNetwork(name)
    74  	return
    75  }
    76  
// NetworkConfig pairs a Network type with the list of domains that go with
// it. Use IsValid to check the fields and DomainSet to obtain a de-duplicated
// domain list.
type NetworkConfig struct {
	// Type is the kind of network access requested.
	Type    Network  `json:"Type"`
	// Domains holds domain names or IP addresses (as checked by IsValid). An
	// entry beginning with "." is treated as a wildcard by DomainSet. The
	// field is omitted from JSON when empty.
	// NOTE(review): presumably only meaningful when Type is NetworkHTTP;
	// nothing in this file enforces that - confirm against callers.
	Domains []string `json:"Domains,omitempty"`
}
    81  
    82  // Disabled returns whether network connections should be completely disabled according
    83  // to this config.
    84  func (n NetworkConfig) Disabled() bool {
    85  	return n.Type == NetworkNone
    86  }
    87  
    88  // IsValid returns an error if any of the fields do not pass validation, or nil
    89  // otherwise.
    90  func (n NetworkConfig) IsValid() (err error) {
    91  	if n.Type < NetworkNone || n.Type > NetworkHTTP {
    92  		err = multierr.Append(err, fmt.Errorf("invalid networking type %q", n.Type))
    93  	}
    94  
    95  	for _, domain := range n.Domains {
    96  		if domainRegex.MatchString(domain) {
    97  			continue
    98  		}
    99  		if net.ParseIP(domain) != nil {
   100  			continue
   101  		}
   102  		err = multierr.Append(err, fmt.Errorf("invalid domain %q", domain))
   103  	}
   104  
   105  	return
   106  }
   107  
   108  // DomainSet returns the "unique set" of domains from the network config.
   109  // Domains listed multiple times and any subdomain that is also matched by a
   110  // wildcard is removed.
   111  //
   112  // This is something of an implementation detail – it matches the behavior
   113  // expected by our Docker HTTP gateway, which complains and/or fails to start if
   114  // these requirements are not met.
   115  func (n NetworkConfig) DomainSet() []string {
   116  	domains := slices.Clone(n.Domains)
   117  	slices.SortFunc(domains, func(a, b string) bool {
   118  		// If the domains "match", the match may be the result of a wildcard. We
   119  		// want to keep the wildcard because it matches more things. Wildcards
   120  		// will always be shorter than any subdomain they match, so we can
   121  		// simply sort on string length. Compact will then remove non-wildcards.
   122  		ret := matchDomain(a, b)
   123  		if ret == 0 {
   124  			return len(a) < len(b)
   125  		} else {
   126  			return ret < 0
   127  		}
   128  	})
   129  	domains = slices.CompactFunc(domains, func(a, b string) bool {
   130  		return matchDomain(a, b) == 0
   131  	})
   132  	return domains
   133  }
   134  
   135  func matchDomain(left, right string) (diff int) {
   136  	const wildcard = ""
   137  	lefts := strings.Split(strings.ToLower(strings.Trim(left, " ")), ".")
   138  	rights := strings.Split(strings.ToLower(strings.Trim(right, " ")), ".")
   139  
   140  	diff = len(lefts) - len(rights)
   141  	if diff != 0 && lefts[0] != wildcard && rights[0] != wildcard {
   142  		// Domains don't have same number of components, so
   143  		// the one that is longer should sort after.
   144  		return diff
   145  	}
   146  
   147  	lcur, rcur := len(lefts)-1, len(rights)-1
   148  	for lcur >= 0 && rcur >= 0 {
   149  		// If neither is a blank, these components need to match.
   150  		if lefts[lcur] != wildcard && rights[rcur] != wildcard {
   151  			if diff = strings.Compare(lefts[lcur], rights[rcur]); diff != 0 {
   152  				return diff
   153  			}
   154  		}
   155  
   156  		// If both are blanks, they match.
   157  		if lefts[lcur] == wildcard || rights[rcur] == wildcard {
   158  			break
   159  		}
   160  
   161  		// Blank means we are matching any subdomains, so only the rest of
   162  		// the domain needs to match for this to work.
   163  		if lefts[lcur] != wildcard {
   164  			lcur -= 1
   165  		}
   166  
   167  		if rights[rcur] != wildcard {
   168  			rcur -= 1
   169  		}
   170  	}
   171  
   172  	// If we are here, we have run out of components; either the domains match
   173  	// in all components or one of them is a wildcard.
   174  	return 0
   175  }