github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1beta1/network.go (about) 1 package v1beta1 2 3 import ( 4 "fmt" 5 "net" 6 "regexp" 7 "strings" 8 9 "go.uber.org/multierr" 10 "golang.org/x/exp/slices" 11 ) 12 13 //go:generate stringer -type=Network --trimprefix=Network 14 type Network int 15 16 const ( 17 // NetworkNone specifies that the job does not require networking. 18 NetworkNone Network = iota 19 20 // NetworkFull specifies that the job requires unfiltered raw IP networking. 21 NetworkFull 22 23 // NetworkHTTP specifies that the job requires HTTP networking to certain domains. 24 // 25 // The model is: the job specifier submits a job with the domain(s) it will 26 // need to communicate with, the compute provider uses this to make some 27 // decision about the risk of the job and bids accordingly, and then at run 28 // time the traffic is limited to only the domain(s) specified. 29 // 30 // As a command, something like: 31 // 32 // bacalhau docker run —network=http —domain=crates.io —domain=github.com -v Qmy1234myd4t4:/code rust/compile 33 // 34 // The “risk” for the compute provider is that the job does something that 35 // violates its terms, the terms of its hosting provider or ISP, or even the 36 // law in its jurisdiction (e.g. accessing and spreading illegal content, 37 // performing cyberattacks). So the same sort of risk as operating a Tor 38 // exit node. 39 // 40 // The risk for the job specifier is that we are operating in an environment 41 // they are paying for, so there is an incentive to hijack that environment 42 // (e.g. via a compromised package download that runs a crypto miner on 43 // install, and uses up all the paid-for job time). Having the traffic 44 // enforced to only domains specified makes those sorts of attacks much 45 // trickier and less valuable. 46 // 47 // The compute provider might well enforce its limits by other means, but 48 // having the domains specified up front allows it to skip bidding on jobs 49 // it knows will fail in its executor. So this is hopefully a better UX for 50 // job specifiers who can have their job picked up only by someone who will 51 // run it successfully. 52 NetworkHTTP 53 ) 54 55 var domainRegex = regexp.MustCompile(`\b([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\b`) 56 57 func ParseNetwork(s string) (Network, error) { 58 for typ := NetworkNone; typ <= NetworkHTTP; typ++ { 59 if equal(typ.String(), s) { 60 return typ, nil 61 } 62 } 63 64 return NetworkNone, fmt.Errorf("%T: unknown type '%s'", NetworkNone, s) 65 } 66 67 func (n Network) MarshalText() ([]byte, error) { 68 return []byte(n.String()), nil 69 } 70 71 func (n *Network) UnmarshalText(text []byte) (err error) { 72 name := string(text) 73 *n, err = ParseNetwork(name) 74 return 75 } 76 77 type NetworkConfig struct { 78 Type Network `json:"Type"` 79 Domains []string `json:"Domains,omitempty"` 80 } 81 82 // Disabled returns whether network connections should be completely disabled according 83 // to this config. 84 func (n NetworkConfig) Disabled() bool { 85 return n.Type == NetworkNone 86 } 87 88 // IsValid returns an error if any of the fields do not pass validation, or nil 89 // otherwise. 90 func (n NetworkConfig) IsValid() (err error) { 91 if n.Type < NetworkNone || n.Type > NetworkHTTP { 92 err = multierr.Append(err, fmt.Errorf("invalid networking type %q", n.Type)) 93 } 94 95 for _, domain := range n.Domains { 96 if domainRegex.MatchString(domain) { 97 continue 98 } 99 if net.ParseIP(domain) != nil { 100 continue 101 } 102 err = multierr.Append(err, fmt.Errorf("invalid domain %q", domain)) 103 } 104 105 return 106 } 107 108 // DomainSet returns the "unique set" of domains from the network config. 109 // Domains listed multiple times and any subdomain that is also matched by a 110 // wildcard is removed. 111 // 112 // This is something of an implementation detail – it matches the behavior 113 // expected by our Docker HTTP gateway, which complains and/or fails to start if 114 // these requirements are not met. 115 func (n NetworkConfig) DomainSet() []string { 116 domains := slices.Clone(n.Domains) 117 slices.SortFunc(domains, func(a, b string) bool { 118 // If the domains "match", the match may be the result of a wildcard. We 119 // want to keep the wildcard because it matches more things. Wildcards 120 // will always be shorter than any subdomain they match, so we can 121 // simply sort on string length. Compact will then remove non-wildcards. 122 ret := matchDomain(a, b) 123 if ret == 0 { 124 return len(a) < len(b) 125 } else { 126 return ret < 0 127 } 128 }) 129 domains = slices.CompactFunc(domains, func(a, b string) bool { 130 return matchDomain(a, b) == 0 131 }) 132 return domains 133 } 134 135 func matchDomain(left, right string) (diff int) { 136 const wildcard = "" 137 lefts := strings.Split(strings.ToLower(strings.Trim(left, " ")), ".") 138 rights := strings.Split(strings.ToLower(strings.Trim(right, " ")), ".") 139 140 diff = len(lefts) - len(rights) 141 if diff != 0 && lefts[0] != wildcard && rights[0] != wildcard { 142 // Domains don't have same number of components, so 143 // the one that is longer should sort after. 144 return diff 145 } 146 147 lcur, rcur := len(lefts)-1, len(rights)-1 148 for lcur >= 0 && rcur >= 0 { 149 // If neither is a blank, these components need to match. 150 if lefts[lcur] != wildcard && rights[rcur] != wildcard { 151 if diff = strings.Compare(lefts[lcur], rights[rcur]); diff != 0 { 152 return diff 153 } 154 } 155 156 // If both are blanks, they match. 157 if lefts[lcur] == wildcard || rights[rcur] == wildcard { 158 break 159 } 160 161 // Blank means we are matching any subdomains, so only the rest of 162 // the domain needs to match for this to work. 163 if lefts[lcur] != wildcard { 164 lcur -= 1 165 } 166 167 if rights[rcur] != wildcard { 168 rcur -= 1 169 } 170 } 171 172 // If we are here, we have run out of components; either the domains match 173 // in all components or one of them is a wildcard. 174 return 0 175 }