github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/rootd/dns/server_linux.go (about) 1 package dns 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math" 8 "net" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/datawire/dlib/dexec" 14 "github.com/datawire/dlib/dgroup" 15 "github.com/datawire/dlib/dlog" 16 "github.com/datawire/dlib/dtime" 17 "github.com/telepresenceio/telepresence/v2/pkg/dnsproxy" 18 "github.com/telepresenceio/telepresence/v2/pkg/forwarder" 19 "github.com/telepresenceio/telepresence/v2/pkg/iputil" 20 "github.com/telepresenceio/telepresence/v2/pkg/proc" 21 "github.com/telepresenceio/telepresence/v2/pkg/shellquote" 22 "github.com/telepresenceio/telepresence/v2/pkg/vif" 23 ) 24 25 const ( 26 maxRecursionTestRetries = 10 27 28 // We use a fairly short delay here because if DNS recursion is a thing, then the cluster's DNS-server 29 // has access to the caller host's network, so it runs locally in a Docker container or similar. 30 recursionTestTimeout = 200 * time.Millisecond 31 ) 32 33 var errResolveDNotConfigured = errors.New("resolved not configured") 34 35 func (s *Server) Worker(c context.Context, dev vif.Device, configureDNS func(net.IP, *net.UDPAddr)) error { 36 if proc.RunningInContainer() { 37 // Don't bother with systemd-resolved when running in a docker container 38 return s.runOverridingServer(c, dev) 39 } 40 41 err := s.tryResolveD(dgroup.WithGoroutineName(c, "/resolved"), dev, configureDNS) 42 if err == errResolveDNotConfigured { 43 err = nil 44 if c.Err() == nil { 45 dlog.Info(c, "Unable to use systemd-resolved, falling back to local server") 46 err = s.runOverridingServer(dgroup.WithGoroutineName(c, "/legacy"), dev) 47 } 48 } 49 return err 50 } 51 52 func (s *Server) runOverridingServer(c context.Context, dev vif.Device) error { 53 if s.localIP == nil { 54 rf, err := dnsproxy.ReadResolveFile("/etc/resolv.conf") 55 if err != nil { 56 return err 57 } 58 dlog.Debug(c, rf.String()) 59 if len(rf.Nameservers) > 0 { 60 ip := iputil.Parse(rf.Nameservers[0]) 61 s.localIP = ip 62 dlog.Infof(c, "Automatically set -dns=%s", ip) 63 } 64 65 // The search entries in /etc/resolv.conf are not intended for this resolver so 66 // ensure that we strip them off when we send queries to the cluster. 67 for _, sp := range rf.Search { 68 lsp := len(sp) 69 if lsp > 0 { 70 if sp[0] == '.' { 71 sp = sp[1:] 72 lsp-- 73 } 74 if lsp > 0 { 75 if sp[lsp-1] != '.' { 76 sp += "." 77 } 78 s.dropSuffixes = append(s.dropSuffixes, strings.ToLower(sp)) 79 } 80 } 81 } 82 } 83 if s.localIP == nil { 84 return errors.New("couldn't determine dns ip from /etc/resolv.conf") 85 } 86 87 listeners, err := s.dnsListeners(c) 88 if err != nil { 89 return err 90 } 91 dnsResolverAddr, err := splitToUDPAddr(listeners[0].LocalAddr()) 92 if err != nil { 93 return err 94 } 95 dlog.Debugf(c, "Bootstrapping local DNS server on port %d", dnsResolverAddr.Port) 96 97 // Create the connection pool later used for fallback. We need to create this before the firewall 98 // rule because the rule must exclude the local address of this connection in order to 99 // let it reach the original destination and not cause an endless loop. 100 pool, err := NewConnPool(s.localIP.String(), 10) 101 if err != nil { 102 return err 103 } 104 defer func() { 105 pool.Close() 106 }() 107 108 serverStarted := make(chan struct{}) 109 serverDone := make(chan struct{}) 110 g := dgroup.NewGroup(c, dgroup.GroupConfig{}) 111 g.Go("Server", func(c context.Context) error { 112 defer close(serverDone) 113 // Server will close the listener, so no need to close it here. 114 s.processSearchPaths(g, func(c context.Context, _ vif.Device) error { 115 s.flushDNS() 116 return nil 117 }, dev) 118 return s.Run(c, serverStarted, listeners, pool, s.resolveInCluster) 119 }) 120 121 if proc.RunningInContainer() { 122 g.Go("Local DNS", func(c context.Context) error { 123 select { 124 case <-c.Done(): 125 case <-serverStarted: 126 // Give DNS server time to start before rerouting NAT 127 dtime.SleepWithContext(c, time.Millisecond) 128 129 lc := net.ListenConfig{} 130 pc, err := lc.ListenPacket(c, "udp", ":53") 131 if err != nil { 132 return nil 133 } 134 go func() { 135 if err = forwarder.ForwardUDP(c, pc.(*net.UDPConn), dnsResolverAddr); err != nil { 136 dlog.Error(c, err) 137 } 138 }() 139 } 140 return nil 141 }) 142 } 143 144 g.Go("NAT-redirect", func(c context.Context) error { 145 select { 146 case <-c.Done(): 147 case <-serverStarted: 148 // Give DNS server time to start before rerouting NAT 149 dtime.SleepWithContext(c, time.Millisecond) 150 151 err := routeDNS(c, s.localIP, dnsResolverAddr, pool.LocalAddrs()) 152 if err != nil { 153 return err 154 } 155 defer func() { 156 c := context.Background() 157 unrouteDNS(c) 158 s.flushDNS() 159 }() 160 s.flushDNS() 161 <-serverDone // Stay alive until DNS server is done 162 } 163 return nil 164 }) 165 return g.Wait() 166 } 167 168 func (s *Server) dnsListeners(c context.Context) ([]net.PacketConn, error) { 169 listener, err := newLocalUDPListener(c) 170 if err != nil { 171 return nil, err 172 } 173 listeners := []net.PacketConn{listener} 174 if proc.RunningInContainer() { 175 // Inside docker. Don't add docker bridge 176 return listeners, nil 177 } 178 179 // This is the default docker bridge. We need to listen here because the nat logic we use to intercept 180 // dns packets will divert the packet to the interface it originates from, which in the case of 181 // containers is the docker bridge. Without this dns won't work from inside containers. 182 output, err := dexec.CommandContext(c, "docker", "inspect", "bridge", 183 "-f", "{{(index .IPAM.Config 0).Gateway}}").Output() 184 if err != nil { 185 dlog.Info(c, "not listening on docker bridge") 186 return listeners, nil 187 } 188 189 localAddr, err := splitToUDPAddr(listener.LocalAddr()) 190 if err != nil { 191 return nil, err 192 } 193 194 dockerGatewayIP := net.ParseIP(strings.TrimSpace(string(output))) 195 if dockerGatewayIP == nil || dockerGatewayIP.Equal(localAddr.IP) { 196 return listeners, nil 197 } 198 199 // Check that the dockerGatewayIP is registered as an interface on this machine. When running WSL2 on 200 // a Windows box, the gateway is managed by Windows and never visible to the Linux host and hence 201 // will not be affected by the nat logic. Also, any attempt to listen to it will fail. 202 found := false 203 ifAddrs, err := net.InterfaceAddrs() 204 if err != nil { 205 return nil, err 206 } 207 for _, ifAddr := range ifAddrs { 208 _, network, err := net.ParseCIDR(ifAddr.String()) 209 if err != nil { 210 continue 211 } 212 if network.Contains(dockerGatewayIP) { 213 found = true 214 break 215 } 216 } 217 218 if !found { 219 dlog.Infof(c, "docker gateway %s is not visible as a network interface", dockerGatewayIP) 220 return listeners, nil 221 } 222 223 for { 224 extraAddr := &net.UDPAddr{IP: dockerGatewayIP, Port: localAddr.Port} 225 ls, err := net.ListenPacket("udp", extraAddr.String()) 226 if err == nil { 227 dlog.Infof(c, "listening to docker bridge at %s", dockerGatewayIP) 228 return append(listeners, ls), nil 229 } 230 231 // the extraAddr was busy, try next available port 232 for localAddr.Port++; localAddr.Port <= math.MaxUint16; localAddr.Port++ { 233 if ls, err = net.ListenPacket("udp", localAddr.String()); err == nil { 234 if localAddr, err = splitToUDPAddr(ls.LocalAddr()); err != nil { 235 ls.Close() 236 return nil, err 237 } 238 _ = listeners[0].Close() 239 listeners = []net.PacketConn{ls} 240 break 241 } 242 } 243 if localAddr.Port > math.MaxUint16 { 244 return nil, fmt.Errorf("unable to find a free port for both %s and %s", localAddr.IP, extraAddr.IP) 245 } 246 } 247 } 248 249 // runNatTableCmd runs "iptables -t nat ...". 250 func runNatTableCmd(c context.Context, args ...string) error { 251 // We specifically don't want to use the cancellation of 'ctx' here, because we don't ever 252 // want to leave things in a half-cleaned-up state. 253 args = append([]string{"-t", "nat"}, args...) 254 cmd := dexec.CommandContext(c, "iptables", args...) 255 cmd.DisableLogging = dlog.MaxLogLevel(c) < dlog.LogLevelDebug 256 dlog.Debug(c, shellquote.ShellString("iptables", args)) 257 return cmd.Run() 258 } 259 260 const tpDNSChain = "TELEPRESENCE_DNS" 261 262 // routeDNS creates a new chain in the "nat" table with two rules in it. One rule ensures 263 // that all packets sent to the currently configured DNS service are rerouted to our local 264 // DNS service. Another rule ensures that when our local DNS service cannot resolve and 265 // uses a fallback, that fallback reaches the original DNS service. 266 func routeDNS(c context.Context, dnsIP net.IP, toAddr *net.UDPAddr, localDNSs []*net.UDPAddr) (err error) { 267 // create the chain 268 unrouteDNS(c) 269 270 // Create the TELEPRESENCE_DNS chain 271 if err = runNatTableCmd(c, "-N", tpDNSChain); err != nil { 272 return err 273 } 274 275 // This rule prevents that any rules in this table applies to the localDNS address when 276 // used as a source. I.e. we let the local DNS server reach the original DNS server 277 for _, localDNS := range localDNSs { 278 if err = runNatTableCmd(c, "-A", tpDNSChain, 279 "-p", "udp", 280 "--source", localDNS.IP.String(), 281 "--sport", strconv.Itoa(localDNS.Port), 282 "-j", "RETURN", 283 ); err != nil { 284 return err 285 } 286 } 287 // This rule redirects all packets intended for the DNS service to our local DNS service 288 if err = runNatTableCmd(c, "-A", tpDNSChain, 289 "-p", "udp", 290 "--dest", dnsIP.String()+"/32", 291 "--dport", "53", 292 "-j", "DNAT", 293 "--to-destination", toAddr.String(), 294 ); err != nil { 295 return err 296 } 297 298 // Alter locally generated packets before routing 299 return runNatTableCmd(c, "-I", "OUTPUT", "1", "-j", tpDNSChain) 300 } 301 302 // unrouteDNS removes the chain installed by routeDNS. 303 func unrouteDNS(c context.Context) { 304 // The errors returned by these commands aren't of any interest besides logging. And they 305 // are already logged since dexec is used. 306 _ = runNatTableCmd(c, "-D", "OUTPUT", "-j", tpDNSChain) 307 _ = runNatTableCmd(c, "-F", tpDNSChain) 308 _ = runNatTableCmd(c, "-X", tpDNSChain) 309 }