lab.nexedi.com/kirr/go123@v0.0.0-20240207185015-8299741fa871/xnet/lonet/lonet.go (about)

     1  // Copyright (C) 2018-2020  Nexedi SA and Contributors.
     2  //                          Kirill Smelkov <kirr@nexedi.com>
     3  //
     4  // This program is free software: you can Use, Study, Modify and Redistribute
     5  // it under the terms of the GNU General Public License version 3, or (at your
     6  // option) any later version, as published by the Free Software Foundation.
     7  //
     8  // You can also Link and Combine this program with other software covered by
     9  // the terms of any of the Free Software licenses or any of the Open Source
    10  // Initiative approved licenses and Convey the resulting work. Corresponding
    11  // source of such a combination shall include the source code for all other
    12  // software used.
    13  //
    14  // This program is distributed WITHOUT ANY WARRANTY; without even the implied
    15  // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    16  //
    17  // See COPYING file for full licensing terms.
    18  // See https://www.nexedi.com/licensing for rationale and options.
    19  
    20  // Package lonet provides TCP network simulated on top of localhost TCP loopback.
    21  //
    22  // For testing distributed systems it is sometimes handy to imitate network of
    23  // several TCP hosts. It is also handy that ports allocated on Dial/Listen/Accept on
    24  // those hosts be predictable - that would help tests to verify network events
    25  // against expected sequence. When whole system could be imitated in 1 OS-level
    26  // process, package lab.nexedi.com/kirr/go123/xnet/pipenet serves the task via
    27  // providing TCP-like synchronous in-memory network of net.Pipes. However
    28  // pipenet cannot be used for cases where tested system consists of 2 or more
    29  // OS-level processes. This is where lonet comes into play:
    30  //
    31  // Similarly to pipenet addresses on lonet are host:port pairs and several
    32  // hosts could be created with different names. A host is xnet.Networker and
    33  // so can be worked with similarly to regular TCP network access-point with
    34  // Dial/Listen/Accept. Host's ports allocation is predictable: ports of a host
    35  // are contiguous integer sequence starting from 1 that are all initially free,
    36  // and whenever autobind is requested the first free port of the host will be
    37  // used.
    38  //
    39  // Internally lonet network maintains registry of hosts so that lonet
    40  // addresses could be resolved to OS-level addresses, for example α:1 and β:1
    41  // to 127.0.0.1:4567 and 127.0.0.1:8765, and once lonet connection is
    42  // established it becomes served by OS-level TCP connection over loopback.
    43  //
    44  // Example:
    45  //
    46  //	net, err := lonet.Join(ctx, "mynet")
    47  //	hα, err := net.NewHost(ctx, "α")
    48  //	hβ, err := net.NewHost(ctx, "β")
    49  //
    50  //	// starts listening on address "α:10"
    51  //	l, err := hα.Listen(ctx, ":10")
    52  //	go func() {
    53  //		csrv, err := l.Accept(ctx) // csrv will have LocalAddr "α:1"
    54  //	}()
    55  //	ccli, err := hβ.Dial(ctx, "α:10")  // ccli will be connection between "β:1" - "α:1"
    56  //
    57  // Once again lonet is similar to pipenet, but since it works via OS TCP stack
    58  // it could be handy for testing networked application when there are several
    59  // OS-level processes involved.
    60  //
    61  // Package lonet also provides corresponding Python package for accessing lonet
    62  // networks from Python(*).
    63  //
    64  // --------
    65  //
    66  // (*) use https://pypi.org/project/pygolang to import.
    67  package lonet
    68  
    69  // Lonet organization
    70  //
    71  // For every lonet network there is a registry with information about hosts
    72  // available on the network, and for each host its OS-level listening address.
    73  // The registry is kept as SQLite database under
    74  //
    75  //	/<tmp>/lonet/<network>/registry.db
    76  //
    77  // Whenever host α needs to establish connection to address on host β, it
    78  // queries the registry for β and further talks to β on that address.
    79  // Correspondingly when a host joins the network, it announces itself to the
    80  // registry so that other hosts could see it.
    81  //
    82  //
    83  // Handshake protocol
    84  //
    85  // After α establishes OS-level connection to β via main β address, it sends
    86  // request to further establish lonet connection on top of that:
    87  //
    88  //	> lonet "<network>" dial "<α:portα>" "<β:portβ>"\n
    89  //
    90  // β checks whether portβ is listening, and if yes, accepts the connection on
    91  // corresponding on-β listener with giving feedback to α that connection was
    92  // accepted:
    93  //
    94  //	< lonet "<network>" connected "<β:portβ'>"\n
    95  //
    96  // After that connection is considered to be lonet-established and all further
    97  // exchange on it is directly controlled by corresponding lonet-level
    98  // Read/Write on α and β.
    99  //
   100  // If, on the other hand, lonet-level connection cannot be established, β replies:
   101  //
   102  //	< lonet "<networkβ>" E "<error>"\n
   103  //
   104  // where <error> could be:
   105  //
   106  //	- connection refused	if <β:portβ> is not listening
   107  //	- network mismatch	if β thinks it works on different lonet network than α
   108  //	- protocol error	if β thinks that α sent an incorrect dial request
   109  //	- ...
   110  
   111  import (
   112  	"context"
   113  	stderrors "errors"
   114  	"fmt"
   115  	"io"
   116  	"io/ioutil"
   117  	"log"
   118  	"net"
   119  	"os"
   120  	"path/filepath"
   121  	"sync"
   122  
   123  	"github.com/pkg/errors"
   124  
   125  	"lab.nexedi.com/kirr/go123/xerr"
   126  	"lab.nexedi.com/kirr/go123/xnet"
   127  	"lab.nexedi.com/kirr/go123/xnet/virtnet"
   128  )
   129  
   130  const netPrefix = "lonet" // lonet package creates only "lonet*" networks
   131  
   132  
   133  // protocolError represents logical error in lonet handshake exchange.
   134  type protocolError struct {
   135  	err error
   136  }
   137  
   138  // subNetwork represents one subnetwork of a lonet network.
   139  type subNetwork struct {
   140  	vnet *virtnet.SubNetwork
   141  
   142  	// OS-level listener of this subnetwork.
   143  	// whenever connection to subnet's host is tried to be established it goes here.
   144  	oslistener xnet.Listener
   145  
   146  	// accepted connections are further routed here for virtnet to handle.
   147  	vnotify virtnet.Notifier
   148  
   149  	// cancel for spawned .serve(ctx)
   150  	serveCancel func()
   151  }
   152  
   153  // vengine implements virtnet.Engine for subNetwork.
   154  type vengine struct {
   155  	subnet *subNetwork
   156  }
   157  
   158  var tcp4 = xnet.NetPlain("tcp4")
   159  
   160  // Join joins or creates new lonet network with given name.
   161  //
   162  // Network is the name of this network under "lonet" namespace, e.g. "α" will
   163  // give full network name "lonetα".
   164  //
   165  // If network is "" new network with random unique name will be created.
   166  //
   167  // Join returns new subnetwork on the joined network.
   168  //
   169  // See package lab.nexedi.com/kirr/go123/xnet/virtnet for documentation on how
   170  // to use returned subnetwork.
   171  func Join(ctx context.Context, network string) (_ *virtnet.SubNetwork, err error) {
   172  	defer xerr.Contextf(&err, "lonet: join %q", network)
   173  
   174  	// create/join registry under /tmp/lonet/<network>/registry.db
   175  	lonet := os.TempDir() + "/lonet"
   176  	err = os.MkdirAll(lonet, 0777 | os.ModeSticky)
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  
   181  	var netdir string
   182  	if network != "" {
   183  		netdir = lonet + "/" + network
   184  		err = os.MkdirAll(netdir, 0700)
   185  	} else {
   186  		// new with random name
   187  		netdir, err = ioutil.TempDir(lonet, "")
   188  		network = filepath.Base(netdir)
   189  	}
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  
   194  	registry, err := openRegistrySQLite(ctx, netdir + "/registry.db", network)
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	// start OS listener
   200  	oslistener, err := tcp4.Listen(ctx, "127.0.0.1:")
   201  	if err != nil {
   202  		registry.Close()
   203  		return nil, err
   204  	}
   205  
   206  	// joined ok
   207  	losubnet := &subNetwork{oslistener: oslistener}
   208  	engine := &vengine{losubnet}
   209  	subnet, vnotify := virtnet.NewSubNetwork(netPrefix + network, engine, registry)
   210  	losubnet.vnet = subnet
   211  	losubnet.vnotify = vnotify
   212  
   213  	serveCtx, serveCancel := context.WithCancel(context.Background())
   214  	losubnet.serveCancel = serveCancel
   215  	go losubnet.serve(serveCtx)
   216  
   217  	return subnet, nil
   218  }
   219  
   220  // ---- subnetwork OS-level serving ----
   221  
   222  // Close implements virtnet.Engine .
   223  func (v *vengine) Close() (err error) {
   224  	n := v.subnet
   225  	defer xerr.Contextf(&err, "lonet %q: close", n.network())
   226  
   227  	n.serveCancel()             // this will cancel loaccepts spawned by serve
   228  	return n.oslistener.Close() // this will interrupt Accept in serve
   229  }
   230  
   231  // serve serves incoming OS-level connections to this subnetwork.
   232  //
   233  // for every accepted connection lonet-handshake is initiated.
   234  func (n *subNetwork) serve(ctx context.Context) {
   235  	var wg sync.WaitGroup
   236  	defer wg.Wait()
   237  
   238  	// wait for incoming OS connections and do lonet protocol handshake on them.
   239  	// if successful - route handshaked connection to particular Host's listener.
   240  	for {
   241  		osconn, err := n.oslistener.Accept(ctx)
   242  		if err != nil {
   243  			// mark subnetwork as being down and stop
   244  			n.vnotify.VNetDown(err)
   245  			return
   246  		}
   247  
   248  		wg.Add(1)
   249  		go func(osconn net.Conn) {
   250  			defer wg.Done()
   251  
   252  			err := n.loaccept(ctx, osconn)
   253  			if err == nil {
   254  				return
   255  			}
   256  
   257  			// log if error is unexpected
   258  			switch errors.Cause(err) {
   259  			case virtnet.ErrConnRefused,
   260  			     context.Canceled,
   261  			     context.DeadlineExceeded:
   262  				return // all ok - don't log.
   263  			}
   264  
   265  			log.Printf("lonet %q: serve %s <- %s : %s", n.network(),
   266  				n.oslistener.Addr(), osconn.RemoteAddr(), err)
   267  		}(osconn)
   268  	}
   269  }
   270  
   271  
   272  // ---- acceptor and dialer that talk to each other via lonet handshake protocol ----
   273  
   274  // loaccept handles incoming OS-level connection.
   275  //
   276  // It performs lonet protocol handshake as listener, and if successful further
   277  // conveys accepted connection to lonet-level Accept.
   278  //
   279  // If handshake is not successful the connection is closed.
   280  func (n *subNetwork) loaccept(ctx context.Context, osconn net.Conn) (err error) {
   281  	defer xerr.Context(&err, "loaccept")
   282  
   283  	// close osconn on error
   284  	osconnClosed := false
   285  	defer func() {
   286  		if err != nil && !osconnClosed {
   287  			osconn.Close()
   288  		}
   289  	}()
   290  
   291  	// spawn accept
   292  	type ret struct { err error }
   293  	doneq := make(chan ret)
   294  	go func() {
   295  		err := n._loaccept(ctx, osconn)
   296  		doneq <- ret{err}
   297  	}()
   298  
   299  	// wait for completion / interrupt IO on ctx cancel
   300  	select {
   301  	case <-ctx.Done():
   302  		osconnClosed = true
   303  		osconn.Close()
   304  		<-doneq
   305  		return ctx.Err()
   306  
   307  	case ret := <-doneq:
   308  		return ret.err
   309  	}
   310  }
   311  
   312  func (n *subNetwork) _loaccept(ctx context.Context, osconn net.Conn) error {
   313  	// read handshake line and parse it
   314  	line, err := readline(osconn, 1024) // limit line length not to cause memory dos
   315  	if err != nil {
   316  		return err
   317  	}
   318  
   319  	// replyf performs formatted reply to osconn.
   320  	// the error returned is for result of osconn.Write.
   321  	replyf := func(format string, argv ...interface{}) error {
   322  		line := fmt.Sprintf("< lonet %q " + format + "\n",
   323  				append([]interface{}{n.network()}, argv...)...)
   324  		_, err := osconn.Write([]byte(line))
   325  		return err
   326  	}
   327  
   328  	// ereply performs error reply to osconn.
   329  	// for convenience returned error is the error itself, not the
   330  	// error returned from osconn.Write.
   331  	ereply := func(err error) error {
   332  		replyf("E %q", err) // ignore osconn.Write error
   333  		return err
   334  	}
   335  
   336  	// eproto prepares protocol error and replies it to osconn.
   337  	//
   338  	// the error sent to peer contains only ereason, not details.
   339  	// for convenience returned error is protocol error constructed from
   340  	// error reason and details.
   341  	//
   342  	// error from osconn.Write is ignored.
   343  	eproto := func(ereason, detailf string, argv ...interface{}) error {
   344  		ereply(protocolErrorf(ereason))
   345  		return protocolErrorf(ereason + ": " + detailf, argv...)
   346  	}
   347  
   348  	var network, src, dst string
   349  	_, err = fmt.Sscanf(line, "> lonet %q dial %q %q\n", &network, &src, &dst)
   350  	if err != nil {
   351  		return eproto("invalid dial request", "%q", line)
   352  	}
   353  
   354  	if network != n.network() {
   355  		return eproto("network mismatch", "%q", network)
   356  	}
   357  
   358  	asrc, err := virtnet.ParseAddr(network, src)
   359  	if err != nil {
   360  		return eproto("src address invalid", "%q", src)
   361  	}
   362  	adst, err := virtnet.ParseAddr(network, dst)
   363  	if err != nil {
   364  		return eproto("dst address invalid", "%q", dst)
   365  	}
   366  
   367  	defer xerr.Contextf(&err, "%s <- %s", dst, src)
   368  
   369  	accept, err := n.vnotify.VNetAccept(ctx, asrc, adst, osconn)
   370  	if err != nil {
   371  		return ereply(err)
   372  	}
   373  
   374  	err = replyf("connected %q", accept.Addr)
   375  	accept.Ack <- err
   376  	return err
   377  }
   378  
   379  func (n *subNetwork) _loconnect(osconn net.Conn, src, dst *virtnet.Addr) (*virtnet.Addr, error) {
   380  	_, err := osconn.Write([]byte(fmt.Sprintf("> lonet %q dial %q %q\n", n.network(), src, dst)))
   381  	if err != nil {
   382  		return nil, err
   383  	}
   384  
   385  	line, err := readline(osconn, 1024)
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  
   390  	var network, reply, arg string
   391  	_, err = fmt.Sscanf(line, "< lonet %q %s %q\n", &network, &reply, &arg)
   392  	if err != nil {
   393  		return nil, protocolErrorf("invalid dial reply: %q", line)
   394  	}
   395  
   396  	switch reply {
   397  	default:
   398  		return nil, protocolErrorf("invalid reply verb: %q", reply)
   399  
   400  	case "E":
   401  		switch arg {
   402  		// handle canonical errors like ErrConnRefused
   403  		case "connection refused":
   404  			err = virtnet.ErrConnRefused
   405  		default:
   406  			err = stderrors.New(arg)
   407  		}
   408  
   409  		return nil, err
   410  
   411  	case "connected":
   412  		// ok
   413  	}
   414  
   415  	if network != n.network() {
   416  		return nil, protocolErrorf("connected, but network mismatch: %q", network)
   417  	}
   418  
   419  	acceptAddr, err := virtnet.ParseAddr(network, arg)
   420  	if err != nil {
   421  		return nil, protocolErrorf("connected, but accept address invalid: %q", acceptAddr)
   422  	}
   423  	if acceptAddr.Host != dst.Host {
   424  		return nil, protocolErrorf("connected, but accept address is for different host: %q", acceptAddr.Host)
   425  	}
   426  
   427  	// everything is ok
   428  	return acceptAddr, nil
   429  }
   430  
   431  // loconnect tries to establish lonet connection on top of OS-level connection.
   432  //
   433  // It performs lonet protocol handshake as dialer, and if successful returns
   434  // lonet-level peer's address of the accepted lonet connection.
   435  //
   436  // If handshake is not successful the connection is closed.
   437  func (n *subNetwork) loconnect(ctx context.Context, osconn net.Conn, src, dst *virtnet.Addr) (acceptAddr *virtnet.Addr, err error) {
   438  	defer func() {
   439  		switch err {
   440  		default:
   441  			// n.network, src, dst will be provided by virtnet while
   442  			// wrapping us with net.OpError{"dial", ...}
   443  			xerr.Contextf(&err, "loconnect %s", osconn.RemoteAddr())
   444  
   445  		// this errors remain unwrapped
   446  		case nil:
   447  		case virtnet.ErrConnRefused:
   448  		}
   449  	}()
   450  
   451  	// close osconn on error
   452  	osconnClosed := false
   453  	defer func() {
   454  		if err != nil && !osconnClosed {
   455  			osconn.Close()
   456  		}
   457  	}()
   458  
   459  	// spawn connect
   460  	type ret struct { acceptAddr *virtnet.Addr; err error }
   461  	doneq := make(chan ret)
   462  	go func() {
   463  		acceptAddr, err := n._loconnect(osconn, src, dst)
   464  		doneq <- ret{acceptAddr, err}
   465  	}()
   466  
   467  	// wait for completion / interrupt IO on ctx cancel
   468  	select {
   469  	case <-ctx.Done():
   470  		osconnClosed = true
   471  		osconn.Close()
   472  		<-doneq
   473  		return nil, ctx.Err()
   474  
   475  	case ret := <-doneq:
   476  		acceptAddr, err = ret.acceptAddr, ret.err
   477  		return acceptAddr, err
   478  	}
   479  }
   480  
   481  // VNetDial implements virtnet.Engine .
   482  func (v *vengine) VNetDial(ctx context.Context, src, dst *virtnet.Addr, dstosladdr string) (_ net.Conn, addrAccept *virtnet.Addr, _ error) {
   483  	n := v.subnet
   484  
   485  	// dial to OS addr for host and perform lonet handshake
   486  	osconn, err := tcp4.Dial(ctx, dstosladdr)
   487  	if err != nil {
   488  		return nil, nil, err
   489  	}
   490  
   491  	addrAccept, err = n.loconnect(ctx, osconn, src, dst)
   492  	if err != nil {
   493  		return nil, nil, err
   494  	}
   495  
   496  	return osconn, addrAccept, nil
   497  }
   498  
   499  
   500  // ----------------------------------------
   501  
   502  // VNetNewHost implements virtnet.Engine .
   503  func (v *vengine) VNetNewHost(ctx context.Context, hostname string, registry virtnet.Registry) error {
   504  	n := v.subnet
   505  
   506  	// no need to create host resources - we accept all connections on 1
   507  	// port for whole subnetwork.
   508  	return registry.Announce(ctx, hostname, n.oslistener.Addr().String())
   509  }
   510  
   511  // network returns name of the network this subnetwork is part of.
   512  func (n *subNetwork) network() string {
   513  	return n.vnet.Network()
   514  }
   515  
   516  // Error implements error.
   517  func (e *protocolError) Error() string {
   518  	return fmt.Sprintf("protocol error: %s", e.err)
   519  }
   520  
   521  // protocolErrorf constructs protocolError with error formatted via fmt.Errorf .
   522  func protocolErrorf(format string, argv ...interface{}) *protocolError {
   523  	return &protocolError{fmt.Errorf(format, argv...)}
   524  }
   525  
   526  
   527  // readline reads 1 line from r up to maxlen bytes.
   528  func readline(r io.Reader, maxlen int) (string, error) {
   529  	buf1 := []byte{0}
   530  	var line []byte
   531  	for len(line) < maxlen {
   532  		n, err := r.Read(buf1)
   533  		if n == 1 {
   534  			err = nil
   535  		}
   536  		if err != nil {
   537  			if err == io.EOF {
   538  				err = io.ErrUnexpectedEOF
   539  			}
   540  			return string(line), err
   541  		}
   542  
   543  		line = append(line, buf1...)
   544  		if buf1[0] == '\n' {
   545  			break
   546  		}
   547  	}
   548  
   549  	return string(line), nil
   550  }