github.com/mutagen-io/mutagen@v0.18.0-rc1/pkg/url/parse_ssh.go (about)

     1  package url
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"runtime"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/mutagen-io/mutagen/pkg/url/forwarding"
    11  )
    12  
    13  // isSCPSSHURL determines whether or not a raw URL is an SCP-style SSH URL.
    14  //
    15  // For synchronization URLs, a URL is classified as such if it contains a colon
    16  // with no forward slashes before it. On Windows, paths beginning with x:\ or
    17  // x:/ (where x is a-z or A-Z) are almost certainly referring to local paths,
    18  // but will trigger the SCP URL detection, so we check for and exclude these
    19  // candidates on Windows. This is, of course, something of a heuristic, but
    20  // we're unlikely to encounter 1-character hostnames and very likely to
    21  // encounter Windows paths, except on POSIX systems (where we don't perform this
    22  // check). If Windows users do have a 1-character hostname, they should just use
    23  // some other addressing scheme for it (e.g. an IP address or alternate
    24  // hostname).
    25  //
    26  // For forwarding URLs, the classification requires the presence of at least two
    27  // colons and exclude candidates which parse directly as forwarding endpoint
    28  // URLs, since those URLs are almost certainly local URLs. This excludes
    29  // hostnames that also happen to be protocol names, but these are also unlikely
    30  // to occur in practice and the same workarounds are available as for the
    31  // one-character hostname case mentioned above.
    32  func isSCPSSHURL(raw string, kind Kind) bool {
    33  	// Handle classification based on URL kind.
    34  	if kind == Kind_Synchronization {
    35  		// If we're on a Windows system and this is a Windows path, then reject
    36  		// it, because it should be treated as a local URL.
    37  		if runtime.GOOS == "windows" && isWindowsPath(raw) {
    38  			return false
    39  		}
    40  
    41  		// Otherwise check if there's a colon that comes before all forward
    42  		// slashes. If so, we treat this as an SCP-style SSH URL.
    43  		for _, c := range raw {
    44  			if c == ':' {
    45  				return true
    46  			} else if c == '/' {
    47  				break
    48  			}
    49  		}
    50  
    51  		// Either there wasn't a colon or a forward slash came first. In any
    52  		// case, this is not an SCP-style SSH URL.
    53  		return false
    54  	} else if kind == Kind_Forwarding {
    55  		// Reject any URL that parses directly as an endpoint URL, since this is
    56  		// almost certainly intended as a local forwarding endpoint URL.
    57  		if _, _, err := forwarding.Parse(raw); err == nil {
    58  			return false
    59  		}
    60  
    61  		// Ensure that there are at least two colons in the URL. This is about
    62  		// the only heuristic we have for invalidating candidate URLs.
    63  		if strings.Count(raw, ":") < 2 {
    64  			return false
    65  		}
    66  
    67  		// In the case of a forwarding URL, there's not really any useful
    68  		// additional classification test that we can perform without fully
    69  		// parsing the URL. We've at least ensured the presence of a colon, so
    70  		// parsing can be attempted.
    71  		return true
    72  	} else {
    73  		panic("unhandled URL kind")
    74  	}
    75  }
    76  
    77  // parseSCPSSH parses an SCP-style SSH URL.
    78  func parseSCPSSH(raw string, kind Kind) (*URL, error) {
    79  	// Parse off the username. If we hit a ':', then we've reached the end of
    80  	// the hostname specification and there was no username. Similarly, if we
    81  	// hit the end of the string without seeing an '@', then there's also no
    82  	// username specified. Ideally we'd want to break on any character that
    83  	// isn't allowed in a username, but that isn't well-defined, even for POSIX
    84  	// (it's effectively determined by a configurable regular expression -
    85  	// NAME_REGEX). We enforce that if a username is specified, that it is
    86  	// non-empty.
    87  	var username string
    88  	for i, r := range raw {
    89  		if r == ':' {
    90  			break
    91  		} else if r == '@' {
    92  			if i == 0 {
    93  				return nil, errors.New("empty username specified")
    94  			}
    95  			username = raw[:i]
    96  			raw = raw[i+1:]
    97  			break
    98  		}
    99  	}
   100  
   101  	// Parse off the host. Again, ideally we'd want to be a bit more stringent
   102  	// here about what characters we accept in hostnames, potentially breaking
   103  	// early with an error if we see a "disallowed" character, but we're better
   104  	// off just allowing SSH to reject hostnames that it doesn't like, because
   105  	// with its aliases it's hard to say what it'll allow. We reject empty
   106  	// hostnames and we reject cases where we've scanned the entire string and
   107  	// not found a colon (which indicates that this is probably not an SCP-style
   108  	// SSH URL).
   109  	var hostname string
   110  	for i, r := range raw {
   111  		if r == ':' {
   112  			if i == 0 {
   113  				return nil, errors.New("empty hostname")
   114  			}
   115  			hostname = raw[:i]
   116  			raw = raw[i+1:]
   117  			break
   118  		}
   119  	}
   120  	if hostname == "" {
   121  		return nil, errors.New("no hostname present")
   122  	}
   123  
   124  	// Parse off the port. This is not a standard SCP URL syntax (and even Git
   125  	// makes you use full SSH URLs if you want to specify a port), so we invent
   126  	// our own rules here, but essentially we just scan until the next colon,
   127  	// and if there is one, and all characters before it are 0-9, we try to
   128  	// parse the preceding segment as a port (restricting to the allowed port
   129  	// range). We allow such digit strings to be empty, because that probably
   130  	// indicates an attempt to specify a port. In the rare case that a path
   131  	// begins with something like "#:" (where # is a (potentially empty) digit
   132  	// sequence that could be mistaken for a port), an absolute or home-relative
   133  	// path can be specified.
   134  	var port uint32
   135  	for i, r := range raw {
   136  		// If we're in a string of digits, keep going.
   137  		if '0' <= r && r <= '9' {
   138  			continue
   139  		}
   140  
   141  		// If we've encountered a colon, then attempt to parse the preceding
   142  		// portion of the string as a port value.
   143  		if r == ':' {
   144  			if port64, err := strconv.ParseUint(raw[:i], 10, 16); err != nil {
   145  				return nil, errors.New("invalid port value specified")
   146  			} else {
   147  				port = uint32(port64)
   148  				raw = raw[i+1:]
   149  			}
   150  		}
   151  
   152  		// No need to continue scanning at this point. Either we successfully
   153  		// parsed, failed to parse, or hit a character that wasn't numeric.
   154  		break
   155  	}
   156  
   157  	// Treat what remains as the path.
   158  	path := raw
   159  
   160  	// Perform path processing based on URL kind.
   161  	if kind == Kind_Synchronization {
   162  		// Ensure that the path is non-empty.
   163  		if path == "" {
   164  			return nil, errors.New("empty path")
   165  		}
   166  	} else if kind == Kind_Forwarding {
   167  		// Parse the forwarding endpoint URL to ensure that it's valid.
   168  		if _, _, err := forwarding.Parse(path); err != nil {
   169  			return nil, fmt.Errorf("invalid forwarding endpoint URL: %w", err)
   170  		}
   171  	} else {
   172  		panic("unhandled URL kind")
   173  	}
   174  
   175  	// Create the URL, using what remains as the path.
   176  	return &URL{
   177  		Kind:     kind,
   178  		Protocol: Protocol_SSH,
   179  		User:     username,
   180  		Host:     hostname,
   181  		Port:     port,
   182  		Path:     path,
   183  	}, nil
   184  }