github.com/grailbio/base@v0.0.11/file/path.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package file
     6  
     7  import (
     8  	"fmt"
     9  	"path/filepath"
    10  	"strings"
    11  )
    12  
    13  const (
    14  	urlSeparator = '/'
    15  )
    16  
    17  // Compute the length of "foo" part of "foo://bar/baz". Returns (0,nil) if the
    18  // path is for a local file system.
    19  func getURLScheme(path string) (int, error) {
    20  	// Scheme is always encoded in ASCII, per RFC3986.
    21  	schemeLimit := -1
    22  	for i := 0; i < len(path); i++ {
    23  		ch := path[i]
    24  		if ch == ':' {
    25  			if len(path) <= i+2 || path[i+1] != '/' || path[i+2] != '/' {
    26  				return -1, fmt.Errorf("parsepath %s: a URL must start with 'scheme://'", path)
    27  			}
    28  			schemeLimit = i
    29  			break
    30  		}
    31  		if !((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '.' || ch == '+' || ch == '=') {
    32  			break
    33  		}
    34  	}
    35  	if schemeLimit == -1 {
    36  		return 0, nil
    37  	}
    38  	return schemeLimit, nil
    39  }
    40  
    41  // ParsePath parses "path" and find the namespace object that can handle the
    42  // path. The path can be of form either "scheme://path" just
    43  // "path0/.../pathN". The latter indicates a local file.
    44  //
    45  // On success, "schema" will be the schema part of the path. "suffix" will be
    46  // the path part after the scheme://. For example, ParsePath("s3://key/bucket")
    47  // will return ("s3", "key/bucket", nil).
    48  //
    49  // For a local-filesystem path, this function returns ("", path, nil).
    50  func ParsePath(path string) (scheme, suffix string, err error) {
    51  	schemeLen, err := getURLScheme(path)
    52  	if err != nil {
    53  		return "", "", err
    54  	}
    55  	if schemeLen == 0 {
    56  		return "", path, nil
    57  	}
    58  	return path[:schemeLen], path[schemeLen+3:], nil
    59  }
    60  
    61  // MustParsePath is similar to ParsePath, but crashes the process on error.
    62  func MustParsePath(path string) (scheme, suffix string) {
    63  	scheme, suffix, err := ParsePath(path)
    64  	if err != nil {
    65  		panic(err)
    66  	}
    67  	return scheme, suffix
    68  }
    69  
    70  // Base returns the last element of the path. It is the same as filepath.Base
    71  // for a local filesystem path.  Else, it acts like filepath.Base, with the
    72  // following differences: (1) the path separator is always '/'. (2) if the URL
    73  // suffix is empty, it returns the path itself.
    74  //
    75  // Example:
    76  //   file.Base("s3://") returns "s3://".
    77  //   file.Base("s3://foo/hah/") returns "hah".
    78  func Base(path string) string {
    79  	scheme, suffix, err := ParsePath(path)
    80  	if scheme == "" || err != nil {
    81  		return filepath.Base(path)
    82  	}
    83  	if suffix == "" {
    84  		// path is "s3://".
    85  		return path
    86  	}
    87  	return filepath.Base(suffix)
    88  }
    89  
    90  // Dir returns the all but the last element of the path. It the same as
    91  // filepath.Dir for a local filesystem path.  Else, it acts like filepath.Base,
    92  // with the following differences: (1) the path separator is always '/'. (2) if
    93  // the URL suffix is empty, it returns the path itself. (3) The path is not
    94  // cleaned; for example repeated "/"s in the path is preserved.
    95  func Dir(path string) string {
    96  	scheme, suffix, err := ParsePath(path)
    97  	if scheme == "" || err != nil {
    98  		return filepath.Dir(path)
    99  	}
   100  	for i := len(suffix) - 1; i >= 0; i-- {
   101  		if suffix[i] == urlSeparator {
   102  			for i > 0 && suffix[i] == urlSeparator {
   103  				i--
   104  			}
   105  			return path[:len(scheme)+3+i+1]
   106  		}
   107  	}
   108  	return path[:len(scheme)+3]
   109  }
   110  
   111  // Join joins any number of path elements into a single path, adding a
   112  // separator if necessary. It works like filepath.Join, with the following
   113  // differences:
   114  // 1. The path separator is always '/' (so this doesn't work on Windows).
   115  // 2. The interior of each element is not cleaned; for example if an element
   116  //    contains repeated "/"s in the middle, they are preserved.
   117  // 3. If elems[0] has a prefix of the form "<scheme>://" or "//", that prefix
   118  //    is retained.  (A prefix of "/" is also retained; that matches
   119  //    filepath.Join's behavior.)
   120  func Join(elems ...string) string {
   121  	if len(elems) == 0 {
   122  		return filepath.Join(elems...)
   123  	}
   124  	var prefix string
   125  	n, err := getURLScheme(elems[0])
   126  	if err == nil && n > 0 {
   127  		prefix = elems[0][:n+3]
   128  		elems[0] = elems[0][n+3:]
   129  	} else if len(elems[0]) > 0 && elems[0][0] == '/' {
   130  		if elems[0][1] == '/' {
   131  			prefix = "//"
   132  			elems[0] = elems[0][2:]
   133  		} else {
   134  			prefix = "/"
   135  			elems[0] = elems[0][1:]
   136  		}
   137  	}
   138  
   139  	// Remove leading (optional) or trailing "/"s from the string.
   140  	clean := func(p string) string {
   141  		var s, e int
   142  		for s = 0; s < len(p); s++ {
   143  			if p[s] != urlSeparator {
   144  				break
   145  			}
   146  		}
   147  		for e = len(p) - 1; e >= 0; e-- {
   148  			if p[e] != urlSeparator {
   149  				break
   150  			}
   151  		}
   152  		if e < s {
   153  			return ""
   154  		}
   155  		return p[s : e+1]
   156  	}
   157  
   158  	newElems := make([]string, 0, len(elems))
   159  	for i := 0; i < len(elems); i++ {
   160  		e := clean(elems[i])
   161  		if e != "" {
   162  			newElems = append(newElems, e)
   163  		}
   164  	}
   165  	return prefix + strings.Join(newElems, "/")
   166  }
   167  
   168  // IsAbs returns true if pathname is absolute local path. For non-local file, it
   169  // always returns true.
   170  func IsAbs(path string) bool {
   171  	if scheme, _, err := ParsePath(path); scheme == "" || err != nil {
   172  		return filepath.IsAbs(path)
   173  	}
   174  	return true
   175  }