github.com/grailbio/base@v0.0.11/file/path.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package file 6 7 import ( 8 "fmt" 9 "path/filepath" 10 "strings" 11 ) 12 13 const ( 14 urlSeparator = '/' 15 ) 16 17 // Compute the length of "foo" part of "foo://bar/baz". Returns (0,nil) if the 18 // path is for a local file system. 19 func getURLScheme(path string) (int, error) { 20 // Scheme is always encoded in ASCII, per RFC3986. 21 schemeLimit := -1 22 for i := 0; i < len(path); i++ { 23 ch := path[i] 24 if ch == ':' { 25 if len(path) <= i+2 || path[i+1] != '/' || path[i+2] != '/' { 26 return -1, fmt.Errorf("parsepath %s: a URL must start with 'scheme://'", path) 27 } 28 schemeLimit = i 29 break 30 } 31 if !((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '.' || ch == '+' || ch == '=') { 32 break 33 } 34 } 35 if schemeLimit == -1 { 36 return 0, nil 37 } 38 return schemeLimit, nil 39 } 40 41 // ParsePath parses "path" and find the namespace object that can handle the 42 // path. The path can be of form either "scheme://path" just 43 // "path0/.../pathN". The latter indicates a local file. 44 // 45 // On success, "schema" will be the schema part of the path. "suffix" will be 46 // the path part after the scheme://. For example, ParsePath("s3://key/bucket") 47 // will return ("s3", "key/bucket", nil). 48 // 49 // For a local-filesystem path, this function returns ("", path, nil). 50 func ParsePath(path string) (scheme, suffix string, err error) { 51 schemeLen, err := getURLScheme(path) 52 if err != nil { 53 return "", "", err 54 } 55 if schemeLen == 0 { 56 return "", path, nil 57 } 58 return path[:schemeLen], path[schemeLen+3:], nil 59 } 60 61 // MustParsePath is similar to ParsePath, but crashes the process on error. 62 func MustParsePath(path string) (scheme, suffix string) { 63 scheme, suffix, err := ParsePath(path) 64 if err != nil { 65 panic(err) 66 } 67 return scheme, suffix 68 } 69 70 // Base returns the last element of the path. It is the same as filepath.Base 71 // for a local filesystem path. Else, it acts like filepath.Base, with the 72 // following differences: (1) the path separator is always '/'. (2) if the URL 73 // suffix is empty, it returns the path itself. 74 // 75 // Example: 76 // file.Base("s3://") returns "s3://". 77 // file.Base("s3://foo/hah/") returns "hah". 78 func Base(path string) string { 79 scheme, suffix, err := ParsePath(path) 80 if scheme == "" || err != nil { 81 return filepath.Base(path) 82 } 83 if suffix == "" { 84 // path is "s3://". 85 return path 86 } 87 return filepath.Base(suffix) 88 } 89 90 // Dir returns the all but the last element of the path. It the same as 91 // filepath.Dir for a local filesystem path. Else, it acts like filepath.Base, 92 // with the following differences: (1) the path separator is always '/'. (2) if 93 // the URL suffix is empty, it returns the path itself. (3) The path is not 94 // cleaned; for example repeated "/"s in the path is preserved. 95 func Dir(path string) string { 96 scheme, suffix, err := ParsePath(path) 97 if scheme == "" || err != nil { 98 return filepath.Dir(path) 99 } 100 for i := len(suffix) - 1; i >= 0; i-- { 101 if suffix[i] == urlSeparator { 102 for i > 0 && suffix[i] == urlSeparator { 103 i-- 104 } 105 return path[:len(scheme)+3+i+1] 106 } 107 } 108 return path[:len(scheme)+3] 109 } 110 111 // Join joins any number of path elements into a single path, adding a 112 // separator if necessary. It works like filepath.Join, with the following 113 // differences: 114 // 1. The path separator is always '/' (so this doesn't work on Windows). 115 // 2. The interior of each element is not cleaned; for example if an element 116 // contains repeated "/"s in the middle, they are preserved. 117 // 3. If elems[0] has a prefix of the form "<scheme>://" or "//", that prefix 118 // is retained. (A prefix of "/" is also retained; that matches 119 // filepath.Join's behavior.) 120 func Join(elems ...string) string { 121 if len(elems) == 0 { 122 return filepath.Join(elems...) 123 } 124 var prefix string 125 n, err := getURLScheme(elems[0]) 126 if err == nil && n > 0 { 127 prefix = elems[0][:n+3] 128 elems[0] = elems[0][n+3:] 129 } else if len(elems[0]) > 0 && elems[0][0] == '/' { 130 if elems[0][1] == '/' { 131 prefix = "//" 132 elems[0] = elems[0][2:] 133 } else { 134 prefix = "/" 135 elems[0] = elems[0][1:] 136 } 137 } 138 139 // Remove leading (optional) or trailing "/"s from the string. 140 clean := func(p string) string { 141 var s, e int 142 for s = 0; s < len(p); s++ { 143 if p[s] != urlSeparator { 144 break 145 } 146 } 147 for e = len(p) - 1; e >= 0; e-- { 148 if p[e] != urlSeparator { 149 break 150 } 151 } 152 if e < s { 153 return "" 154 } 155 return p[s : e+1] 156 } 157 158 newElems := make([]string, 0, len(elems)) 159 for i := 0; i < len(elems); i++ { 160 e := clean(elems[i]) 161 if e != "" { 162 newElems = append(newElems, e) 163 } 164 } 165 return prefix + strings.Join(newElems, "/") 166 } 167 168 // IsAbs returns true if pathname is absolute local path. For non-local file, it 169 // always returns true. 170 func IsAbs(path string) bool { 171 if scheme, _, err := ParsePath(path); scheme == "" || err != nil { 172 return filepath.IsAbs(path) 173 } 174 return true 175 }