github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/dsref/parse.go (about) 1 package dsref 2 3 import ( 4 "fmt" 5 "regexp" 6 "unicode" 7 ) 8 9 // These functions parse a string to create a dsref. We refer to a "human-friendly reference" 10 // as one with only a username and dataset name, such as "my_user/my_dataset". A "full reference" 11 // can also contain a "concrete reference", which includes an optional datasetID plus a network 12 // and a "commit hash". 13 // 14 // Parse will parse a human-friendly reference, or only a concrete reference, or a full reference. 15 // 16 // ParseHumanFriendly will only successfully parse a human-friendly reference, and nothing else. 17 // 18 // The grammar is here: 19 // 20 // <dsref> = <humanFriendlyPortion> [ <concreteRef> ] | <concreteRef> 21 // <humanFriendlyPortion> = <validName> '/' <validName> 22 // <concreteRef> = '@' [ <datasetID> ] '/' <network> '/' <commitHash> 23 // 24 // Some examples of valid references: 25 // me/dataset 26 // username/dataset 27 // @/ipfs/QmSome1Commit2Hash3 28 // @datasetIdenfitier/ipfs/QmSome1Commit2Hash3 29 // username/dataset@QmProfile4ID5/ipfs/QmSome1Commit2Hash3 30 // An invalid reference: 31 // /ipfs/QmSome1Commit2Hash3 32 33 const ( 34 alphaNumeric = `[a-zA-Z][\w-]*` 35 alphaNumericDsname = `[a-zA-Z][\w-]{0,143}` 36 b58IdRSA = `Qm[0-9a-zA-Z]{0,44}` 37 b58IdED = `12D[0-9a-zA-Z]{0,50}` 38 b32LogbookID = `[a-z2-7]{0,52}` 39 ) 40 41 var ( 42 validName = regexp.MustCompile(`^` + alphaNumeric) 43 dsNameCheck = regexp.MustCompile(`^` + alphaNumericDsname + `$`) 44 concreteRef = regexp.MustCompile(`^@(` + b32LogbookID + `|` + b58IdRSA + `|` + b58IdED + `)?\/(` + alphaNumeric + `)\/(` + b58IdRSA + `|` + b58IdED + `)`) 45 b58StrictCheckRSA = regexp.MustCompile(`^Qm[1-9A-HJ-NP-Za-km-z]*$`) 46 b58StrictCheckED = regexp.MustCompile(`^12D[1-9A-HJ-NP-Za-km-z]*$`) 47 b32LowerCheck = regexp.MustCompile(`^[a-z2-7]*$`) 48 49 // ErrEmptyRef is an error for when a reference is empty 50 ErrEmptyRef = fmt.Errorf("empty reference") 51 // ErrParseError is an error returned when parsing fails 52 ErrParseError = fmt.Errorf("could not parse ref") 53 // ErrUnexpectedChar is an error when a character is unexpected, topic string must be non-empty 54 ErrUnexpectedChar = fmt.Errorf("unexpected character") 55 // ErrNotHumanFriendly is an error returned when a reference is not human-friendly 56 ErrNotHumanFriendly = fmt.Errorf("unexpected character '@', ref can only have username/name") 57 // ErrBadCaseName is the error when a bad case is used in the dataset name 58 ErrBadCaseName = fmt.Errorf("dataset name may not contain any upper-case letters") 59 // ErrBadCaseUsername is for when a username contains upper-case letters 60 ErrBadCaseUsername = fmt.Errorf("username may not contain any upper-case letters") 61 // ErrBadCaseShouldRename is the error when a dataset should be renamed to not use upper case letters 62 ErrBadCaseShouldRename = fmt.Errorf("dataset name should not contain any upper-case letters, rename it to only use lower-case letters, numbers, and underscores") 63 // ErrDescribeValidName is an error describing a valid dataset name 64 ErrDescribeValidName = fmt.Errorf("dataset name must start with a lower-case letter, and only contain lower-case letters, numbers, dashes, and underscore. Maximum length is 144 characters") 65 // ErrDescribeValidUsername describes valid username 66 ErrDescribeValidUsername = fmt.Errorf("username must start with a lower-case letter, and only contain lower-case letters, numbers, dashes, and underscores") 67 ) 68 69 // Parse a reference from a string 70 func Parse(text string) (Ref, error) { 71 return parse(text, false) 72 } 73 74 // ParsePeerRef a reference from a string where the dataset name is non-mandatory 75 func ParsePeerRef(text string) (Ref, error) { 76 return parse(text, true) 77 } 78 79 func parse(text string, peerRef bool) (Ref, error) { 80 var r Ref 81 origLength := len(text) 82 if origLength == 0 { 83 return r, ErrEmptyRef 84 } 85 86 remain, partial, err := parseHumanFriendlyPortion(text) 87 if err == nil { 88 text = remain 89 r.Username = partial.Username 90 r.Name = partial.Name 91 } else if err.Error() == needUsernameSeparatedErr && peerRef == true { 92 return partial, nil 93 } else if err == ErrUnexpectedChar { 94 // This error must only be returned when the topic string is non-empty, so it's safe to 95 // index it at position 0. 96 return r, NewParseError("%s at position %d: '%c'", err, len(text)-len(remain), remain[0]) 97 } else if err != ErrParseError { 98 return r, err 99 } 100 101 remain, partial, err = parseConcreteRef(text) 102 if err == nil { 103 text = remain 104 r.ProfileID = partial.ProfileID 105 r.InitID = partial.InitID 106 r.Path = partial.Path 107 } else if err != ErrParseError { 108 return r, err 109 } 110 111 if text != "" { 112 pos := origLength - len(text) 113 return r, NewParseError("unexpected character at position %d: '%c'", pos, text[0]) 114 } 115 116 // Dataset names are not supposed to contain upper-case characters. For now, return an error 117 // but also the reference; callers should display a warning, but continue to work for now. 118 for _, rune := range r.Name { 119 if unicode.IsUpper(rune) { 120 return r, ErrBadCaseName 121 } 122 } 123 124 return r, nil 125 } 126 127 // ParseHumanFriendly parses a reference that only has a username and a dataset name 128 func ParseHumanFriendly(text string) (Ref, error) { 129 var r Ref 130 origLength := len(text) 131 if origLength == 0 { 132 return r, ErrEmptyRef 133 } 134 135 remain, partial, err := parseHumanFriendlyPortion(text) 136 if err == nil { 137 text = remain 138 r.Username = partial.Username 139 r.Name = partial.Name 140 } else if err != ErrParseError { 141 return r, err 142 } 143 144 if text != "" { 145 if text[0] == '@' { 146 return r, ErrNotHumanFriendly 147 } 148 pos := origLength - len(text) 149 return r, NewParseError("unexpected character at position %d: '%c'", pos, text[0]) 150 } 151 152 // Dataset names are not supposed to contain upper-case characters. For now, return an error 153 // but also the reference; callers should display a warning, but continue to work for now. 154 for _, rune := range r.Name { 155 if unicode.IsUpper(rune) { 156 return r, ErrBadCaseName 157 } 158 } 159 160 return r, nil 161 } 162 163 // ParseError is an error for when a dataset reference fails to parse 164 type ParseError struct { 165 Message string 166 } 167 168 // Error renders the ParseError as a string 169 func (e *ParseError) Error() string { 170 return e.Message 171 } 172 173 // NewParseError returns a new ParseError, its parameters are a format string and arguments 174 func NewParseError(template string, args ...interface{}) error { 175 return &ParseError{Message: fmt.Sprintf(template, args...)} 176 } 177 178 // MustParse parses a dsref from a string, or panics if it fails 179 func MustParse(text string) Ref { 180 ref, err := Parse(text) 181 if err != nil { 182 panic(err) 183 } 184 return ref 185 } 186 187 // IsRefString returns whether the string parses as a valid reference 188 func IsRefString(text string) bool { 189 _, err := Parse(text) 190 return err == nil || err == ErrBadCaseName 191 } 192 193 // IsValidName returns whether the dataset name is valid 194 func IsValidName(text string) bool { 195 return dsNameCheck.Match([]byte(text)) 196 } 197 198 // EnsureValidName returns nil if the name is valid, and an error otherwise 199 func EnsureValidName(text string) error { 200 if !dsNameCheck.Match([]byte(text)) { 201 return ErrDescribeValidName 202 } 203 204 // Dataset names are not supposed to contain upper-case characters. For now, return an error 205 // but also the reference; callers should display a warning, but continue to work for now. 206 for _, r := range text { 207 if unicode.IsUpper(r) { 208 return ErrBadCaseName 209 } 210 } 211 212 return nil 213 } 214 215 // EnsureValidUsername is the same as EnsureValidName but returns a different error 216 func EnsureValidUsername(text string) error { 217 err := EnsureValidName(text) 218 if err == ErrDescribeValidName { 219 return ErrDescribeValidUsername 220 } 221 if err == ErrBadCaseName { 222 return ErrBadCaseUsername 223 } 224 return err 225 } 226 227 const needUsernameSeparatedErr = "need username separated by '/' from dataset name" 228 229 // parse the front of a dataset reference, the human friendly portion 230 func parseHumanFriendlyPortion(text string) (string, Ref, error) { 231 var r Ref 232 // Parse as many alphaNumeric characters as possible for the username 233 match := validName.FindString(text) 234 if match == "" { 235 return text, r, ErrParseError 236 } 237 r.Username = match 238 text = text[len(match):] 239 // Check if the remaining text is empty, or there's not a slash next 240 if text == "" { 241 return text, r, NewParseError(needUsernameSeparatedErr) 242 } else if text[0] != '/' { 243 return text, r, ErrUnexpectedChar 244 } 245 text = text[1:] 246 // Parse as many alphaNumeric characters as possible for the dataset name 247 match = validName.FindString(text) 248 if match == "" { 249 return text, r, NewParseError("did not find valid dataset name") 250 } 251 r.Name = match 252 text = text[len(match):] 253 return text, r, nil 254 } 255 256 // parse the back of the dataset reference, the concrete path 257 func parseConcreteRef(text string) (string, Ref, error) { 258 var r Ref 259 matches := concreteRef.FindStringSubmatch(text) 260 if matches == nil { 261 return text, r, ErrParseError 262 } 263 if len(matches) != 4 { 264 return text, r, NewParseError("unexpected number of regex matches %d", len(matches)) 265 } 266 // TODO(b5): this is incorrect. We should be allowing any 2-to-4-character alphanumeric 267 // network identifier 268 if matches[2] != "mem" && matches[2] != "ipfs" { 269 return text, r, NewParseError("invalid network") 270 } 271 matchedLen := len(matches[0]) 272 if matches[1] != "" { 273 if b58StrictCheckRSA.FindString(matches[1]) != "" || b58StrictCheckED.FindString(matches[1]) != "" { 274 r.ProfileID = matches[1] 275 } else { 276 if b32LowerCheck.FindString(matches[1]) == "" { 277 return text, r, NewParseError("datasetID contains invalid base32 characters") 278 } 279 r.InitID = matches[1] 280 } 281 } 282 if matches[3] != "" && b58StrictCheckRSA.FindString(matches[3]) == "" && b58StrictCheckED.FindString(matches[3]) == "" { 283 return text, r, NewParseError("path contains invalid base58 characters") 284 } 285 r.Path = fmt.Sprintf("/%s/%s", matches[2], matches[3]) 286 return text[matchedLen:], r, nil 287 }