github.com/terramate-io/tf@v0.0.0-20230830114523-fce866b4dfcd/registry/regsrc/friendly_host.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package regsrc 5 6 import ( 7 "regexp" 8 "strings" 9 10 svchost "github.com/hashicorp/terraform-svchost" 11 ) 12 13 var ( 14 // InvalidHostString is a placeholder returned when a raw host can't be 15 // converted by IDNA spec. It will never be returned for any host for which 16 // Valid() is true. 17 InvalidHostString = "<invalid host>" 18 19 // urlLabelEndSubRe is a sub-expression that matches any character that's 20 // allowed at the start or end of a URL label according to RFC1123. 21 urlLabelEndSubRe = "[0-9A-Za-z]" 22 23 // urlLabelEndSubRe is a sub-expression that matches any character that's 24 // allowed at in a non-start or end of a URL label according to RFC1123. 25 urlLabelMidSubRe = "[0-9A-Za-z-]" 26 27 // urlLabelUnicodeSubRe is a sub-expression that matches any non-ascii char 28 // in an IDN (Unicode) display URL. It's not strict - there are only ~15k 29 // valid Unicode points in IDN RFC (some with conditions). We are just going 30 // with being liberal with matching and then erroring if we fail to convert 31 // to punycode later (which validates chars fully). This at least ensures 32 // ascii chars dissalowed by the RC1123 parts above don't become legal 33 // again. 34 urlLabelUnicodeSubRe = "[^[:ascii:]]" 35 36 // hostLabelSubRe is the sub-expression that matches a valid hostname label. 37 // It does not anchor the start or end so it can be composed into more 38 // complex RegExps below. Note that for sanity we don't handle disallowing 39 // raw punycode in this regexp (esp. since re2 doesn't support negative 40 // lookbehind, but we can capture it's presence here to check later). 41 hostLabelSubRe = "" + 42 // Match valid initial char, or unicode char 43 "(?:" + urlLabelEndSubRe + "|" + urlLabelUnicodeSubRe + ")" + 44 // Optionally, match 0 to 61 valid URL or Unicode chars, 45 // followed by one valid end char or unicode char 46 "(?:" + 47 "(?:" + urlLabelMidSubRe + "|" + urlLabelUnicodeSubRe + "){0,61}" + 48 "(?:" + urlLabelEndSubRe + "|" + urlLabelUnicodeSubRe + ")" + 49 ")?" 50 51 // hostSubRe is the sub-expression that matches a valid host prefix. 52 // Allows custom port. 53 hostSubRe = hostLabelSubRe + "(?:\\." + hostLabelSubRe + ")+(?::\\d+)?" 54 55 // hostRe is a regexp that matches a valid host prefix. Additional 56 // validation of unicode strings is needed for matches. 57 hostRe = regexp.MustCompile("^" + hostSubRe + "$") 58 ) 59 60 // FriendlyHost describes a registry instance identified in source strings by a 61 // simple bare hostname like registry.terraform.io. 62 type FriendlyHost struct { 63 Raw string 64 } 65 66 func NewFriendlyHost(host string) *FriendlyHost { 67 return &FriendlyHost{Raw: host} 68 } 69 70 // ParseFriendlyHost attempts to parse a valid "friendly host" prefix from the 71 // given string. If no valid prefix is found, host will be nil and rest will 72 // contain the full source string. The host prefix must terminate at the end of 73 // the input or at the first / character. If one or more characters exist after 74 // the first /, they will be returned as rest (without the / delimiter). 75 // Hostnames containing punycode WILL be parsed successfully since they may have 76 // come from an internal normalized source string, however should be considered 77 // invalid if the string came from a user directly. This must be checked 78 // explicitly for user-input strings by calling Valid() on the 79 // returned host. 80 func ParseFriendlyHost(source string) (host *FriendlyHost, rest string) { 81 parts := strings.SplitN(source, "/", 2) 82 83 if hostRe.MatchString(parts[0]) { 84 host = &FriendlyHost{Raw: parts[0]} 85 if len(parts) == 2 { 86 rest = parts[1] 87 } 88 return 89 } 90 91 // No match, return whole string as rest along with nil host 92 rest = source 93 return 94 } 95 96 // Valid returns whether the host prefix is considered valid in any case. 97 // Example of invalid prefixes might include ones that don't conform to the host 98 // name specifications. Not that IDN prefixes containing punycode are not valid 99 // input which we expect to always be in user-input or normalised display form. 100 func (h *FriendlyHost) Valid() bool { 101 return svchost.IsValid(h.Raw) 102 } 103 104 // Display returns the host formatted for display to the user in CLI or web 105 // output. 106 func (h *FriendlyHost) Display() string { 107 return svchost.ForDisplay(h.Raw) 108 } 109 110 // Normalized returns the host formatted for internal reference or comparison. 111 func (h *FriendlyHost) Normalized() string { 112 host, err := svchost.ForComparison(h.Raw) 113 if err != nil { 114 return InvalidHostString 115 } 116 return string(host) 117 } 118 119 // String returns the host formatted as the user originally typed it assuming it 120 // was parsed from user input. 121 func (h *FriendlyHost) String() string { 122 return h.Raw 123 } 124 125 // Equal compares the FriendlyHost against another instance taking normalization 126 // into account. Invalid hosts cannot be compared and will always return false. 127 func (h *FriendlyHost) Equal(other *FriendlyHost) bool { 128 if other == nil { 129 return false 130 } 131 132 otherHost, err := svchost.ForComparison(other.Raw) 133 if err != nil { 134 return false 135 } 136 137 host, err := svchost.ForComparison(h.Raw) 138 if err != nil { 139 return false 140 } 141 142 return otherHost == host 143 }