github.com/opentofu/opentofu@v1.7.1/internal/registry/regsrc/friendly_host.go (about) 1 // Copyright (c) The OpenTofu Authors 2 // SPDX-License-Identifier: MPL-2.0 3 // Copyright (c) 2023 HashiCorp, Inc. 4 // SPDX-License-Identifier: MPL-2.0 5 6 package regsrc 7 8 import ( 9 "regexp" 10 "strings" 11 12 svchost "github.com/hashicorp/terraform-svchost" 13 ) 14 15 var ( 16 // InvalidHostString is a placeholder returned when a raw host can't be 17 // converted by IDNA spec. It will never be returned for any host for which 18 // Valid() is true. 19 InvalidHostString = "<invalid host>" 20 21 // urlLabelEndSubRe is a sub-expression that matches any character that's 22 // allowed at the start or end of a URL label according to RFC1123. 23 urlLabelEndSubRe = "[0-9A-Za-z]" 24 25 // urlLabelEndSubRe is a sub-expression that matches any character that's 26 // allowed at in a non-start or end of a URL label according to RFC1123. 27 urlLabelMidSubRe = "[0-9A-Za-z-]" 28 29 // urlLabelUnicodeSubRe is a sub-expression that matches any non-ascii char 30 // in an IDN (Unicode) display URL. It's not strict - there are only ~15k 31 // valid Unicode points in IDN RFC (some with conditions). We are just going 32 // with being liberal with matching and then erroring if we fail to convert 33 // to punycode later (which validates chars fully). This at least ensures 34 // ascii chars dissalowed by the RC1123 parts above don't become legal 35 // again. 36 urlLabelUnicodeSubRe = "[^[:ascii:]]" 37 38 // hostLabelSubRe is the sub-expression that matches a valid hostname label. 39 // It does not anchor the start or end so it can be composed into more 40 // complex RegExps below. Note that for sanity we don't handle disallowing 41 // raw punycode in this regexp (esp. since re2 doesn't support negative 42 // lookbehind, but we can capture it's presence here to check later). 43 hostLabelSubRe = "" + 44 // Match valid initial char, or unicode char 45 "(?:" + urlLabelEndSubRe + "|" + urlLabelUnicodeSubRe + ")" + 46 // Optionally, match 0 to 61 valid URL or Unicode chars, 47 // followed by one valid end char or unicode char 48 "(?:" + 49 "(?:" + urlLabelMidSubRe + "|" + urlLabelUnicodeSubRe + "){0,61}" + 50 "(?:" + urlLabelEndSubRe + "|" + urlLabelUnicodeSubRe + ")" + 51 ")?" 52 53 // hostSubRe is the sub-expression that matches a valid host prefix. 54 // Allows custom port. 55 hostSubRe = hostLabelSubRe + "(?:\\." + hostLabelSubRe + ")+(?::\\d+)?" 56 57 // hostRe is a regexp that matches a valid host prefix. Additional 58 // validation of unicode strings is needed for matches. 59 hostRe = regexp.MustCompile("^" + hostSubRe + "$") 60 ) 61 62 // FriendlyHost describes a registry instance identified in source strings by a 63 // simple bare hostname like registry.opentofu.org. 64 type FriendlyHost struct { 65 Raw string 66 } 67 68 func NewFriendlyHost(host string) *FriendlyHost { 69 return &FriendlyHost{Raw: host} 70 } 71 72 // ParseFriendlyHost attempts to parse a valid "friendly host" prefix from the 73 // given string. If no valid prefix is found, host will be nil and rest will 74 // contain the full source string. The host prefix must terminate at the end of 75 // the input or at the first / character. If one or more characters exist after 76 // the first /, they will be returned as rest (without the / delimiter). 77 // Hostnames containing punycode WILL be parsed successfully since they may have 78 // come from an internal normalized source string, however should be considered 79 // invalid if the string came from a user directly. This must be checked 80 // explicitly for user-input strings by calling Valid() on the 81 // returned host. 82 func ParseFriendlyHost(source string) (host *FriendlyHost, rest string) { 83 parts := strings.SplitN(source, "/", 2) 84 85 if hostRe.MatchString(parts[0]) { 86 host = &FriendlyHost{Raw: parts[0]} 87 if len(parts) == 2 { 88 rest = parts[1] 89 } 90 return 91 } 92 93 // No match, return whole string as rest along with nil host 94 rest = source 95 return 96 } 97 98 // Valid returns whether the host prefix is considered valid in any case. 99 // Example of invalid prefixes might include ones that don't conform to the host 100 // name specifications. Not that IDN prefixes containing punycode are not valid 101 // input which we expect to always be in user-input or normalised display form. 102 func (h *FriendlyHost) Valid() bool { 103 return svchost.IsValid(h.Raw) 104 } 105 106 // Display returns the host formatted for display to the user in CLI or web 107 // output. 108 func (h *FriendlyHost) Display() string { 109 return svchost.ForDisplay(h.Raw) 110 } 111 112 // Normalized returns the host formatted for internal reference or comparison. 113 func (h *FriendlyHost) Normalized() string { 114 host, err := svchost.ForComparison(h.Raw) 115 if err != nil { 116 return InvalidHostString 117 } 118 return string(host) 119 } 120 121 // String returns the host formatted as the user originally typed it assuming it 122 // was parsed from user input. 123 func (h *FriendlyHost) String() string { 124 return h.Raw 125 } 126 127 // Equal compares the FriendlyHost against another instance taking normalization 128 // into account. Invalid hosts cannot be compared and will always return false. 129 func (h *FriendlyHost) Equal(other *FriendlyHost) bool { 130 if other == nil { 131 return false 132 } 133 134 otherHost, err := svchost.ForComparison(other.Raw) 135 if err != nil { 136 return false 137 } 138 139 host, err := svchost.ForComparison(h.Raw) 140 if err != nil { 141 return false 142 } 143 144 return otherHost == host 145 }