cuelabs.dev/go/oci/ociregistry@v0.0.0-20240906074133-82eb438dd565/ociref/reference.go (about) 1 // Copyright 2023 CUE Labs AG 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ociref supports parsing cross-registry OCI registry references. 16 package ociref 17 18 import ( 19 "fmt" 20 "regexp" 21 "strings" 22 "sync" 23 24 "github.com/opencontainers/go-digest" 25 ) 26 27 // The following regular expressions derived from code in the 28 // [github.com/distribution/distribution/v3/reference] package. 29 const ( 30 // alphanumeric defines the alphanumeric atom, typically a 31 // component of names. This only allows lower case characters and digits. 32 alphanumeric = `[a-z0-9]+` 33 34 // separator defines the separators allowed to be embedded in name 35 // components. This allows one period, one or two underscore and multiple 36 // dashes. Repeated dashes and underscores are intentionally treated 37 // differently. In order to support valid hostnames as name components, 38 // supporting repeated dash was added. Additionally double underscore is 39 // now allowed as a separator to loosen the restriction for previously 40 // supported names. 41 // TODO the distribution spec doesn't allow these variations. 42 separator = `(?:[._]|__|[-]+)` 43 44 // domainNameComponent restricts the registry domain component of a 45 // repository name to start with a component as defined by DomainRegexp. 46 domainNameComponent = `(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)` 47 48 // ipv6address are enclosed between square brackets and may be represented 49 // in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format 50 // are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as 51 // IPv4-Mapped are deliberately excluded. 52 ipv6address = `(?:\[[a-fA-F0-9:]+\])` 53 54 // optionalPort matches an optional port-number including the port separator 55 // (e.g. ":80"). 56 port = `[0-9]+` 57 58 // domainName defines the structure of potential domain components 59 // that may be part of image names. This is purposely a subset of what is 60 // allowed by DNS to ensure backwards compatibility with Docker image 61 // names. This includes IPv4 addresses on decimal format. 62 // 63 // Note: we purposely exclude domain names without dots here, 64 // because otherwise we can't tell if the first component is 65 // a host name or not when it doesn't have a port. 66 // When it does have a port, the distinction is clear. 67 // 68 domainName = `(?:` + domainNameComponent + `(?:\.` + domainNameComponent + `)+` + `)` 69 70 // host defines the structure of potential domains based on the URI 71 // Host subcomponent on rfc3986. It may be a subset of DNS domain name, 72 // or an IPv4 address in decimal format, or an IPv6 address between square 73 // brackets (excluding zone identifiers as defined by rfc6874 or special 74 // addresses such as IPv4-Mapped). 75 host = `(?:` + domainName + `|` + ipv6address + `)` 76 77 // allowed by the URI Host subcomponent on rfc3986 to ensure backwards 78 // compatibility with Docker image names. 79 // Note: that we require the port when the host name looks like a regular 80 // name component. 81 domainAndPort = `(?:` + host + `(?:` + `:` + port + `)?` + `|` + domainNameComponent + `:` + port + `)` 82 83 // pathComponent restricts path-components to start with an alphanumeric 84 // character, with following parts able to be separated by a separator 85 // (one period, one or two underscore and multiple dashes). 86 pathComponent = `(?:` + alphanumeric + `(?:` + separator + alphanumeric + `)*` + `)` 87 88 // repoName matches the name of a repository. It consists of one 89 // or more forward slash (/) delimited path-components: 90 // 91 // pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu" 92 repoName = pathComponent + `(?:` + `/` + pathComponent + `)*` 93 ) 94 95 var referencePat = sync.OnceValue(func() *regexp.Regexp { 96 return regexp.MustCompile( 97 `^(?:` + 98 `(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host 99 `(` + repoName + `)` + // capture 2: repository name 100 `(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity. 101 `(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues 102 `)$`, 103 ) 104 }) 105 106 var hostPat = sync.OnceValue(func() *regexp.Regexp { 107 return regexp.MustCompile(`^(?:` + domainAndPort + `)$`) 108 }) 109 var repoPat = sync.OnceValue(func() *regexp.Regexp { 110 return regexp.MustCompile(`^(?:` + repoName + `)$`) 111 }) 112 113 // Reference represents an entry in an OCI repository. 114 type Reference struct { 115 // Host holds the host name of the registry 116 // within which the repository is stored, optionally in 117 // the form host:port. This might be empty. 118 Host string 119 120 // Repository holds the repository name. 121 Repository string 122 123 // Tag holds the TAG part of a :TAG or :TAG@DIGEST reference. 124 // When Digest is set as well as Tag, the tag will be verified 125 // to exist and have the expected digest. 126 Tag string 127 128 // Digest holds the DIGEST part of an @DIGEST reference 129 // or of a :TAG@DIGEST reference. 130 Digest Digest 131 } 132 133 type Digest = digest.Digest 134 135 // IsValidHost reports whether s is a valid host (or host:port) part of a reference string. 136 func IsValidHost(s string) bool { 137 return hostPat().MatchString(s) 138 } 139 140 // IsValidHost reports whether s is a valid repository part 141 // of a reference string. 142 func IsValidRepository(s string) bool { 143 return repoPat().MatchString(s) 144 } 145 146 // IsValidTag reports whether s is a valid reference tag. 147 func IsValidTag(s string) bool { 148 return checkTag(s) == nil 149 } 150 151 // IsValidDigest reports whether the digest d is well formed. 152 func IsValidDigest(d string) bool { 153 _, err := digest.Parse(d) 154 return err == nil 155 } 156 157 // Parse parses a reference string that must include 158 // a host name (or host:port pair) component. 159 // 160 // It is represented in string form as HOST[:PORT]/NAME[:TAG|@DIGEST] 161 // form: the same syntax accepted by "docker pull". 162 // Unlike "docker pull" however, there is no default registry: when 163 // presented with a bare repository name, Parse will return an error. 164 func Parse(refStr string) (Reference, error) { 165 ref, err := ParseRelative(refStr) 166 if err != nil { 167 return Reference{}, err 168 } 169 if ref.Host == "" { 170 return Reference{}, fmt.Errorf("reference does not contain host name") 171 } 172 return ref, nil 173 } 174 175 // ParseRelative parses a reference string that may 176 // or may not include a host name component. 177 // 178 // It is represented in string form as [HOST[:PORT]/]NAME[:TAG|@DIGEST] 179 // form: the same syntax accepted by "docker pull". 180 // Unlike "docker pull" however, there is no default registry: when 181 // presented with a bare repository name, the Host field will be empty. 182 func ParseRelative(refStr string) (Reference, error) { 183 m := referencePat().FindStringSubmatch(refStr) 184 if m == nil { 185 return Reference{}, fmt.Errorf("invalid reference syntax (%q)", refStr) 186 } 187 var ref Reference 188 ref.Host, ref.Repository, ref.Tag, ref.Digest = m[1], m[2], m[3], Digest(m[4]) 189 // Check lengths and digest: we don't check these as part of the regexp 190 // because it's more efficient to do it in Go and we get 191 // nicer error messages as a result. 192 if len(ref.Digest) > 0 { 193 if err := ref.Digest.Validate(); err != nil { 194 return Reference{}, fmt.Errorf("invalid digest %q: %v", ref.Digest, err) 195 } 196 } 197 if len(ref.Tag) > 0 { 198 if err := checkTag(ref.Tag); err != nil { 199 return Reference{}, err 200 } 201 } 202 if len(ref.Repository) > 255 { 203 return Reference{}, fmt.Errorf("repository name too long") 204 } 205 return ref, nil 206 } 207 208 func checkTag(s string) error { 209 if len(s) > 128 { 210 return fmt.Errorf("tag too long") 211 } 212 if !isWord(s[0]) { 213 return fmt.Errorf("tag %q does not start with word character", s) 214 } 215 for i := 1; i < len(s); i++ { 216 c := s[i] 217 if !isWord(c) && c != '.' && c != '-' { 218 return fmt.Errorf("tag %q contains invalid invalid character %q", s, c) 219 } 220 } 221 return nil 222 } 223 224 func isWord(c byte) bool { 225 return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') 226 } 227 228 // String returns the string form of a reference in the form 229 // 230 // [HOST/]NAME[:TAG|@DIGEST] 231 func (ref Reference) String() string { 232 var buf strings.Builder 233 buf.Grow(len(ref.Host) + 1 + len(ref.Repository) + 1 + len(ref.Tag) + 1 + len(ref.Digest)) 234 if ref.Host != "" { 235 buf.WriteString(ref.Host) 236 buf.WriteByte('/') 237 } 238 buf.WriteString(ref.Repository) 239 if len(ref.Tag) > 0 { 240 buf.WriteByte(':') 241 buf.WriteString(ref.Tag) 242 } 243 if len(ref.Digest) > 0 { 244 buf.WriteByte('@') 245 buf.WriteString(string(ref.Digest)) 246 } 247 return buf.String() 248 }