cuelabs.dev/go/oci/ociregistry@v0.0.0-20240906074133-82eb438dd565/ociref/reference.go (about)

     1  // Copyright 2023 CUE Labs AG
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ociref supports parsing cross-registry OCI registry references.
    16  package ociref
    17  
    18  import (
    19  	"fmt"
    20  	"regexp"
    21  	"strings"
    22  	"sync"
    23  
    24  	"github.com/opencontainers/go-digest"
    25  )
    26  
// The following regular expressions are derived from code in the
// [github.com/distribution/distribution/v3/reference] package.
//
// Each constant is a regular expression fragment; the fragments are
// concatenated to build the patterns compiled by referencePat, hostPat
// and repoPat.
const (
	// alphanumeric defines the alphanumeric atom, typically a
	// component of names. This only allows lower case characters and digits.
	alphanumeric = `[a-z0-9]+`

	// separator defines the separators allowed to be embedded in name
	// components. This allows one period, one or two underscores and multiple
	// dashes. Repeated dashes and underscores are intentionally treated
	// differently. In order to support valid hostnames as name components,
	// supporting repeated dash was added. Additionally double underscore is
	// now allowed as a separator to loosen the restriction for previously
	// supported names.
	// TODO the distribution spec doesn't allow these variations.
	separator = `(?:[._]|__|[-]+)`

	// domainNameComponent matches a single label of a registry domain
	// name: it starts and ends with an ASCII letter or digit and may
	// contain dashes in between. Unlike alphanumeric, upper case
	// letters are allowed here.
	domainNameComponent = `(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)`

	// ipv6address are enclosed between square brackets and may be represented
	// in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format
	// are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as
	// IPv4-Mapped are deliberately excluded.
	//
	// Note: the pattern only restricts the bracketed text to hexadecimal
	// digits and colons; it does not otherwise check the address structure.
	ipv6address = `(?:\[[a-fA-F0-9:]+\])`

	// port matches a port number: one or more decimal digits.
	// It does not include the ":" separator, which is added
	// where port is used (see domainAndPort).
	port = `[0-9]+`

	// domainName defines the structure of potential domain components
	// that may be part of image names. This is purposely a subset of what is
	// allowed by DNS to ensure backwards compatibility with Docker image
	// names. This includes IPv4 addresses in decimal format.
	//
	// Note: we purposely exclude domain names without dots here,
	// because otherwise we can't tell if the first component is
	// a host name or not when it doesn't have a port.
	// When it does have a port, the distinction is clear.
	domainName = `(?:` + domainNameComponent + `(?:\.` + domainNameComponent + `)+` + `)`

	// host defines the structure of potential domains based on the URI
	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
	// or an IPv4 address in decimal format, or an IPv6 address between square
	// brackets (excluding zone identifiers as defined by rfc6874 or special
	// addresses such as IPv4-Mapped).
	host = `(?:` + domainName + `|` + ipv6address + `)`

	// domainAndPort matches the host part of a reference: either a host
	// (as defined above) followed by an optional ":port", or a single
	// domainNameComponent followed by a mandatory ":port".
	// Note that we require the port when the host name looks like a regular
	// name component (that is, when it contains no dots).
	domainAndPort = `(?:` + host + `(?:` + `:` + port + `)?` + `|` + domainNameComponent + `:` + port + `)`

	// pathComponent restricts path-components to start with an alphanumeric
	// character, with following parts able to be separated by a separator
	// (one period, one or two underscores and multiple dashes).
	pathComponent = `(?:` + alphanumeric + `(?:` + separator + alphanumeric + `)*` + `)`

	// repoName matches the name of a repository. It consists of one
	// or more forward slash (/) delimited path-components:
	//
	//	pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
	repoName = pathComponent + `(?:` + `/` + pathComponent + `)*`
)
    94  
    95  var referencePat = sync.OnceValue(func() *regexp.Regexp {
    96  	return regexp.MustCompile(
    97  		`^(?:` +
    98  			`(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host
    99  			`(` + repoName + `)` + // capture 2: repository name
   100  			`(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity.
   101  			`(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues
   102  			`)$`,
   103  	)
   104  })
   105  
   106  var hostPat = sync.OnceValue(func() *regexp.Regexp {
   107  	return regexp.MustCompile(`^(?:` + domainAndPort + `)$`)
   108  })
   109  var repoPat = sync.OnceValue(func() *regexp.Regexp {
   110  	return regexp.MustCompile(`^(?:` + repoName + `)$`)
   111  })
   112  
// Reference represents an entry in an OCI repository.
//
// It is the parsed form of a reference string as accepted by
// [Parse] and [ParseRelative]; [Reference.String] converts it back
// to string form.
type Reference struct {
	// Host holds the host name of the registry
	// within which the repository is stored, optionally in
	// the form host:port. This might be empty, for example when
	// the reference was parsed by ParseRelative from a string
	// with no host part.
	Host string

	// Repository holds the repository name.
	Repository string

	// Tag holds the TAG part of a :TAG or :TAG@DIGEST reference.
	// When Digest is set as well as Tag, the tag will be verified
	// to exist and have the expected digest.
	Tag string

	// Digest holds the DIGEST part of an @DIGEST reference
	// or of a :TAG@DIGEST reference.
	Digest Digest
}
   132  
// Digest is an alias for the Digest type of the
// github.com/opencontainers/go-digest package.
type Digest = digest.Digest
   134  
   135  // IsValidHost reports whether s is a valid host (or host:port) part of a reference string.
   136  func IsValidHost(s string) bool {
   137  	return hostPat().MatchString(s)
   138  }
   139  
   140  // IsValidHost reports whether s is a valid repository part
   141  // of a reference string.
   142  func IsValidRepository(s string) bool {
   143  	return repoPat().MatchString(s)
   144  }
   145  
   146  // IsValidTag reports whether s is a valid reference tag.
   147  func IsValidTag(s string) bool {
   148  	return checkTag(s) == nil
   149  }
   150  
   151  // IsValidDigest reports whether the digest d is well formed.
   152  func IsValidDigest(d string) bool {
   153  	_, err := digest.Parse(d)
   154  	return err == nil
   155  }
   156  
   157  // Parse parses a reference string that must include
   158  // a host name (or host:port pair) component.
   159  //
   160  // It is represented in string form as HOST[:PORT]/NAME[:TAG|@DIGEST]
   161  // form: the same syntax accepted by "docker pull".
   162  // Unlike "docker pull" however, there is no default registry: when
   163  // presented with a bare repository name, Parse will return an error.
   164  func Parse(refStr string) (Reference, error) {
   165  	ref, err := ParseRelative(refStr)
   166  	if err != nil {
   167  		return Reference{}, err
   168  	}
   169  	if ref.Host == "" {
   170  		return Reference{}, fmt.Errorf("reference does not contain host name")
   171  	}
   172  	return ref, nil
   173  }
   174  
   175  // ParseRelative parses a reference string that may
   176  // or may not include a host name component.
   177  //
   178  // It is represented in string form as [HOST[:PORT]/]NAME[:TAG|@DIGEST]
   179  // form: the same syntax accepted by "docker pull".
   180  // Unlike "docker pull" however, there is no default registry: when
   181  // presented with a bare repository name, the Host field will be empty.
   182  func ParseRelative(refStr string) (Reference, error) {
   183  	m := referencePat().FindStringSubmatch(refStr)
   184  	if m == nil {
   185  		return Reference{}, fmt.Errorf("invalid reference syntax (%q)", refStr)
   186  	}
   187  	var ref Reference
   188  	ref.Host, ref.Repository, ref.Tag, ref.Digest = m[1], m[2], m[3], Digest(m[4])
   189  	// Check lengths and digest: we don't check these as part of the regexp
   190  	// because it's more efficient to do it in Go and we get
   191  	// nicer error messages as a result.
   192  	if len(ref.Digest) > 0 {
   193  		if err := ref.Digest.Validate(); err != nil {
   194  			return Reference{}, fmt.Errorf("invalid digest %q: %v", ref.Digest, err)
   195  		}
   196  	}
   197  	if len(ref.Tag) > 0 {
   198  		if err := checkTag(ref.Tag); err != nil {
   199  			return Reference{}, err
   200  		}
   201  	}
   202  	if len(ref.Repository) > 255 {
   203  		return Reference{}, fmt.Errorf("repository name too long")
   204  	}
   205  	return ref, nil
   206  }
   207  
   208  func checkTag(s string) error {
   209  	if len(s) > 128 {
   210  		return fmt.Errorf("tag too long")
   211  	}
   212  	if !isWord(s[0]) {
   213  		return fmt.Errorf("tag %q does not start with word character", s)
   214  	}
   215  	for i := 1; i < len(s); i++ {
   216  		c := s[i]
   217  		if !isWord(c) && c != '.' && c != '-' {
   218  			return fmt.Errorf("tag %q contains invalid invalid character %q", s, c)
   219  		}
   220  	}
   221  	return nil
   222  }
   223  
   224  func isWord(c byte) bool {
   225  	return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')
   226  }
   227  
   228  // String returns the string form of a reference in the form
   229  //
   230  //	[HOST/]NAME[:TAG|@DIGEST]
   231  func (ref Reference) String() string {
   232  	var buf strings.Builder
   233  	buf.Grow(len(ref.Host) + 1 + len(ref.Repository) + 1 + len(ref.Tag) + 1 + len(ref.Digest))
   234  	if ref.Host != "" {
   235  		buf.WriteString(ref.Host)
   236  		buf.WriteByte('/')
   237  	}
   238  	buf.WriteString(ref.Repository)
   239  	if len(ref.Tag) > 0 {
   240  		buf.WriteByte(':')
   241  		buf.WriteString(ref.Tag)
   242  	}
   243  	if len(ref.Digest) > 0 {
   244  		buf.WriteByte('@')
   245  		buf.WriteString(string(ref.Digest))
   246  	}
   247  	return buf.String()
   248  }