github.com/Racer159/jackal@v0.32.7-0.20240401174413-0bd2339e4f2e/src/pkg/transform/artifact.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // SPDX-FileCopyrightText: 2021-Present The Jackal Authors 3 4 // Package transform provides helper functions to transform URLs to airgap equivalents 5 package transform 6 7 import ( 8 "fmt" 9 "net/url" 10 "regexp" 11 "strings" 12 13 "github.com/defenseunicorns/pkg/helpers" 14 ) 15 16 const ( 17 // NoTransform is the URL prefix added to HTTP 3xx or text URLs that instructs Jackal not to transform on a subsequent request. 18 NoTransform = "/jackal-3xx-no-transform" 19 ) 20 21 // NoTransformTarget takes an address that Jackal should not transform, and removes the NoTransform prefix. 22 func NoTransformTarget(address string, path string) (*url.URL, error) { 23 targetURL, err := url.Parse(address) 24 if err != nil { 25 return nil, err 26 } 27 28 targetURL.Path = strings.TrimPrefix(path, NoTransform) 29 30 return targetURL, nil 31 } 32 33 // NpmTransformURL finds the npm API path on a given URL and transforms that to align with the offline registry. 34 func NpmTransformURL(targetBaseURL string, sourceURL string) (*url.URL, error) { 35 // For further explanation: https://regex101.com/r/RRyazc/3 36 // This regex was created with information from https://github.com/go-gitea/gitea/blob/0e58201d1a8247561809d832eb8f576e05e5d26d/routers/api/packages/api.go#L210 37 npmURLRegex := regexp.MustCompile(`^(?P<proto>[a-z]+:\/\/)(?P<hostPath>.+?)` + 38 `(?P<npmPath>(\/(@[\w\.\-\~]+(\/|%2[fF]))?[\w\.\-\~]+(\/-\/([\w\.\-\~]+\/)?[\w\.\-\~]+\.[\w]+)?(\/-rev\/.+)?)|(\/-\/(npm|v1|user|package)\/.+))$`) 39 40 return transformRegistryPath(targetBaseURL, sourceURL, npmURLRegex, "npmPath", "npm") 41 } 42 43 // PipTransformURL finds the pip API path on a given URL and transforms that to align with the offline registry. 44 func PipTransformURL(targetBaseURL string, sourceURL string) (*url.URL, error) { 45 // For further explanation: https://regex101.com/r/lreZiD/2 46 // This regex was created with information from https://github.com/go-gitea/gitea/blob/0e58201d1a8247561809d832eb8f576e05e5d26d/routers/api/packages/api.go#L267 47 pipURLRegex := regexp.MustCompile(`^(?P<proto>[a-z]+:\/\/)(?P<hostPath>.+?)(?P<pipPath>\/((simple|files\/)[\/\w\-\.\?\=&%#]*?)?)?$`) 48 49 return transformRegistryPath(targetBaseURL, sourceURL, pipURLRegex, "pipPath", "pypi") 50 } 51 52 // GenTransformURL finds the generic API path on a given URL and transforms that to align with the offline registry. 53 func GenTransformURL(targetBaseURL string, sourceURL string) (*url.URL, error) { 54 // For further explanation: https://regex101.com/r/bwMkCm/5 55 // This regex was created with information from https://www.rfc-editor.org/rfc/rfc3986#section-2 56 genURLRegex := regexp.MustCompile(`^(?P<proto>[a-z]+:\/\/)(?P<host>[a-zA-Z0-9\-\.]+)(?P<port>:[0-9]+?)?(?P<startPath>\/[\w\-\.+~%]+?\/[\w\-\.+~%]+?)?(?P<midPath>\/.+?)??(?P<version>\/[\w\-\.+~%]+?)??(?P<fileName>\/[\w\-\.+~%]*)?(?P<query>[\w\-\.\?\=,;+~!$'*&%#()\[\]]*?)?$`) 57 58 matches := genURLRegex.FindStringSubmatch(sourceURL) 59 idx := genURLRegex.SubexpIndex 60 61 if len(matches) == 0 { 62 // Unable to find a substring match for the regex 63 return nil, fmt.Errorf("unable to extract the genericPath from the url %s", sourceURL) 64 } 65 66 fileName := strings.ReplaceAll(matches[idx("fileName")], "/", "") 67 if fileName == "" { 68 fileName = matches[idx("host")] 69 } 70 71 // NOTE: We remove the protocol, port and file name so that https://jackal.dev:443/package/package1.zip and http://jackal.dev/package/package2.zip 72 // resolve to the same "folder" (as they would in real life) 73 sanitizedURL := fmt.Sprintf("%s%s%s", matches[idx("host")], matches[idx("startPath")], matches[idx("midPath")]) 74 75 packageName := strings.ReplaceAll(matches[idx("startPath")], "/", "") 76 if packageName == "" { 77 packageName = fileName 78 } 79 // Add crc32 hash of the url to the end of the package name 80 packageNameGlobal := fmt.Sprintf("%s-%d", packageName, helpers.GetCRCHash(sanitizedURL)) 81 82 version := strings.ReplaceAll(matches[idx("version")], "/", "") 83 if version == "" { 84 version = fileName 85 } 86 87 // Rebuild the generic URL 88 transformedURL := fmt.Sprintf("%s/generic/%s/%s/%s", targetBaseURL, packageNameGlobal, version, fileName) 89 90 url, err := url.Parse(transformedURL) 91 if err != nil { 92 return url, err 93 } 94 95 // Drop the RawQuery and Fragment to avoid them being interpreted for generic packages 96 url.RawQuery = "" 97 url.Fragment = "" 98 99 return url, err 100 } 101 102 // transformRegistryPath transforms a given request path using a new base URL and regex. 103 // - pathGroup specifies the named group for the registry's URL path inside the regex (i.e. pipPath) and registryType specifies the registry type (i.e. pypi). 104 func transformRegistryPath(targetBaseURL string, sourceURL string, regex *regexp.Regexp, pathGroup string, registryType string) (*url.URL, error) { 105 matches := regex.FindStringSubmatch(sourceURL) 106 idx := regex.SubexpIndex 107 108 if len(matches) == 0 { 109 // Unable to find a substring match for the regex 110 return nil, fmt.Errorf("unable to extract the %s from the url %s", pathGroup, sourceURL) 111 } 112 113 // Rebuild the URL based on registry type 114 transformedURL := fmt.Sprintf("%s/%s%s", targetBaseURL, registryType, matches[idx(pathGroup)]) 115 116 return url.Parse(transformedURL) 117 }