github.com/SamarSidharth/kpt@v0.0.0-20231122062228-c7d747ae3ace/internal/util/fetch/fetch.go (about) 1 // Copyright 2019 The kpt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fetch 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "path" 22 "path/filepath" 23 "strings" 24 25 "github.com/otiai10/copy" 26 "sigs.k8s.io/kustomize/kyaml/filesys" 27 28 "github.com/GoogleContainerTools/kpt/internal/errors" 29 "github.com/GoogleContainerTools/kpt/internal/gitutil" 30 "github.com/GoogleContainerTools/kpt/internal/pkg" 31 "github.com/GoogleContainerTools/kpt/internal/types" 32 "github.com/GoogleContainerTools/kpt/internal/util/git" 33 "github.com/GoogleContainerTools/kpt/internal/util/pkgutil" 34 kptfilev1 "github.com/GoogleContainerTools/kpt/pkg/api/kptfile/v1" 35 "github.com/GoogleContainerTools/kpt/pkg/kptfile/kptfileutil" 36 "github.com/GoogleContainerTools/kpt/pkg/printer" 37 ) 38 39 // Command takes the upstream information in the Kptfile at the path for the 40 // provided package, and fetches the package referenced if it isn't already 41 // there. 42 type Command struct { 43 Pkg *pkg.Pkg 44 } 45 46 // Run runs the Command. 47 func (c Command) Run(ctx context.Context) error { 48 const op errors.Op = "fetch.Run" 49 kf, err := c.Pkg.Kptfile() 50 if err != nil { 51 return errors.E(op, c.Pkg.UniquePath, fmt.Errorf("no Kptfile found")) 52 } 53 54 if err := c.validate(kf); err != nil { 55 return errors.E(op, c.Pkg.UniquePath, err) 56 } 57 58 g := kf.Upstream.Git 59 repoSpec := &git.RepoSpec{ 60 OrgRepo: g.Repo, 61 Path: g.Directory, 62 Ref: g.Ref, 63 } 64 err = NewCloner(repoSpec).cloneAndCopy(ctx, c.Pkg.UniquePath.String()) 65 if err != nil { 66 return errors.E(op, c.Pkg.UniquePath, err) 67 } 68 return nil 69 } 70 71 // validate makes sure the Kptfile has the necessary information to fetch 72 // the package. 73 func (c Command) validate(kf *kptfilev1.KptFile) error { 74 const op errors.Op = "validate" 75 if kf.Upstream == nil { 76 return errors.E(op, errors.MissingParam, fmt.Errorf("kptfile doesn't contain upstream information")) 77 } 78 79 if kf.Upstream.Git == nil { 80 return errors.E(op, errors.MissingParam, fmt.Errorf("kptfile upstream doesn't have git information")) 81 } 82 83 g := kf.Upstream.Git 84 if len(g.Repo) == 0 { 85 return errors.E(op, errors.MissingParam, fmt.Errorf("must specify repo")) 86 } 87 if len(g.Ref) == 0 { 88 return errors.E(op, errors.MissingParam, fmt.Errorf("must specify ref")) 89 } 90 if len(g.Directory) == 0 { 91 return errors.E(op, errors.MissingParam, fmt.Errorf("must specify directory")) 92 } 93 return nil 94 } 95 96 // Cloner clones an upstream repo defined by a repoSpec. 97 // Optionally, previously cloned repos can be cached 98 // rather than recloning them each time. 99 type Cloner struct { 100 // repoSpec spec to clone 101 repoSpec *git.RepoSpec 102 103 // cachedRepos 104 cachedRepo map[string]*gitutil.GitUpstreamRepo 105 } 106 107 type NewClonerOption func(*Cloner) 108 109 func WithCachedRepo(r map[string]*gitutil.GitUpstreamRepo) NewClonerOption { 110 return func(c *Cloner) { 111 c.cachedRepo = r 112 } 113 } 114 115 func NewCloner(r *git.RepoSpec, opts ...NewClonerOption) *Cloner { 116 c := &Cloner{ 117 repoSpec: r, 118 } 119 for _, opt := range opts { 120 opt(c) 121 } 122 if c.cachedRepo == nil { 123 c.cachedRepo = make(map[string]*gitutil.GitUpstreamRepo) 124 } 125 return c 126 } 127 128 // cloneAndCopy fetches the provided repo and copies the content into the 129 // directory specified by dest. The provided name is set as `metadata.name` 130 // of the Kptfile of the package. 131 func (c *Cloner) cloneAndCopy(ctx context.Context, dest string) error { 132 const op errors.Op = "fetch.cloneAndCopy" 133 pr := printer.FromContextOrDie(ctx) 134 135 err := c.ClonerUsingGitExec(ctx) 136 if err != nil { 137 return errors.E(op, errors.Git, types.UniquePath(dest), err) 138 } 139 defer os.RemoveAll(c.repoSpec.Dir) 140 // update cache before removing clone dir 141 defer delete(c.cachedRepo, c.repoSpec.CloneSpec()) 142 143 sourcePath := filepath.Join(c.repoSpec.Dir, c.repoSpec.Path) 144 pr.Printf("Adding package %q.\n", strings.TrimPrefix(c.repoSpec.Path, "/")) 145 if err := pkgutil.CopyPackage(sourcePath, dest, true, pkg.All); err != nil { 146 return errors.E(op, types.UniquePath(dest), err) 147 } 148 149 if err := kptfileutil.UpdateKptfileWithoutOrigin(dest, sourcePath, false); err != nil { 150 return errors.E(op, types.UniquePath(dest), err) 151 } 152 153 if err := kptfileutil.UpdateUpstreamLockFromGit(dest, c.repoSpec); err != nil { 154 return errors.E(op, errors.Git, types.UniquePath(dest), err) 155 } 156 return nil 157 } 158 159 // ClonerUsingGitExec uses a local git install, as opposed 160 // to say, some remote API, to obtain a local clone of 161 // a remote repo. It looks for tags with the directory as a prefix to allow 162 // for versioning multiple kpt packages in a single repo independently. It 163 // relies on the private clonerUsingGitExec function to try fetching different 164 // refs. 165 func (c *Cloner) ClonerUsingGitExec(ctx context.Context) error { 166 const op errors.Op = "fetch.ClonerUsingGitExec" 167 168 // Create a local representation of the upstream repo. This will initialize 169 // the cache for the specified repo uri if it isn't already there. It also 170 // fetches and caches all tag and branch refs from the upstream repo. 171 upstreamRepo, exists := c.cachedRepo[c.repoSpec.CloneSpec()] 172 if !exists { 173 newUpstreamRemp, err := gitutil.NewGitUpstreamRepo(ctx, c.repoSpec.CloneSpec()) 174 if err != nil { 175 return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err) 176 } 177 upstreamRepo = newUpstreamRemp 178 c.cachedRepo[c.repoSpec.CloneSpec()] = upstreamRepo 179 } 180 181 // Check if we have a ref in the upstream that matches the package-specific 182 // reference. If we do, we use that reference. 183 ps := strings.Split(c.repoSpec.Path, "/") 184 for len(ps) != 0 { 185 p := path.Join(ps...) 186 packageRef := path.Join(strings.TrimLeft(p, "/"), c.repoSpec.Ref) 187 if _, found := upstreamRepo.ResolveTag(packageRef); found { 188 c.repoSpec.Ref = packageRef 189 break 190 } 191 ps = ps[:len(ps)-1] 192 } 193 194 // Pull the required ref into the repo git cache. 195 dir, err := upstreamRepo.GetRepo(ctx, []string{c.repoSpec.Ref}) 196 if err != nil { 197 return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err) 198 } 199 200 gitRunner, err := gitutil.NewLocalGitRunner(dir) 201 if err != nil { 202 return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err) 203 } 204 205 // Find the commit SHA for the ref that was just fetched. We need the SHA 206 // rather than the ref to be able to do a hard reset of the cache repo. 207 commit, found := upstreamRepo.ResolveRef(c.repoSpec.Ref) 208 if !found { 209 commit = c.repoSpec.Ref 210 } 211 212 // Reset the local repo to the commit we need. Doing a hard reset instead of 213 // a checkout means we don't create any local branches so we don't need to 214 // worry about fast-forwarding them with changes from upstream. It also makes 215 // sure that any changes in the local worktree are cleaned out. 216 _, err = gitRunner.Run(ctx, "reset", "--hard", commit) 217 if err != nil { 218 gitutil.AmendGitExecError(err, func(e *gitutil.GitExecError) { 219 e.Repo = c.repoSpec.CloneSpec() 220 e.Ref = commit 221 }) 222 return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err) 223 } 224 225 // We need to create a temp directory where we can copy the content of the repo. 226 // During update, we need to checkout multiple versions of the same repo, so 227 // we can't do merges directly from the cache. 228 c.repoSpec.Dir, err = os.MkdirTemp("", "kpt-get-") 229 if err != nil { 230 return errors.E(op, errors.Internal, fmt.Errorf("error creating temp directory: %w", err)) 231 } 232 c.repoSpec.Commit = commit 233 234 pkgPath := filepath.Join(dir, c.repoSpec.Path) 235 // Verify that the requested path exists in the repo. 236 _, err = os.Stat(pkgPath) 237 if os.IsNotExist(err) { 238 return errors.E(op, 239 errors.Internal, 240 err, 241 fmt.Errorf("path %q does not exist in repo %q", c.repoSpec.Path, c.repoSpec.OrgRepo)) 242 } 243 244 // Copy the content of the pkg into the temp directory. 245 // Note that we skip the content outside the package directory. 246 err = copyDir(ctx, pkgPath, c.repoSpec.AbsPath()) 247 if err != nil { 248 return errors.E(op, errors.Internal, fmt.Errorf("error copying package: %w", err)) 249 } 250 251 // Verify that if a Kptfile exists in the package, it contains the correct 252 // version of the Kptfile. 253 _, err = pkg.ReadKptfile(filesys.FileSystemOrOnDisk{}, pkgPath) 254 if err != nil { 255 // A Kptfile isn't required, so it is fine if there is no Kptfile. 256 if errors.Is(err, os.ErrNotExist) { 257 return nil 258 } 259 260 // If the error is of type KptfileError, we replace it with a 261 // RemoteKptfileError. This allows us to provide information about the 262 // git source of the Kptfile instead of the path to some random 263 // temporary directory. 264 var kfError *pkg.KptfileError 265 if errors.As(err, &kfError) { 266 return &pkg.RemoteKptfileError{ 267 RepoSpec: c.repoSpec, 268 Err: kfError.Err, 269 } 270 } 271 } 272 return nil 273 } 274 275 // copyDir copies a src directory to a dst directory. 276 // copyDir skips copying the .git directory from the src and ignores symlinks. 277 func copyDir(ctx context.Context, srcDir string, dstDir string) error { 278 pr := printer.FromContextOrDie(ctx) 279 opts := copy.Options{ 280 Skip: func(src string) (bool, error) { 281 return strings.HasSuffix(src, ".git"), nil 282 }, 283 OnSymlink: func(src string) copy.SymlinkAction { 284 // try to print relative path of symlink 285 // if we can, else absolute path which is not 286 // pretty because it contains path to temporary repo dir 287 displayPath, err := filepath.Rel(srcDir, src) 288 if err != nil { 289 displayPath = src 290 } 291 pr.Printf("[Warn] Ignoring symlink %q \n", displayPath) 292 return copy.Skip 293 }, 294 } 295 return copy.Copy(srcDir, dstDir, opts) 296 }