github.com/vmware/govmomi@v0.51.0/toolbox/hgfs/archive.go (about)

     1  // © Broadcom. All Rights Reserved.
     2  // The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package hgfs
     6  
     7  import (
     8  	"archive/tar"
     9  	"bufio"
    10  	"bytes"
    11  	"compress/gzip"
    12  	"io"
    13  	"log"
    14  	"math"
    15  	"net/url"
    16  	"os"
    17  	"path/filepath"
    18  	"strings"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/vmware/govmomi/toolbox/vix"
    23  )
    24  
// ArchiveScheme is the default scheme used to register the archive FileHandler.
// It is declared as a variable rather than a constant, so it can be reassigned.
var ArchiveScheme = "archive"
    27  
// ArchiveHandler implements a FileHandler for transferring directories.
//
// Read and Write are the hooks that consume and produce the tar stream.
// NewArchiveHandler sets them to archiveRead and archiveWrite.
type ArchiveHandler struct {
	// Read is called with the request URL and a reader over the incoming
	// tar stream when an archive is opened with OpenModeWriteOnly.
	Read  func(*url.URL, *tar.Reader) error
	// Write is called with the request URL and a writer for the outgoing
	// tar stream when an archive is opened with OpenModeReadOnly.
	Write func(*url.URL, *tar.Writer) error
}
    33  
    34  // NewArchiveHandler returns a FileHandler implementation for transferring directories using gzip'd tar files.
    35  func NewArchiveHandler() FileHandler {
    36  	return &ArchiveHandler{
    37  		Read:  archiveRead,
    38  		Write: archiveWrite,
    39  	}
    40  }
    41  
    42  // Stat implements FileHandler.Stat
    43  func (*ArchiveHandler) Stat(u *url.URL) (os.FileInfo, error) {
    44  	switch u.Query().Get("format") {
    45  	case "", "tar", "tgz":
    46  		// ok
    47  	default:
    48  		log.Printf("unknown archive format: %q", u)
    49  		return nil, vix.Error(vix.InvalidArg)
    50  	}
    51  
    52  	return &archive{
    53  		name: u.Path,
    54  		size: math.MaxInt64,
    55  	}, nil
    56  }
    57  
    58  // Open implements FileHandler.Open
    59  func (h *ArchiveHandler) Open(u *url.URL, mode int32) (File, error) {
    60  	switch mode {
    61  	case OpenModeReadOnly:
    62  		return h.newArchiveFromGuest(u)
    63  	case OpenModeWriteOnly:
    64  		return h.newArchiveToGuest(u)
    65  	default:
    66  		return nil, os.ErrNotExist
    67  	}
    68  }
    69  
// archive implements the hgfs.File and os.FileInfo interfaces.
type archive struct {
	// name is the value reported by Name.
	name string
	// size is the value reported by Size.
	size int64
	// done is invoked by Close.
	done func() error

	// The embedded Reader and Writer supply the Read and Write methods.
	io.Reader
	io.Writer
}
    79  
    80  // Name implementation of the os.FileInfo interface method.
    81  func (a *archive) Name() string {
    82  	return a.name
    83  }
    84  
    85  // Size implementation of the os.FileInfo interface method.
    86  func (a *archive) Size() int64 {
    87  	return a.size
    88  }
    89  
    90  // Mode implementation of the os.FileInfo interface method.
    91  func (a *archive) Mode() os.FileMode {
    92  	return 0600
    93  }
    94  
    95  // ModTime implementation of the os.FileInfo interface method.
    96  func (a *archive) ModTime() time.Time {
    97  	return time.Now()
    98  }
    99  
   100  // IsDir implementation of the os.FileInfo interface method.
   101  func (a *archive) IsDir() bool {
   102  	return false
   103  }
   104  
   105  // Sys implementation of the os.FileInfo interface method.
   106  func (a *archive) Sys() any {
   107  	return nil
   108  }
   109  
   110  // The trailer is required since TransferFromGuest requires a Content-Length,
   111  // which toolbox doesn't know ahead of time as the gzip'd tarball never touches the disk.
   112  // HTTP clients need to be aware of this and stop reading when they see the 2nd gzip header.
   113  var gzipHeader = []byte{0x1f, 0x8b, 0x08} // rfc1952 {ID1, ID2, CM}
   114  
   115  var gzipTrailer = true
   116  
   117  // newArchiveFromGuest returns an hgfs.File implementation to read a directory as a gzip'd tar.
   118  func (h *ArchiveHandler) newArchiveFromGuest(u *url.URL) (File, error) {
   119  	r, w := io.Pipe()
   120  
   121  	a := &archive{
   122  		name:   u.Path,
   123  		done:   r.Close,
   124  		Reader: r,
   125  		Writer: w,
   126  	}
   127  
   128  	var z io.Writer = w
   129  	var c io.Closer = io.NopCloser(nil)
   130  
   131  	switch u.Query().Get("format") {
   132  	case "tgz":
   133  		gz := gzip.NewWriter(w)
   134  		z = gz
   135  		c = gz
   136  	}
   137  
   138  	tw := tar.NewWriter(z)
   139  
   140  	go func() {
   141  		err := h.Write(u, tw)
   142  
   143  		_ = tw.Close()
   144  		_ = c.Close()
   145  		if gzipTrailer {
   146  			_, _ = w.Write(gzipHeader)
   147  		}
   148  		_ = w.CloseWithError(err)
   149  	}()
   150  
   151  	return a, nil
   152  }
   153  
   154  // newArchiveToGuest returns an hgfs.File implementation to expand a gzip'd tar into a directory.
   155  func (h *ArchiveHandler) newArchiveToGuest(u *url.URL) (File, error) {
   156  	r, w := io.Pipe()
   157  
   158  	buf := bufio.NewReader(r)
   159  
   160  	a := &archive{
   161  		name:   u.Path,
   162  		Reader: buf,
   163  		Writer: w,
   164  	}
   165  
   166  	var cerr error
   167  	var wg sync.WaitGroup
   168  
   169  	a.done = func() error {
   170  		_ = w.Close()
   171  		// We need to wait for unpack to finish to complete its work
   172  		// and to propagate the error if any to Close.
   173  		wg.Wait()
   174  		return cerr
   175  	}
   176  
   177  	wg.Add(1)
   178  	go func() {
   179  		defer wg.Done()
   180  
   181  		c := func() error {
   182  			// Drain the pipe of tar trailer data (two null blocks)
   183  			if cerr == nil {
   184  				_, _ = io.Copy(io.Discard, a.Reader)
   185  			}
   186  			return nil
   187  		}
   188  
   189  		header, _ := buf.Peek(len(gzipHeader))
   190  
   191  		if bytes.Equal(header, gzipHeader) {
   192  			gz, err := gzip.NewReader(a.Reader)
   193  			if err != nil {
   194  				_ = r.CloseWithError(err)
   195  				cerr = err
   196  				return
   197  			}
   198  
   199  			c = gz.Close
   200  			a.Reader = gz
   201  		}
   202  
   203  		tr := tar.NewReader(a.Reader)
   204  
   205  		cerr = h.Read(u, tr)
   206  
   207  		_ = c()
   208  		_ = r.CloseWithError(cerr)
   209  	}()
   210  
   211  	return a, nil
   212  }
   213  
   214  func (a *archive) Close() error {
   215  	return a.done()
   216  }
   217  
   218  // archiveRead writes the contents of the given tar.Reader to the given directory.
   219  func archiveRead(u *url.URL, tr *tar.Reader) error {
   220  	for {
   221  		header, err := tr.Next()
   222  		if err != nil {
   223  			if err == io.EOF {
   224  				return nil
   225  			}
   226  			return err
   227  		}
   228  
   229  		// validate to prevent directory traversal
   230  		if strings.Contains(header.Name, "..") {
   231  			log.Printf("skipping invalid entry with '..' in name: %s", header.Name)
   232  			continue
   233  		}
   234  
   235  		name := filepath.Join(u.Path, header.Name)
   236  		mode := os.FileMode(header.Mode)
   237  
   238  		switch header.Typeflag {
   239  		case tar.TypeDir:
   240  			err = os.MkdirAll(name, mode)
   241  		case tar.TypeReg:
   242  			_ = os.MkdirAll(filepath.Dir(name), 0750)
   243  
   244  			var f *os.File
   245  
   246  			f, err = os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_TRUNC, mode)
   247  			if err == nil {
   248  				_, cerr := io.Copy(f, tr)
   249  				err = f.Close()
   250  				if cerr != nil {
   251  					err = cerr
   252  				}
   253  			}
   254  		case tar.TypeSymlink:
   255  			err = os.Symlink(header.Linkname, name)
   256  		}
   257  
   258  		// TODO: Uid/Gid may not be meaningful here without some mapping.
   259  		// The other option to consider would be making use of the guest auth user ID.
   260  		// os.Lchown(name, header.Uid, header.Gid)
   261  
   262  		if err != nil {
   263  			return err
   264  		}
   265  	}
   266  }
   267  
   268  // archiveWrite writes the contents of the given source directory to the given tar.Writer.
   269  func archiveWrite(u *url.URL, tw *tar.Writer) error {
   270  	info, err := os.Stat(u.Path)
   271  	if err != nil {
   272  		return err
   273  	}
   274  
   275  	// Note that the VMX will trim any trailing slash.  For example:
   276  	// "/foo/bar/?prefix=bar/" will end up here as "/foo/bar/?prefix=bar"
   277  	// Escape to avoid this: "/for/bar/?prefix=bar%2F"
   278  	prefix := u.Query().Get("prefix")
   279  
   280  	dir := u.Path
   281  
   282  	f := func(file string, fi os.FileInfo, err error) error {
   283  		if err != nil {
   284  			return filepath.SkipDir
   285  		}
   286  
   287  		name := strings.TrimPrefix(file, dir)
   288  		name = strings.TrimPrefix(name, "/")
   289  
   290  		if name == "" {
   291  			return nil // this is u.Path itself (which may or may not have a trailing "/")
   292  		}
   293  
   294  		if prefix != "" {
   295  			name = prefix + name
   296  		}
   297  
   298  		header, _ := tar.FileInfoHeader(fi, name)
   299  
   300  		header.Name = name
   301  
   302  		if header.Typeflag == tar.TypeDir {
   303  			header.Name += "/"
   304  		}
   305  
   306  		var f *os.File
   307  
   308  		if header.Typeflag == tar.TypeReg && fi.Size() != 0 {
   309  			f, err = os.Open(filepath.Clean(file))
   310  			if err != nil {
   311  				if os.IsPermission(err) {
   312  					return nil
   313  				}
   314  				return err
   315  			}
   316  		}
   317  
   318  		_ = tw.WriteHeader(header)
   319  
   320  		if f != nil {
   321  			_, err = io.Copy(tw, f)
   322  			_ = f.Close()
   323  		}
   324  
   325  		return err
   326  	}
   327  
   328  	if info.IsDir() {
   329  		return filepath.Walk(u.Path, f)
   330  	}
   331  
   332  	dir = filepath.Dir(dir)
   333  
   334  	return f(u.Path, info, nil)
   335  }