github.com/joey-fossa/fossa-cli@v0.7.34-0.20190708193710-569f1e8679f0/api/fossa/tar.go (about)

     1  package fossa
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"crypto/md5"
     7  	"encoding/hex"
     8  	"encoding/json"
     9  	"io"
    10  	"io/ioutil"
    11  	"net/http"
    12  	"net/url"
    13  	"os"
    14  	"path/filepath"
    15  	"strings"
    16  
    17  	"github.com/apex/log"
    18  	"github.com/pkg/errors"
    19  
    20  	"github.com/fossas/fossa-cli/files"
    21  )
    22  
    23  var (
    24  	SignedURLAPI       = "/api/components/signed_url"
    25  	ComponentsBuildAPI = "/api/components/build"
    26  )
    27  
// ComponentSpec is the JSON body posted to ComponentsBuildAPI. It lists the
// uploaded archives for which a build should be queued.
type ComponentSpec struct {
	// Archives holds one entry per uploaded tarball.
	Archives []Component `json:"archives"`
}
    31  
// Component identifies one uploaded archive inside a ComponentSpec.
type Component struct {
	// PackageSpec is the name the archive was uploaded under.
	PackageSpec string `json:"packageSpec"`
	// Revision is the revision string the archive was uploaded under; this
	// file fills it with the hex-encoded tarball hash.
	Revision string `json:"revision"`
}
    36  
// SignedURL is the value decoded from the SignedURLAPI response.
type SignedURL struct {
	// SignedURL is the URL the tarball is sent to with an HTTP PUT.
	SignedURL string
}
    40  
    41  // UploadTarballDependency uploads the directory specified to be treated on FOSSA as a dependency.
    42  func UploadTarballDependency(dir string, upload, rawLicenseScan bool) (Locator, error) {
    43  	return UploadTarball(dir, true, rawLicenseScan, upload)
    44  }
    45  
    46  // UploadTarballProject uploads the directory specified to be treated on FOSSA as a project.
    47  func UploadTarballProject(dir string, rawLicenseScan bool) (Locator, error) {
    48  	return UploadTarball(dir, false, rawLicenseScan, true)
    49  }
    50  
    51  // UploadTarball archives, compresses, and uploads a specified directory. It
    52  // uses the directory name as the project name and the MD5 of the uploaded
    53  // tarball as the revision name. It returns the locator of the uploaded tarball.
    54  //
    55  // In order to upload the tarball, we need the revision name. In order to get
    56  // the revision name, we need to compute the tarball's MD5 hash. In order to
    57  // compute the MD5 hash, we need to see every byte of the final tarball.
    58  //
    59  // To do this, we actually stream through the tarball _twice_: once to create
    60  // the compressed tarball (and writing it to disk) while simultaneously
    61  // calculating its hash, and again to perform the upload.
    62  //
    63  // The alternative is to hold the entire tarball in memory while we upload.
    64  // Since this will be running within CI machines, this is probably not a good
    65  // idea. (See https://circleci.com/docs/2.0/configuration-reference/#resource_class
    66  // for an example of our memory constraints.)
    67  func UploadTarball(dir string, dependency, rawLicenseScan, upload bool) (Locator, error) {
    68  	p, err := filepath.Abs(dir)
    69  	name := filepath.Base(p)
    70  	if err != nil {
    71  		return Locator{}, err
    72  	}
    73  	_, err = os.Stat(p)
    74  	if err != nil {
    75  		return Locator{}, err
    76  	}
    77  
    78  	// Run first pass: tarball creation and hashing.
    79  	tarball, hash, err := CreateTarball(p)
    80  	if err != nil {
    81  		return Locator{}, err
    82  	}
    83  
    84  	return tarballUpload(name, dependency, rawLicenseScan, upload, tarball, hash)
    85  }
    86  
    87  // CreateTarball archives and compresses a directory's contents to a temporary
    88  // file while simultaneously computing its MD5 hash. The caller is responsible
    89  // for closing the file handle.
    90  func CreateTarball(dir string) (*os.File, []byte, error) {
    91  	dir, err := filepath.Abs(dir)
    92  	if err != nil {
    93  		return nil, nil, err
    94  	}
    95  
    96  	tmp, err := ioutil.TempFile("", "fossa-tar-"+filepath.Base(dir)+"-")
    97  	if err != nil {
    98  		return nil, nil, err
    99  	}
   100  
   101  	h := md5.New()
   102  
   103  	g := gzip.NewWriter(tmp)
   104  	defer g.Close()
   105  
   106  	t := tar.NewWriter(g)
   107  	defer t.Close()
   108  
   109  	err = filepath.Walk(dir, func(filename string, info os.FileInfo, err error) error {
   110  		if err != nil {
   111  			return err
   112  		}
   113  
   114  		// Potential future space optimization: don't upload VCS history.
   115  		// if info.IsDir() && (info.Name() == ".git" ||
   116  		// 	info.Name() == ".svn" ||
   117  		// 	info.Name() == ".hg" ||
   118  		// 	info.Name() == ".bzr") {
   119  		// 	return filepath.SkipDir
   120  		// }
   121  
   122  		_, err = io.WriteString(h, info.Name())
   123  		if err != nil {
   124  			return err
   125  		}
   126  		header, err := tar.FileInfoHeader(info, info.Name())
   127  		if err != nil {
   128  			return err
   129  		}
   130  		header.Name, err = filepath.Rel(filepath.Dir(dir), filename)
   131  		if err != nil {
   132  			return err
   133  		}
   134  
   135  		err = t.WriteHeader(header)
   136  		if err != nil {
   137  			return err
   138  		}
   139  
   140  		// Exit early for directories, symlinks, etc.
   141  		if !info.Mode().IsRegular() {
   142  			return nil
   143  		}
   144  
   145  		// For regular files, write the file.
   146  		file, err := os.Open(filename)
   147  		if err != nil {
   148  			return err
   149  		}
   150  		defer file.Close()
   151  
   152  		log.Debugf("Archiving: %#v", filename)
   153  		_, err = io.Copy(t, file)
   154  		if err != nil {
   155  			return err
   156  		}
   157  		_, err = io.Copy(h, file)
   158  		if err != nil {
   159  			return err
   160  		}
   161  		// Close again to force a disk flush. Closing an *os.File twice is
   162  		// undefined, but safe in practice.
   163  		// See https://github.com/golang/go/issues/20705.
   164  		file.Close()
   165  
   166  		return nil
   167  	})
   168  	if err != nil {
   169  		return nil, nil, err
   170  	}
   171  
   172  	// Clean up and flush writers.
   173  	err = t.Flush()
   174  	if err != nil {
   175  		return nil, nil, err
   176  	}
   177  	err = g.Flush()
   178  	if err != nil {
   179  		return nil, nil, err
   180  	}
   181  	err = tmp.Sync()
   182  	if err != nil {
   183  		return nil, nil, err
   184  	}
   185  
   186  	return tmp, h.Sum(nil), nil
   187  }
   188  
   189  // UploadTarballDependencyFiles generates and uploads a tarball from the provided list of files to FOSSA.
   190  // The tarball's contents are marked as a component (as opposed to a project). The `rawLicenseScan` query parameter
   191  // is automatically added to ensure that FOSSA does not try to discover more dependencies from the uploaded files.
   192  func UploadTarballDependencyFiles(dir string, fileList []string, name string, upload bool) (Locator, error) {
   193  	absFiles := make([]string, len(fileList))
   194  	for i, file := range fileList {
   195  		p := filepath.Join(dir, file)
   196  		pExists, err := files.Exists(p)
   197  		if err != nil && !pExists {
   198  			return Locator{}, errors.Errorf("File: %s does not exist: %s", p, err)
   199  		}
   200  		absFiles[i] = p
   201  	}
   202  
   203  	// Run first pass: tarball creation and hashing.
   204  	tarball, hash, err := CreateTarballFromFiles(absFiles, name)
   205  	if err != nil {
   206  		return Locator{}, err
   207  	}
   208  
   209  	return tarballUpload(name, true, true, upload, tarball, hash)
   210  }
   211  
   212  // CreateTarballFromFiles archives and compresses a list of files to a temporary
   213  // file while simultaneously computing its MD5 hash. The caller is responsible
   214  // for closing the file handle.
   215  func CreateTarballFromFiles(files []string, name string) (*os.File, []byte, error) {
   216  	tmp, err := ioutil.TempFile("", "fossa-tar-tempfile-"+name+"-")
   217  	if err != nil {
   218  		return nil, nil, err
   219  	}
   220  	h := md5.New()
   221  
   222  	g := gzip.NewWriter(tmp)
   223  	defer g.Close()
   224  
   225  	t := tar.NewWriter(g)
   226  	defer t.Close()
   227  
   228  	for _, file := range files {
   229  		info, _ := os.Lstat(file)
   230  		_, err = io.WriteString(h, info.Name())
   231  		if err != nil {
   232  			return nil, nil, err
   233  		}
   234  		header, err := tar.FileInfoHeader(info, info.Name())
   235  		if err != nil {
   236  			return nil, nil, err
   237  		}
   238  		header.Name = strings.TrimPrefix(file, "/")
   239  
   240  		err = t.WriteHeader(header)
   241  		if err != nil {
   242  			return nil, nil, err
   243  		}
   244  
   245  		// Exit early for directories, symlinks, etc.
   246  		if !info.Mode().IsRegular() {
   247  			return nil, nil, nil
   248  		}
   249  
   250  		// For regular files, write the file.
   251  		filename, err := os.Open(file)
   252  		if err != nil {
   253  			return nil, nil, err
   254  		}
   255  		defer filename.Close()
   256  
   257  		log.Debugf("Archiving: %#v", file)
   258  		_, err = io.Copy(t, filename)
   259  		if err != nil {
   260  			return nil, nil, err
   261  		}
   262  		_, err = io.Copy(h, filename)
   263  		if err != nil {
   264  			return nil, nil, err
   265  		}
   266  		// Close again to force a disk flush. Closing an *os.File twice is
   267  		// undefined, but safe in practice.
   268  		// See https://github.com/golang/go/issues/20705.
   269  		filename.Close()
   270  	}
   271  
   272  	// Clean up and flush writers.
   273  	err = t.Flush()
   274  	if err != nil {
   275  		return nil, nil, err
   276  	}
   277  	err = g.Flush()
   278  	if err != nil {
   279  		return nil, nil, err
   280  	}
   281  	err = tmp.Sync()
   282  	if err != nil {
   283  		return nil, nil, err
   284  	}
   285  
   286  	return tmp, h.Sum(nil), nil
   287  }
   288  
   289  // Upload the supplied tarball to the given endpoint.
   290  // Note: "name" should not have any "/"s to ensure core can parse it. Setting rawLicenseScan ensures
   291  // that FOSSA will not attempt to find dependencies in the uploaded files and that a full license scan
   292  // will be run on directories which are normally ignored, such as `vendor` or `node_modules`.
   293  func tarballUpload(name string, dependency, rawLicenseScan, upload bool, tarball *os.File, hash []byte) (Locator, error) {
   294  	info, err := tarball.Stat()
   295  	if err != nil {
   296  		return Locator{}, err
   297  	}
   298  
   299  	revision := hex.EncodeToString(hash)
   300  
   301  	if !upload {
   302  		return Locator{
   303  			Fetcher:  "archive",
   304  			Project:  name,
   305  			Revision: revision,
   306  		}, nil
   307  	}
   308  
   309  	q := url.Values{}
   310  	q.Add("packageSpec", name)
   311  	q.Add("revision", revision)
   312  
   313  	// Get signed URL for uploading.
   314  	var signed SignedURL
   315  	_, err = GetJSON(SignedURLAPI+"?"+q.Encode(), &signed)
   316  	if err != nil {
   317  		return Locator{}, err
   318  	}
   319  
   320  	// Run second pass: multi-part uploading.
   321  	r, w := io.Pipe()
   322  	// In parallel, stream temporary file to PUT.
   323  	go func() {
   324  		defer w.Close()
   325  		defer tarball.Close()
   326  		_, err := tarball.Seek(0, 0)
   327  		if err != nil {
   328  			log.Fatalf("Unable to upload: %s", err.Error())
   329  		}
   330  		_, err = io.Copy(w, tarball)
   331  
   332  		if err != nil {
   333  			log.Fatalf("Unable to upload: %s", err.Error())
   334  		}
   335  	}()
   336  
   337  	// TODO: should this be a new base API method?
   338  	req, err := http.NewRequest(http.MethodPut, signed.SignedURL, r)
   339  	if err != nil {
   340  		return Locator{}, err
   341  	}
   342  	req.Header.Set("Content-Type", "binary/octet-stream")
   343  	req.ContentLength = info.Size()
   344  	req.GetBody = func() (io.ReadCloser, error) {
   345  		return r, nil
   346  	}
   347  	log.Debugf("req: %#v", req)
   348  	res, err := http.DefaultClient.Do(req)
   349  	if err != nil {
   350  		return Locator{}, err
   351  	}
   352  	defer res.Body.Close()
   353  
   354  	body, err := ioutil.ReadAll(res.Body)
   355  	if err != nil {
   356  		return Locator{}, err
   357  	}
   358  	log.Debugf("%#v", string(body))
   359  
   360  	// Queue the component build.
   361  	build := ComponentSpec{
   362  		Archives: []Component{
   363  			Component{PackageSpec: name, Revision: revision},
   364  		},
   365  	}
   366  	data, err := json.Marshal(build)
   367  	if err != nil {
   368  		return Locator{}, err
   369  	}
   370  
   371  	parameters := url.Values{}
   372  	if dependency {
   373  		parameters.Add("dependency", "true")
   374  	}
   375  
   376  	if rawLicenseScan {
   377  		parameters.Add("rawLicenseScan", "true")
   378  	}
   379  
   380  	_, _, err = Post(ComponentsBuildAPI+"?"+parameters.Encode(), data)
   381  	if err != nil {
   382  		return Locator{}, err
   383  	}
   384  
   385  	return Locator{
   386  		Fetcher:  "archive",
   387  		Project:  name,
   388  		Revision: revision,
   389  	}, nil
   390  }