github.com/joey-fossa/fossa-cli@v0.7.34-0.20190708193710-569f1e8679f0/api/fossa/tar.go (about) 1 package fossa 2 3 import ( 4 "archive/tar" 5 "compress/gzip" 6 "crypto/md5" 7 "encoding/hex" 8 "encoding/json" 9 "io" 10 "io/ioutil" 11 "net/http" 12 "net/url" 13 "os" 14 "path/filepath" 15 "strings" 16 17 "github.com/apex/log" 18 "github.com/pkg/errors" 19 20 "github.com/fossas/fossa-cli/files" 21 ) 22 23 var ( 24 SignedURLAPI = "/api/components/signed_url" 25 ComponentsBuildAPI = "/api/components/build" 26 ) 27 28 type ComponentSpec struct { 29 Archives []Component `json:"archives"` 30 } 31 32 type Component struct { 33 PackageSpec string `json:"packageSpec"` 34 Revision string `json:"revision"` 35 } 36 37 type SignedURL struct { 38 SignedURL string 39 } 40 41 // UploadTarballDependency uploads the directory specified to be treated on FOSSA as a dependency. 42 func UploadTarballDependency(dir string, upload, rawLicenseScan bool) (Locator, error) { 43 return UploadTarball(dir, true, rawLicenseScan, upload) 44 } 45 46 // UploadTarballProject uploads the directory specified to be treated on FOSSA as a project. 47 func UploadTarballProject(dir string, rawLicenseScan bool) (Locator, error) { 48 return UploadTarball(dir, false, rawLicenseScan, true) 49 } 50 51 // UploadTarball archives, compresses, and uploads a specified directory. It 52 // uses the directory name as the project name and the MD5 of the uploaded 53 // tarball as the revision name. It returns the locator of the uploaded tarball. 54 // 55 // In order to upload the tarball, we need the revision name. In order to get 56 // the revision name, we need to compute the tarball's MD5 hash. In order to 57 // compute the MD5 hash, we need to see every byte of the final tarball. 58 // 59 // To do this, we actually stream through the tarball _twice_: once to create 60 // the compressed tarball (and writing it to disk) while simultaneously 61 // calculating its hash, and again to perform the upload. 62 // 63 // The alternative is to hold the entire tarball in memory while we upload. 64 // Since this will be running within CI machines, this is probably not a good 65 // idea. (See https://circleci.com/docs/2.0/configuration-reference/#resource_class 66 // for an example of our memory constraints.) 67 func UploadTarball(dir string, dependency, rawLicenseScan, upload bool) (Locator, error) { 68 p, err := filepath.Abs(dir) 69 name := filepath.Base(p) 70 if err != nil { 71 return Locator{}, err 72 } 73 _, err = os.Stat(p) 74 if err != nil { 75 return Locator{}, err 76 } 77 78 // Run first pass: tarball creation and hashing. 79 tarball, hash, err := CreateTarball(p) 80 if err != nil { 81 return Locator{}, err 82 } 83 84 return tarballUpload(name, dependency, rawLicenseScan, upload, tarball, hash) 85 } 86 87 // CreateTarball archives and compresses a directory's contents to a temporary 88 // file while simultaneously computing its MD5 hash. The caller is responsible 89 // for closing the file handle. 90 func CreateTarball(dir string) (*os.File, []byte, error) { 91 dir, err := filepath.Abs(dir) 92 if err != nil { 93 return nil, nil, err 94 } 95 96 tmp, err := ioutil.TempFile("", "fossa-tar-"+filepath.Base(dir)+"-") 97 if err != nil { 98 return nil, nil, err 99 } 100 101 h := md5.New() 102 103 g := gzip.NewWriter(tmp) 104 defer g.Close() 105 106 t := tar.NewWriter(g) 107 defer t.Close() 108 109 err = filepath.Walk(dir, func(filename string, info os.FileInfo, err error) error { 110 if err != nil { 111 return err 112 } 113 114 // Potential future space optimization: don't upload VCS history. 115 // if info.IsDir() && (info.Name() == ".git" || 116 // info.Name() == ".svn" || 117 // info.Name() == ".hg" || 118 // info.Name() == ".bzr") { 119 // return filepath.SkipDir 120 // } 121 122 _, err = io.WriteString(h, info.Name()) 123 if err != nil { 124 return err 125 } 126 header, err := tar.FileInfoHeader(info, info.Name()) 127 if err != nil { 128 return err 129 } 130 header.Name, err = filepath.Rel(filepath.Dir(dir), filename) 131 if err != nil { 132 return err 133 } 134 135 err = t.WriteHeader(header) 136 if err != nil { 137 return err 138 } 139 140 // Exit early for directories, symlinks, etc. 141 if !info.Mode().IsRegular() { 142 return nil 143 } 144 145 // For regular files, write the file. 146 file, err := os.Open(filename) 147 if err != nil { 148 return err 149 } 150 defer file.Close() 151 152 log.Debugf("Archiving: %#v", filename) 153 _, err = io.Copy(t, file) 154 if err != nil { 155 return err 156 } 157 _, err = io.Copy(h, file) 158 if err != nil { 159 return err 160 } 161 // Close again to force a disk flush. Closing an *os.File twice is 162 // undefined, but safe in practice. 163 // See https://github.com/golang/go/issues/20705. 164 file.Close() 165 166 return nil 167 }) 168 if err != nil { 169 return nil, nil, err 170 } 171 172 // Clean up and flush writers. 173 err = t.Flush() 174 if err != nil { 175 return nil, nil, err 176 } 177 err = g.Flush() 178 if err != nil { 179 return nil, nil, err 180 } 181 err = tmp.Sync() 182 if err != nil { 183 return nil, nil, err 184 } 185 186 return tmp, h.Sum(nil), nil 187 } 188 189 // UploadTarballDependencyFiles generates and uploads a tarball from the provided list of files to FOSSA. 190 // The tarball's contents are marked as a component (as opposed to a project). The `rawLicenseScan` query parameter 191 // is automatically added to ensure that FOSSA does not try to discover more dependencies from the uploaded files. 192 func UploadTarballDependencyFiles(dir string, fileList []string, name string, upload bool) (Locator, error) { 193 absFiles := make([]string, len(fileList)) 194 for i, file := range fileList { 195 p := filepath.Join(dir, file) 196 pExists, err := files.Exists(p) 197 if err != nil && !pExists { 198 return Locator{}, errors.Errorf("File: %s does not exist: %s", p, err) 199 } 200 absFiles[i] = p 201 } 202 203 // Run first pass: tarball creation and hashing. 204 tarball, hash, err := CreateTarballFromFiles(absFiles, name) 205 if err != nil { 206 return Locator{}, err 207 } 208 209 return tarballUpload(name, true, true, upload, tarball, hash) 210 } 211 212 // CreateTarballFromFiles archives and compresses a list of files to a temporary 213 // file while simultaneously computing its MD5 hash. The caller is responsible 214 // for closing the file handle. 215 func CreateTarballFromFiles(files []string, name string) (*os.File, []byte, error) { 216 tmp, err := ioutil.TempFile("", "fossa-tar-tempfile-"+name+"-") 217 if err != nil { 218 return nil, nil, err 219 } 220 h := md5.New() 221 222 g := gzip.NewWriter(tmp) 223 defer g.Close() 224 225 t := tar.NewWriter(g) 226 defer t.Close() 227 228 for _, file := range files { 229 info, _ := os.Lstat(file) 230 _, err = io.WriteString(h, info.Name()) 231 if err != nil { 232 return nil, nil, err 233 } 234 header, err := tar.FileInfoHeader(info, info.Name()) 235 if err != nil { 236 return nil, nil, err 237 } 238 header.Name = strings.TrimPrefix(file, "/") 239 240 err = t.WriteHeader(header) 241 if err != nil { 242 return nil, nil, err 243 } 244 245 // Exit early for directories, symlinks, etc. 246 if !info.Mode().IsRegular() { 247 return nil, nil, nil 248 } 249 250 // For regular files, write the file. 251 filename, err := os.Open(file) 252 if err != nil { 253 return nil, nil, err 254 } 255 defer filename.Close() 256 257 log.Debugf("Archiving: %#v", file) 258 _, err = io.Copy(t, filename) 259 if err != nil { 260 return nil, nil, err 261 } 262 _, err = io.Copy(h, filename) 263 if err != nil { 264 return nil, nil, err 265 } 266 // Close again to force a disk flush. Closing an *os.File twice is 267 // undefined, but safe in practice. 268 // See https://github.com/golang/go/issues/20705. 269 filename.Close() 270 } 271 272 // Clean up and flush writers. 273 err = t.Flush() 274 if err != nil { 275 return nil, nil, err 276 } 277 err = g.Flush() 278 if err != nil { 279 return nil, nil, err 280 } 281 err = tmp.Sync() 282 if err != nil { 283 return nil, nil, err 284 } 285 286 return tmp, h.Sum(nil), nil 287 } 288 289 // Upload the supplied tarball to the given endpoint. 290 // Note: "name" should not have any "/"s to ensure core can parse it. Setting rawLicenseScan ensures 291 // that FOSSA will not attempt to find dependencies in the uploaded files and that a full license scan 292 // will be run on directories which are normally ignored, such as `vendor` or `node_modules`. 293 func tarballUpload(name string, dependency, rawLicenseScan, upload bool, tarball *os.File, hash []byte) (Locator, error) { 294 info, err := tarball.Stat() 295 if err != nil { 296 return Locator{}, err 297 } 298 299 revision := hex.EncodeToString(hash) 300 301 if !upload { 302 return Locator{ 303 Fetcher: "archive", 304 Project: name, 305 Revision: revision, 306 }, nil 307 } 308 309 q := url.Values{} 310 q.Add("packageSpec", name) 311 q.Add("revision", revision) 312 313 // Get signed URL for uploading. 314 var signed SignedURL 315 _, err = GetJSON(SignedURLAPI+"?"+q.Encode(), &signed) 316 if err != nil { 317 return Locator{}, err 318 } 319 320 // Run second pass: multi-part uploading. 321 r, w := io.Pipe() 322 // In parallel, stream temporary file to PUT. 323 go func() { 324 defer w.Close() 325 defer tarball.Close() 326 _, err := tarball.Seek(0, 0) 327 if err != nil { 328 log.Fatalf("Unable to upload: %s", err.Error()) 329 } 330 _, err = io.Copy(w, tarball) 331 332 if err != nil { 333 log.Fatalf("Unable to upload: %s", err.Error()) 334 } 335 }() 336 337 // TODO: should this be a new base API method? 338 req, err := http.NewRequest(http.MethodPut, signed.SignedURL, r) 339 if err != nil { 340 return Locator{}, err 341 } 342 req.Header.Set("Content-Type", "binary/octet-stream") 343 req.ContentLength = info.Size() 344 req.GetBody = func() (io.ReadCloser, error) { 345 return r, nil 346 } 347 log.Debugf("req: %#v", req) 348 res, err := http.DefaultClient.Do(req) 349 if err != nil { 350 return Locator{}, err 351 } 352 defer res.Body.Close() 353 354 body, err := ioutil.ReadAll(res.Body) 355 if err != nil { 356 return Locator{}, err 357 } 358 log.Debugf("%#v", string(body)) 359 360 // Queue the component build. 361 build := ComponentSpec{ 362 Archives: []Component{ 363 Component{PackageSpec: name, Revision: revision}, 364 }, 365 } 366 data, err := json.Marshal(build) 367 if err != nil { 368 return Locator{}, err 369 } 370 371 parameters := url.Values{} 372 if dependency { 373 parameters.Add("dependency", "true") 374 } 375 376 if rawLicenseScan { 377 parameters.Add("rawLicenseScan", "true") 378 } 379 380 _, _, err = Post(ComponentsBuildAPI+"?"+parameters.Encode(), data) 381 if err != nil { 382 return Locator{}, err 383 } 384 385 return Locator{ 386 Fetcher: "archive", 387 Project: name, 388 Revision: revision, 389 }, nil 390 }