github.com/pkalwak/bagins@v0.0.0-20210317172317-694ac5ce2f54/payload.go (about)

     1  package bagins
     2  
     3  /*
     4  
     5  "Faithless is he that says farewell when the road darkens."
     6  
     7  - Gimli
     8  
     9  */
    10  
    11  import (
    12  	"fmt"
    13  	"hash"
    14  	"io"
    15  	"os"
    16  	"path/filepath"
    17  	"strings"
    18  )
    19  
    20  // Payloads describes a filepath location to serve as the data directory of
    21  // a Bag and methods around managing content inside of it.
    22  type Payload struct {
    23  	dir string // Path of the payload directory to manage.
    24  }
    25  
    26  // Returns a new Payload struct managing the path provied.
    27  func NewPayload(location string) (*Payload, error) {
    28  
    29  	if _, err := FS.Stat(filepath.Clean(location)); os.IsNotExist(err) {
    30  		return nil, fmt.Errorf("Payload directory does not exist! Returned: %v", err)
    31  	}
    32  	p := new(Payload)
    33  	p.dir = filepath.Clean(location)
    34  	return p, nil
    35  }
    36  
    37  func (p *Payload) Name() string {
    38  	return p.dir
    39  }
    40  
    41  // Adds the file at srcPath to the payload directory as dstPath and returns
    42  // a checksum value as calulated by the provided hash. This function also
    43  // writes the checksums into the proper manifests, so you don't have to.
    44  //
    45  // Param manifests should be a slice of payload manifests, which you can get
    46  // from a bag by calling:
    47  //
    48  // bag.GetManifests(PayloadManifest)
    49  //
    50  // Returns the checksums in the form of a map whose keys are the algorithms
    51  // and values are the digests.
    52  //
    53  // If you have an md5 manifest and a sha256 manifest, you'll get back a map
    54  // that looks like this:
    55  //
    56  // checksums["md5"] = "0a0a0a0a"
    57  // checksums["sha256"] = "0b0b0b0b"
    58  func (p *Payload) Add(srcPath string, dstPath string, manifests []*Manifest) (map[string]string, error) {
    59  
    60  	src, err := FS.Open(srcPath)
    61  	if err != nil {
    62  		return nil, err
    63  	}
    64  	defer src.Close()
    65  
    66  	dstFile := filepath.Join(p.dir, dstPath)
    67  
    68  	var wrtr io.Writer = nil
    69  
    70  	absSrcPath, err := filepath.Abs(srcPath)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  	absDestPath, err := filepath.Abs(dstFile)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	hashWriters := make([]io.Writer, 0)
    80  	hashFunctions := make([]hash.Hash, 0)
    81  	hashFunctionNames := make([]string, 0)
    82  
    83  	// Note that we're putting the same object into
    84  	// hashWriters and hashFunctions, because we need
    85  	// them to behave as both io.Writer and hash.Hash.
    86  	for _, m := range manifests {
    87  		hashObj := m.hashFunc()
    88  		hashWriters = append(hashWriters, hashObj)
    89  		hashFunctions = append(hashFunctions, hashObj)
    90  		hashFunctionNames = append(hashFunctionNames, m.Algorithm())
    91  	}
    92  
    93  	// If src and dst are the same, copying with destroy the src.
    94  	// Just compute the hash.
    95  	if absSrcPath == absDestPath {
    96  		wrtr = io.MultiWriter(hashWriters...)
    97  	} else {
    98  		// TODO simplify this! returns on windows paths are messing with me so I'm
    99  		// going through this step wise.
   100  		if err := FS.MkdirAll(filepath.Dir(dstFile), 0766); err != nil {
   101  			return nil, err
   102  		}
   103  		dst, err := FS.Create(dstFile)
   104  		if err != nil {
   105  			return nil, err
   106  		}
   107  		// Append the destination file to our group of hashWriters,
   108  		// so the file actually gets copied.
   109  		hashWriters = append(hashWriters, dst)
   110  		wrtr = io.MultiWriter(hashWriters...)
   111  		defer dst.Close()
   112  	}
   113  
   114  	// Copy the file and compute the hashes. Note that if src and dest
   115  	// are the same, we're only only computing the hash without actually
   116  	// copying the bits.
   117  	_, err = io.Copy(wrtr, src)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	// Calculate the checksums in hex format, so we can return them
   123  	// and write them into the manifests.
   124  	checksums := make(map[string]string)
   125  	for index := range hashFunctions {
   126  		manifest := manifests[index]
   127  		name := hashFunctionNames[index]
   128  		hashFunc := hashFunctions[index]
   129  		digest := fmt.Sprintf("%x", hashFunc.Sum(nil))
   130  		checksums[name] = digest
   131  
   132  		// Add the path and digest to the manifest
   133  		manifest.Data[filepath.Join("data", dstPath)] = digest
   134  	}
   135  	return checksums, err
   136  }
   137  
   138  // Performs an add on every file under the directory supplied to the
   139  // method. Returns a map of filenames and fixity values based
   140  // on the hash function in the manifests.
   141  //
   142  // Param manifests should be a slice of payload manifests, which you can get
   143  // from a bag by calling:
   144  //
   145  // bag.GetManifests(PayloadManifest)
   146  //
   147  // If you have an md5 manifest and a sha256 manifest, you'll get back a map
   148  // that looks like this:
   149  //
   150  // checksums["file1.txt"] = { "md5": "0a0a0a0a", "sha256": "0b0b0b0b" }
   151  // checksums["file2.xml"] = { "md5": "1a1a1a1a", "sha256": "1b1b1b1b" }
   152  // checksums["file3.jpg"] = { "md5": "2a2a2a2a", "sha256": "2b2b2b2b" }
   153  func (p *Payload) AddAll(src string, manifests []*Manifest) (checksums map[string]map[string]string, errs []error) {
   154  
   155  	checksums = make(map[string]map[string]string)
   156  
   157  	// Collect files to add in scr directory.
   158  	var files []string
   159  	visit := func(pth string, info os.FileInfo, err error) error {
   160  		if !info.IsDir() {
   161  			files = append(files, pth)
   162  		}
   163  		return err
   164  	}
   165  
   166  	if err := FS.Walk(src, visit); err != nil {
   167  		errs = append(errs, err)
   168  	}
   169  
   170  	for _, file := range files {
   171  		dstPath := strings.TrimPrefix(file, src)
   172  		fixities, err := p.Add(file, dstPath, manifests)
   173  		if err != nil {
   174  			errs = append(errs, err)
   175  		}
   176  		checksums[dstPath] = fixities
   177  	}
   178  
   179  	return
   180  }
   181  
   182  // Returns the octetstream sum and number of files of all the files in the
   183  // payload directory.  See the BagIt specification "Oxsum" field of the
   184  // bag-info.txt file for more information.
   185  func (p *Payload) OctetStreamSum() (int64, int) {
   186  	var sum int64
   187  	var count int
   188  
   189  	visit := func(pth string, info os.FileInfo, err error) error {
   190  		if !info.IsDir() {
   191  			sum = sum + info.Size()
   192  			count = count + 1
   193  		}
   194  		return err
   195  	}
   196  
   197  	FS.Walk(p.dir, visit)
   198  
   199  	return sum, count
   200  }