github.com/pkalwak/bagins@v0.0.0-20210317172317-694ac5ce2f54/manifest.go (about)

     1  package bagins
     2  
     3  /*
     4  
     5  "Oft in lies truth is hidden."
     6  
     7  - Glorfindel
     8  
     9  */
    10  
    11  import (
    12  	"bufio"
    13  	"errors"
    14  	"fmt"
    15  	"github.com/spf13/afero"
    16  	"hash"
    17  	"os"
    18  	"path"
    19  	"path/filepath"
    20  	"regexp"
    21  	"strings"
    22  )
    23  
/*
 Manifest represents information about a BagIt manifest file.  As of BagIt spec
 0.97 this means only manifest-<algo>.txt and tagmanifest-<algo>.txt files.

 For more information see:
   manifest: http://tools.ietf.org/html/draft-kunze-bagit-09#section-2.1.3
   tagmanifest: http://tools.ietf.org/html/draft-kunze-bagit-09#section-2.2.1
*/
type Manifest struct {
	name         string            // Path to the manifest file
	manifestType string            // PayloadManifest or TagManifest
	Data         map[string]string // Key is file path (joined onto the manifest's directory by RunChecksums), value is checksum
	hashName     string            // Algorithm name, stored lower-cased by NewManifest
	hashFunc     func() hash.Hash  // Hash constructor obtained from LookupHash; RunChecksums calls it once per file
}
    39  
// Accepted values for the manifestType parameter of NewManifest.
const (
	PayloadManifest = "payload_manifest" // manifest written as manifest-<algo>.txt
	TagManifest     = "tag_manifest"     // manifest written as tagmanifest-<algo>.txt
)
    44  
    45  // Returns a pointer to a new manifest or returns an error if improperly named.
    46  func NewManifest(pathToFile string, hashName string, manifestType string) (*Manifest, error) {
    47  
    48  	if manifestType != PayloadManifest && manifestType != TagManifest {
    49  		return nil, fmt.Errorf("Param manifestType must be either bagins.PayloadManifest " +
    50  			"or bagins.TagManifest")
    51  	}
    52  	if _, err := FS.Stat(filepath.Dir(pathToFile)); err != nil {
    53  		if os.IsNotExist(err) {
    54  			return nil, fmt.Errorf("Unable to create manifest. Path does not exist: %s", pathToFile)
    55  		} else {
    56  			return nil, fmt.Errorf("Unexpected error creating manifest: %s", err)
    57  		}
    58  	}
    59  	m := new(Manifest)
    60  	m.hashName = strings.ToLower(hashName)
    61  	hashFunc, err := LookupHash(hashName)
    62  	if err != nil {
    63  		return nil, err
    64  	}
    65  	m.hashFunc = hashFunc
    66  	m.Data = make(map[string]string)
    67  
    68  	// Older versions allow pathToFile to be empty...
    69  	if !strings.HasSuffix(pathToFile, "manifest-"+hashName+".txt") {
    70  		if manifestType == PayloadManifest {
    71  			pathToFile = filepath.Join(pathToFile, "manifest-"+m.hashName+".txt")
    72  		} else {
    73  			pathToFile = filepath.Join(pathToFile, "tagmanifest-"+m.hashName+".txt")
    74  		}
    75  	}
    76  
    77  	m.name = pathToFile
    78  	m.manifestType = PayloadManifest
    79  	if manifestType == TagManifest {
    80  		m.manifestType = TagManifest
    81  	}
    82  
    83  	return m, nil
    84  }
    85  
    86  /*
    87    Opens a manifest file, parses attemps to parse the hashtype from the filename, parses
    88    the file contents and returns a pointer to a Manifest.  Error slice may comprise multiple
    89    parsing errors when attempting to read data for fault tolerance.
    90  */
    91  func ReadManifest(name string) (*Manifest, []error) {
    92  	var errs []error
    93  
    94  	hashName, err := parseAlgoName(name)
    95  	if err != nil {
    96  		return nil, append(errs, err)
    97  	}
    98  
    99  	file, err := FS.Open(name)
   100  	if err != nil {
   101  		return nil, append(errs, err)
   102  	}
   103  
   104  	data, e := parseManifestData(file)
   105  	if e != nil {
   106  		errs = append(errs, e...)
   107  	}
   108  
   109  	manifestType := PayloadManifest
   110  	if strings.HasPrefix(path.Base(name), "tagmanifest-") {
   111  		manifestType = TagManifest
   112  	}
   113  	m, err := NewManifest(name, hashName, manifestType)
   114  	if err != nil {
   115  		return nil, append(errs, err)
   116  	}
   117  	m.Data = data
   118  
   119  	return m, errs
   120  
   121  }
   122  
   123  /*
   124    Calculates a checksum for files listed in the manifest and compares it to the value
   125    stored in manifest file.  Returns an error for each file that fails the fixity check.
   126  */
   127  func (m *Manifest) RunChecksums() []error {
   128  
   129  	var invalidSums []error
   130  
   131  	for key, sum := range m.Data {
   132  		pathToFile := filepath.Join(filepath.Dir(m.name), key)
   133  		fileChecksum, err := FileChecksum(pathToFile, m.hashFunc())
   134  		if sum != fileChecksum {
   135  			invalidSums = append(invalidSums, fmt.Errorf("File checksum %s is not valid for %s:%s", sum, key, fileChecksum))
   136  		}
   137  		if err != nil {
   138  			invalidSums = append(invalidSums, err)
   139  		}
   140  	}
   141  
   142  	return invalidSums
   143  }
   144  
   145  // Writes key value pairs to a manifest file.
   146  func (m *Manifest) Create() error {
   147  	if m.Name() == "" {
   148  		return errors.New("Manifest must have values for basename and algo set to create a file.")
   149  	}
   150  	// Create directory if needed.
   151  	basepath := filepath.Dir(m.name)
   152  
   153  	if err := FS.MkdirAll(basepath, 0777); err != nil {
   154  		return err
   155  	}
   156  
   157  	// Create the tagfile.
   158  	fileOut, err := FS.Create(m.name)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	defer fileOut.Close()
   163  
   164  	// Write fields and data to the file.
   165  	for fName, ckSum := range m.Data {
   166  		_, err := fmt.Fprintln(fileOut, ckSum, fName)
   167  		if err != nil {
   168  			return errors.New("Error writing line to manifest: " + err.Error())
   169  		}
   170  	}
   171  	return nil
   172  }
   173  
   174  // Returns the contents of the manifest in the form of a string.
   175  // Useful if you don't want to write directly to disk.
   176  func (m *Manifest) ToString() string {
   177  	str := ""
   178  	for fName, ckSum := range m.Data {
   179  		str += fmt.Sprintf("%s %s\n", ckSum, fName)
   180  	}
   181  	return str
   182  }
   183  
   184  // Returns a sting of the filename for this manifest file based on Path, BaseName and Algo
   185  func (m *Manifest) Name() string {
   186  	return filepath.Clean(m.name)
   187  }
   188  
   189  // Returns the name of the manifest's hashing algorithm.
   190  // "sha256", "md5", etc.
   191  func (m *Manifest) Algorithm() string {
   192  	return m.hashName
   193  }
   194  
   195  // Returns the type of manifest. Either 'payload' or 'tag'.
   196  func (m *Manifest) Type() string {
   197  	return m.manifestType
   198  }
   199  
   200  // Tries to parse the algorithm name from a manifest filename.  Returns
   201  // an error if unable to do so.
   202  func parseAlgoName(name string) (string, error) {
   203  	filename := filepath.Base(name)
   204  	re, err := regexp.Compile(`(^.*\-)(.*)(\.txt$)`)
   205  	if err != nil {
   206  		return "", err
   207  	}
   208  	matches := re.FindStringSubmatch(filename)
   209  	if len(matches) < 2 {
   210  		return "", errors.New("Unable to determine algorithm from filename!")
   211  	}
   212  	algo := matches[2]
   213  	return algo, nil
   214  }
   215  
   216  // Reads the contents of file and parses checksum and file information in manifest format as
   217  // per the bagit specification.
   218  func parseManifestData(file afero.File) (map[string]string, []error) {
   219  	var errs []error
   220  	// See regexp examples at http://play.golang.org/p/_msLJ-lBEu
   221  	// Regex matches these reqs from the bagit spec: "One or
   222  	// more linear whitespace characters (spaces or tabs) MUST separate
   223  	// CHECKSUM from FILENAME." as specified here:
   224  	// http://tools.ietf.org/html/draft-kunze-bagit-10#section-2.1.3
   225  	re := regexp.MustCompile(`^(\S*)\s*(.*)`)
   226  
   227  	scanner := bufio.NewScanner(file)
   228  	values := make(map[string]string)
   229  
   230  	for scanner.Scan() {
   231  		line := scanner.Text()
   232  		if re.MatchString(line) {
   233  			data := re.FindStringSubmatch(line)
   234  			values[data[2]] = data[1]
   235  		} else {
   236  			errs = append(errs, fmt.Errorf("Unable to parse data from line: %s", line))
   237  		}
   238  	}
   239  
   240  	return values, errs
   241  }