github.com/pkalwak/bagins@v0.0.0-20210317172317-694ac5ce2f54/manifest.go (about) 1 package bagins 2 3 /* 4 5 "Oft in lies truth is hidden." 6 7 - Glorfindel 8 9 */ 10 11 import ( 12 "bufio" 13 "errors" 14 "fmt" 15 "github.com/spf13/afero" 16 "hash" 17 "os" 18 "path" 19 "path/filepath" 20 "regexp" 21 "strings" 22 ) 23 24 /* 25 Manifest represents information about a BagIt manifest file. As of BagIt spec 26 0.97 this means only manifest-<algo>.txt and tagmanifest-<algo>.txt files. 27 28 For more information see: 29 manifest: http://tools.ietf.org/html/draft-kunze-bagit-09#section-2.1.3 30 tagmanifest: http://tools.ietf.org/html/draft-kunze-bagit-09#section-2.2.1 31 */ 32 type Manifest struct { 33 name string // Path to the manifest file 34 manifestType string // payload manifest or tag manifest? 35 Data map[string]string // Key is file path, value is checksum 36 hashName string 37 hashFunc func() hash.Hash 38 } 39 40 const ( 41 PayloadManifest = "payload_manifest" 42 TagManifest = "tag_manifest" 43 ) 44 45 // Returns a pointer to a new manifest or returns an error if improperly named. 46 func NewManifest(pathToFile string, hashName string, manifestType string) (*Manifest, error) { 47 48 if manifestType != PayloadManifest && manifestType != TagManifest { 49 return nil, fmt.Errorf("Param manifestType must be either bagins.PayloadManifest " + 50 "or bagins.TagManifest") 51 } 52 if _, err := FS.Stat(filepath.Dir(pathToFile)); err != nil { 53 if os.IsNotExist(err) { 54 return nil, fmt.Errorf("Unable to create manifest. Path does not exist: %s", pathToFile) 55 } else { 56 return nil, fmt.Errorf("Unexpected error creating manifest: %s", err) 57 } 58 } 59 m := new(Manifest) 60 m.hashName = strings.ToLower(hashName) 61 hashFunc, err := LookupHash(hashName) 62 if err != nil { 63 return nil, err 64 } 65 m.hashFunc = hashFunc 66 m.Data = make(map[string]string) 67 68 // Older versions allow pathToFile to be empty... 69 if !strings.HasSuffix(pathToFile, "manifest-"+hashName+".txt") { 70 if manifestType == PayloadManifest { 71 pathToFile = filepath.Join(pathToFile, "manifest-"+m.hashName+".txt") 72 } else { 73 pathToFile = filepath.Join(pathToFile, "tagmanifest-"+m.hashName+".txt") 74 } 75 } 76 77 m.name = pathToFile 78 m.manifestType = PayloadManifest 79 if manifestType == TagManifest { 80 m.manifestType = TagManifest 81 } 82 83 return m, nil 84 } 85 86 /* 87 Opens a manifest file, parses attemps to parse the hashtype from the filename, parses 88 the file contents and returns a pointer to a Manifest. Error slice may comprise multiple 89 parsing errors when attempting to read data for fault tolerance. 90 */ 91 func ReadManifest(name string) (*Manifest, []error) { 92 var errs []error 93 94 hashName, err := parseAlgoName(name) 95 if err != nil { 96 return nil, append(errs, err) 97 } 98 99 file, err := FS.Open(name) 100 if err != nil { 101 return nil, append(errs, err) 102 } 103 104 data, e := parseManifestData(file) 105 if e != nil { 106 errs = append(errs, e...) 107 } 108 109 manifestType := PayloadManifest 110 if strings.HasPrefix(path.Base(name), "tagmanifest-") { 111 manifestType = TagManifest 112 } 113 m, err := NewManifest(name, hashName, manifestType) 114 if err != nil { 115 return nil, append(errs, err) 116 } 117 m.Data = data 118 119 return m, errs 120 121 } 122 123 /* 124 Calculates a checksum for files listed in the manifest and compares it to the value 125 stored in manifest file. Returns an error for each file that fails the fixity check. 126 */ 127 func (m *Manifest) RunChecksums() []error { 128 129 var invalidSums []error 130 131 for key, sum := range m.Data { 132 pathToFile := filepath.Join(filepath.Dir(m.name), key) 133 fileChecksum, err := FileChecksum(pathToFile, m.hashFunc()) 134 if sum != fileChecksum { 135 invalidSums = append(invalidSums, fmt.Errorf("File checksum %s is not valid for %s:%s", sum, key, fileChecksum)) 136 } 137 if err != nil { 138 invalidSums = append(invalidSums, err) 139 } 140 } 141 142 return invalidSums 143 } 144 145 // Writes key value pairs to a manifest file. 146 func (m *Manifest) Create() error { 147 if m.Name() == "" { 148 return errors.New("Manifest must have values for basename and algo set to create a file.") 149 } 150 // Create directory if needed. 151 basepath := filepath.Dir(m.name) 152 153 if err := FS.MkdirAll(basepath, 0777); err != nil { 154 return err 155 } 156 157 // Create the tagfile. 158 fileOut, err := FS.Create(m.name) 159 if err != nil { 160 return err 161 } 162 defer fileOut.Close() 163 164 // Write fields and data to the file. 165 for fName, ckSum := range m.Data { 166 _, err := fmt.Fprintln(fileOut, ckSum, fName) 167 if err != nil { 168 return errors.New("Error writing line to manifest: " + err.Error()) 169 } 170 } 171 return nil 172 } 173 174 // Returns the contents of the manifest in the form of a string. 175 // Useful if you don't want to write directly to disk. 176 func (m *Manifest) ToString() string { 177 str := "" 178 for fName, ckSum := range m.Data { 179 str += fmt.Sprintf("%s %s\n", ckSum, fName) 180 } 181 return str 182 } 183 184 // Returns a sting of the filename for this manifest file based on Path, BaseName and Algo 185 func (m *Manifest) Name() string { 186 return filepath.Clean(m.name) 187 } 188 189 // Returns the name of the manifest's hashing algorithm. 190 // "sha256", "md5", etc. 191 func (m *Manifest) Algorithm() string { 192 return m.hashName 193 } 194 195 // Returns the type of manifest. Either 'payload' or 'tag'. 196 func (m *Manifest) Type() string { 197 return m.manifestType 198 } 199 200 // Tries to parse the algorithm name from a manifest filename. Returns 201 // an error if unable to do so. 202 func parseAlgoName(name string) (string, error) { 203 filename := filepath.Base(name) 204 re, err := regexp.Compile(`(^.*\-)(.*)(\.txt$)`) 205 if err != nil { 206 return "", err 207 } 208 matches := re.FindStringSubmatch(filename) 209 if len(matches) < 2 { 210 return "", errors.New("Unable to determine algorithm from filename!") 211 } 212 algo := matches[2] 213 return algo, nil 214 } 215 216 // Reads the contents of file and parses checksum and file information in manifest format as 217 // per the bagit specification. 218 func parseManifestData(file afero.File) (map[string]string, []error) { 219 var errs []error 220 // See regexp examples at http://play.golang.org/p/_msLJ-lBEu 221 // Regex matches these reqs from the bagit spec: "One or 222 // more linear whitespace characters (spaces or tabs) MUST separate 223 // CHECKSUM from FILENAME." as specified here: 224 // http://tools.ietf.org/html/draft-kunze-bagit-10#section-2.1.3 225 re := regexp.MustCompile(`^(\S*)\s*(.*)`) 226 227 scanner := bufio.NewScanner(file) 228 values := make(map[string]string) 229 230 for scanner.Scan() { 231 line := scanner.Text() 232 if re.MatchString(line) { 233 data := re.FindStringSubmatch(line) 234 values[data[2]] = data[1] 235 } else { 236 errs = append(errs, fmt.Errorf("Unable to parse data from line: %s", line)) 237 } 238 } 239 240 return values, errs 241 }