github.com/kjk/siser@v0.0.0-20220410204903-1b1e84ea1397/pak/reader.go (about)

     1  package pak
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"strconv"
    11  
    12  	"github.com/kjk/siser"
    13  )
    14  
    15  var (
    16  	// ErrNoPath is returned when path is not provided
    17  	ErrNoPath = errors.New("no Path provided")
    18  )
    19  
    20  // Entry represents a single file in the archive
    21  type Entry struct {
    22  	// Metadata is arbitrary metadata.
    23  	// Has at least Size, Path and Sha1 values
    24  	Metadata Metadata
    25  
    26  	// Path of the file. Recomended to use '/' for path separator
    27  	Path string
    28  
    29  	// offset within the file
    30  	Offset int64
    31  
    32  	// size of the entry, in bytes
    33  	Size int64
    34  
    35  	// sha1 of content, in hex format
    36  	Sha1 string
    37  
    38  	// fields only used when writing
    39  	// set if this was AddFile()
    40  	srcFilePath string
    41  	// data from AddData() or content of file from AddFile()
    42  	data []byte
    43  }
    44  
    45  // Archive represents an archive
    46  type Archive struct {
    47  	Path    string
    48  	Entries []*Entry
    49  
    50  	// if true, will disable validating sha1 on reading
    51  	DisableValidateSha1 bool
    52  }
    53  
    54  // ReadArchive reads archive from a file
    55  func ReadArchive(path string) (*Archive, error) {
    56  	f, err := os.Open(path)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	defer f.Close()
    61  	a, err := ReadArchiveFromReader(f)
    62  	if err != nil {
    63  		return nil, err
    64  	}
    65  	a.Path = path
    66  	return a, nil
    67  }
    68  
    69  // ReadArchiveFromReader reads archive entries
    70  func ReadArchiveFromReader(r io.Reader) (*Archive, error) {
    71  	br := bufio.NewReader(r)
    72  	sr := siser.NewReader(br)
    73  
    74  	// read the header which is a siser-formatted block of data
    75  	// containing siser-formatted records for entries
    76  	sr.ReadNextData()
    77  	if sr.Err() != nil {
    78  		return nil, sr.Err()
    79  	}
    80  	if sr.Name != archiveName {
    81  		return nil, fmt.Errorf("expected header named '%s', got '%s'", archiveName, sr.Name)
    82  	}
    83  	// this is where data starts in the file
    84  	// this is the size of the header
    85  	entriesOffset := sr.NextRecordPos
    86  	dataBuf := bytes.NewBuffer(sr.Data)
    87  	hdrDataBuf := bufio.NewReader(dataBuf)
    88  	sr = siser.NewReader(hdrDataBuf)
    89  
    90  	currOffset := entriesOffset
    91  
    92  	var entries []*Entry
    93  	for sr.ReadNextRecord() {
    94  		var meta Metadata
    95  		for _, e := range sr.Record.Entries {
    96  			meta.Set(e.Key, e.Value)
    97  		}
    98  
    99  		sizeStr, ok := meta.Get(MetaKeySize)
   100  		if !ok {
   101  			return nil, fmt.Errorf("missing '%s' value", MetaKeySize)
   102  		}
   103  		size, err := strconv.ParseInt(sizeStr, 10, 64)
   104  		if err != nil {
   105  			return nil, fmt.Errorf("value '%s' for 'Size' is not a valid number. Error: %s", sizeStr, err)
   106  		}
   107  
   108  		path, ok := meta.Get(MetaKeyPath)
   109  		if !ok {
   110  			return nil, fmt.Errorf("missing '%s' value", MetaKeyPath)
   111  		}
   112  		sha1, ok := meta.Get(MetaKeySha1)
   113  		if !ok {
   114  			return nil, fmt.Errorf("missing '%s' value", MetaKeySha1)
   115  		}
   116  
   117  		e := &Entry{
   118  			Metadata: meta,
   119  			Path:     path,
   120  			Offset:   currOffset,
   121  			Size:     size,
   122  			Sha1:     sha1,
   123  		}
   124  		entries = append(entries, e)
   125  
   126  		currOffset += size
   127  	}
   128  
   129  	if sr.Err() != nil {
   130  		return nil, sr.Err()
   131  	}
   132  
   133  	a := &Archive{
   134  		Entries: entries,
   135  	}
   136  	return a, nil
   137  }
   138  
   139  // reads a part of a file of a given size at an offset
   140  func readFileChunk(path string, offset, size int64) ([]byte, error) {
   141  	f, err := os.Open(path)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	defer f.Close()
   146  
   147  	d := make([]byte, int(size))
   148  	_, err = f.ReadAt(d, offset)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	return d, nil
   153  }
   154  
   155  // ReadEntry reads a given entry from file in Path
   156  func (a *Archive) ReadEntry(e *Entry) ([]byte, error) {
   157  	if a.Path == "" {
   158  		return nil, ErrNoPath
   159  	}
   160  	d, err := readFileChunk(a.Path, e.Offset, e.Size)
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  	if !a.DisableValidateSha1 {
   165  		sha1Got := sha1HexOfBytes(d)
   166  		if e.Sha1 != sha1Got {
   167  			return nil, fmt.Errorf("mismatched sha1 for file '%s'. Expected: %s, got: %s", e.Path, e.Sha1, sha1Got)
   168  		}
   169  	}
   170  	return d, nil
   171  }