github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/archive/archive.go (about)

     1  // Package archive creates and consumes high-fidelity conversions of dataset
     2  // documents for export & import
     3  package archive
     4  
     5  import (
     6  	"archive/zip"
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/ghodss/yaml"
    18  	logger "github.com/ipfs/go-log"
    19  	"github.com/qri-io/dataset"
    20  	"github.com/qri-io/dataset/dsio"
    21  	"github.com/qri-io/qfs"
    22  	"github.com/qri-io/qri/base"
    23  	"github.com/qri-io/qri/dsref"
    24  )
    25  
// log is the package-level logger, created under the name "archive".
var log = logger.Logger("archive")
    27  
    28  // Export generates a high-fidelity copy of a dataset that doesn't require qri
    29  // software to read.
    30  // TODO (b5) - this currently has a lot of overlap with "get" and "checkout"
    31  // commands, we should emphasize those (more common) tools instead. See
    32  // https://github.com/qri-io/qri/issues/1176 for discussion
    33  func Export(ctx context.Context, fs qfs.Filesystem, ds *dataset.Dataset, refStr, targetDir, output, outputFormat string, zipped bool) (string, error) {
    34  	var err error
    35  	defer base.CloseDataset(ds)
    36  
    37  	format := outputFormat
    38  	if format == "" {
    39  		if zipped {
    40  			// Default format, if --zip flag is set, is zip
    41  			format = "zip"
    42  		} else {
    43  			// Default format is json, otherwise
    44  			format = "json"
    45  		}
    46  	}
    47  
    48  	var fileWritten string
    49  	if output == "" || isDirectory(output) {
    50  		// If output is blank or a directory, derive filename from repo name and commit timestamp.
    51  		baseName, err := GenerateFilename(ds, format)
    52  		if err != nil {
    53  			return "", err
    54  		}
    55  		fileWritten = path.Join(output, baseName)
    56  	} else {
    57  		// If output filename is not blank, check that the file extension matches the format. Or
    58  		// if format is not specified, use the file extension to derive the format.
    59  		ext := filepath.Ext(output)
    60  		if strings.HasPrefix(ext, ".") {
    61  			ext = ext[1:]
    62  		}
    63  		// If format was not supplied as a flag, and we're not outputting a zip, derive format
    64  		// from file extension.
    65  		if outputFormat == "" && !zipped {
    66  			format = ext
    67  		}
    68  		// Make sure the format doesn't contradict the file extension.
    69  		if ext != format {
    70  			return "", fmt.Errorf("file extension doesn't match format %s <> %s", ext, format)
    71  		}
    72  		fileWritten = output
    73  	}
    74  
    75  	// fileWritten represents the human-readable name of where the export is written to, while
    76  	// outputPath is an absolute path used in the implementation
    77  	var outputPath string
    78  	if path.IsAbs(fileWritten) {
    79  		outputPath = fileWritten
    80  	} else {
    81  		outputPath = path.Join(targetDir, fileWritten)
    82  	}
    83  
    84  	// If output is a format wrapped in a zip file, fixup the output name.
    85  	if zipped && format != "zip" {
    86  		outputPath = replaceExt(outputPath, ".zip")
    87  		fileWritten = replaceExt(fileWritten, ".zip")
    88  	}
    89  
    90  	// Make sure output doesn't already exist.
    91  	if _, err = os.Stat(outputPath); err == nil {
    92  		return "", fmt.Errorf(`already exists: "%s"`, fileWritten)
    93  	}
    94  
    95  	// Create output writer.
    96  	var writer io.Writer
    97  	writer, err = os.Create(outputPath)
    98  	if err != nil {
    99  		return "", err
   100  	}
   101  
   102  	// If outputting a wrapped zip file, create the zip wrapper.
   103  	if zipped && format != "zip" {
   104  		zipWriter := zip.NewWriter(writer)
   105  
   106  		writer, err = zipWriter.Create(fmt.Sprintf("dataset.%s", format))
   107  		if err != nil {
   108  			return "", err
   109  		}
   110  
   111  		defer func() {
   112  			zipWriter.Close()
   113  		}()
   114  	}
   115  
   116  	// Create entry reader.
   117  	reader, err := dsio.NewEntryReader(ds.Structure, ds.BodyFile())
   118  	if err != nil {
   119  		return "", err
   120  	}
   121  
   122  	switch format {
   123  	case "json":
   124  
   125  		// TODO (dlong): Look into combining this functionality (reading body, changing structure),
   126  		// with some of the functions in `base`.
   127  		bodyEntries, err := base.ReadEntries(reader)
   128  		if err != nil {
   129  			return "", err
   130  		}
   131  		ds.Body = bodyEntries
   132  
   133  		ds.Structure = &dataset.Structure{
   134  			Format:   "json",
   135  			Schema:   ds.Structure.Schema,
   136  			Depth:    ds.Structure.Depth,
   137  			ErrCount: ds.Structure.ErrCount,
   138  		}
   139  		// drop any transform stuff
   140  		ds.Transform = nil
   141  
   142  		if err := json.NewEncoder(writer).Encode(ds); err != nil {
   143  			return "", err
   144  		}
   145  		return fileWritten, nil
   146  
   147  	case "yaml":
   148  
   149  		bodyEntries, err := base.ReadEntries(reader)
   150  		if err != nil {
   151  			return "", err
   152  		}
   153  		ds.Body = bodyEntries
   154  
   155  		ds.Structure = &dataset.Structure{
   156  			Format:   "yaml",
   157  			Schema:   ds.Structure.Schema,
   158  			Depth:    ds.Structure.Depth,
   159  			ErrCount: ds.Structure.ErrCount,
   160  		}
   161  		// drop any transform stuff
   162  		ds.Transform = nil
   163  		dsBytes, err := yaml.Marshal(ds)
   164  		if err != nil {
   165  			return "", err
   166  		}
   167  
   168  		_, err = writer.Write(dsBytes)
   169  		if err != nil {
   170  			return "", err
   171  		}
   172  		return fileWritten, nil
   173  
   174  	case "xlsx":
   175  		st := &dataset.Structure{
   176  			Format: "xlsx",
   177  			Schema: ds.Structure.Schema,
   178  		}
   179  		w, err := dsio.NewEntryWriter(st, writer)
   180  		if err != nil {
   181  			return "", err
   182  		}
   183  
   184  		if err := dsio.Copy(reader, w); err != nil {
   185  			return "", err
   186  		}
   187  		return fileWritten, w.Close()
   188  
   189  	case "zip":
   190  		ref, err := dsref.Parse(refStr)
   191  		if err != nil {
   192  			return "", err
   193  		}
   194  		blankInitID := ""
   195  		if err = WriteZip(ctx, fs, ds, "json", blankInitID, ref, writer); err != nil {
   196  			return "", err
   197  		}
   198  
   199  		return fileWritten, nil
   200  
   201  	default:
   202  		return "", fmt.Errorf("unknown file format \"%s\"", format)
   203  	}
   204  }
   205  
   206  func isDirectory(path string) bool {
   207  	st, err := os.Stat(path)
   208  	if err != nil {
   209  		return false
   210  	}
   211  	return st.Mode().IsDir()
   212  }
   213  
   214  func replaceExt(filename, newExt string) string {
   215  	ext := path.Ext(filename)
   216  	return filename[:len(filename)-len(ext)] + newExt
   217  }
   218  
   219  // GenerateFilename takes a dataset and generates a filename
   220  // if no timestamp exists, it will default to the empty time.Time
   221  // in the form [peername]-[datasetName]_-_[timestamp].[format]
   222  func GenerateFilename(ds *dataset.Dataset, format string) (string, error) {
   223  	ts := time.Time{}
   224  	if ds.Commit != nil {
   225  		ts = ds.Commit.Timestamp
   226  	}
   227  	if format == "" {
   228  		if ds.Structure == nil || ds.Structure.Format == "" {
   229  			return "", fmt.Errorf("no format specified and no format present in the dataset Structure")
   230  		}
   231  		format = ds.Structure.Format
   232  	}
   233  	timeText := fmt.Sprintf("%04d-%02d-%02d-%02d-%02d-%02d", ts.Year(), ts.Month(), ts.Day(),
   234  		ts.Hour(), ts.Minute(), ts.Second())
   235  	return fmt.Sprintf("%s-%s_-_%s.%s", ds.Peername, ds.Name, timeText, format), nil
   236  }