github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/archive/zip.go (about)

     1  package archive
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/qri-io/dataset"
    15  	"github.com/qri-io/qfs"
    16  	"github.com/qri-io/qri/base/component"
    17  	"github.com/qri-io/qri/base/linkfile"
    18  	"github.com/qri-io/qri/dsref"
    19  )
    20  
    21  // WriteZip generates a zip archive of a dataset and writes it to w
    22  func WriteZip(ctx context.Context, fs qfs.Filesystem, ds *dataset.Dataset, format, initID string, ref dsref.Ref, w io.Writer) error {
    23  	zw := zip.NewWriter(w)
    24  	defer zw.Close()
    25  
    26  	st := ds.Structure
    27  
    28  	if ref.Path == "" && ds.Path != "" {
    29  		ref.Path = ds.Path
    30  	}
    31  
    32  	// Iterate the individual components of the dataset
    33  	dsComp := component.ConvertDatasetToComponents(ds, fs)
    34  	for _, compName := range component.AllSubcomponentNames() {
    35  		aComp := dsComp.Base().GetSubcomponent(compName)
    36  		if aComp == nil {
    37  			continue
    38  		}
    39  
    40  		data, err := aComp.StructuredData()
    41  		if err != nil {
    42  			log.Error("component %q, geting structured data: %s", compName, err)
    43  			continue
    44  		}
    45  
    46  		// Specially serialize the body to a file in the zip
    47  		if compName == "body" && st != nil {
    48  			body, err := component.SerializeBody(data, st)
    49  			if err != nil {
    50  				log.Error("component %q, serializing body: %s", compName, err)
    51  				continue
    52  			}
    53  
    54  			w, err := zw.Create(fmt.Sprintf("%s.%s", compName, st.Format))
    55  			if err != nil {
    56  				log.Error("component %q, creating zip writer: %s", compName, err)
    57  				continue
    58  			}
    59  
    60  			w.Write(body)
    61  			continue
    62  		}
    63  
    64  		// TODO(dustmop): The transform component outputs a json file, with a path string
    65  		// to the transform script in IPFS. Consider if Components should have a
    66  		// serialize method that gets the script for transform, and maybe the body contents,
    67  		// but a json struct for everything else. Follow up in another PR.
    68  
    69  		// For any other component, serialize it as json in the zip
    70  		w, err := zw.Create(fmt.Sprintf("%s.json", compName))
    71  		if err != nil {
    72  			log.Error("component %q, creating zip writer: %s", compName, err)
    73  			continue
    74  		}
    75  
    76  		text, err := json.MarshalIndent(data, "", " ")
    77  		if err != nil {
    78  			log.Error("component %q, marshalling data: %s", compName, err)
    79  			continue
    80  		}
    81  		w.Write(text)
    82  	}
    83  
    84  	// Add a linkfile in the zip, which can be used to connect the dataset back to its history
    85  	w, err := zw.Create(linkfile.RefLinkTextFilename)
    86  	if err != nil {
    87  		log.Error(err)
    88  	} else {
    89  		linkfile.WriteRef(w, ref)
    90  	}
    91  
    92  	return nil
    93  }
    94  
    95  // TODO (b5) - rendered viz isn't always being properly added to the
    96  // encoded DAG, causing this to hang indefinitely on a network lookup.
    97  // Use a short timeout for now to prevent the process from running too
    98  // long. We should come up with a more permanent fix for this.
    99  func maybeWriteRenderedViz(ctx context.Context, fs qfs.Filesystem, zw *zip.Writer, vizPath string) error {
   100  	withTimeout, done := context.WithTimeout(ctx, time.Millisecond*250)
   101  	defer done()
   102  	rendered, err := fs.Get(withTimeout, vizPath)
   103  	if err != nil {
   104  		if strings.Contains(err.Error(), "not found") {
   105  			return nil
   106  		}
   107  		return err
   108  	}
   109  
   110  	target, err := zw.Create("index.html")
   111  	if err != nil {
   112  		return err
   113  	}
   114  	_, err = io.Copy(target, rendered)
   115  	return err
   116  }
   117  
   118  // UnzipDatasetBytes is a convenince wrapper for UnzipDataset
   119  func UnzipDatasetBytes(zipData []byte, ds *dataset.Dataset) error {
   120  	return UnzipDataset(bytes.NewReader(zipData), int64(len(zipData)), ds)
   121  }
   122  
   123  // UnzipDataset reads a zip file from a filename and returns a full dataset with components
   124  func UnzipDataset(r io.ReaderAt, size int64, ds *dataset.Dataset) error {
   125  	zr, err := zip.NewReader(r, size)
   126  	if err != nil {
   127  		return err
   128  	}
   129  
   130  	contents, err := unzipGetContents(zr)
   131  	if err != nil {
   132  		return err
   133  	}
   134  
   135  	fileData, ok := contents["dataset.json"]
   136  	if !ok {
   137  		return fmt.Errorf("no dataset.json found in the provided zip")
   138  	}
   139  	if err = json.Unmarshal(fileData, ds); err != nil {
   140  		return err
   141  	}
   142  
   143  	// TODO - do a smarter iteration for body format
   144  	if bodyData, ok := contents["body.json"]; ok {
   145  		ds.BodyBytes = bodyData
   146  		ds.BodyPath = ""
   147  	}
   148  	if bodyData, ok := contents["body.csv"]; ok {
   149  		ds.BodyBytes = bodyData
   150  		ds.BodyPath = ""
   151  	}
   152  	if bodyData, ok := contents["body.cbor"]; ok {
   153  		ds.BodyBytes = bodyData
   154  		ds.BodyPath = ""
   155  	}
   156  
   157  	if tfScriptData, ok := contents["transform.star"]; ok {
   158  		if ds.Transform == nil {
   159  			ds.Transform = &dataset.Transform{}
   160  		}
   161  		ds.Transform.Text = string(tfScriptData)
   162  		ds.Transform.ScriptPath = ""
   163  	}
   164  
   165  	if vizScriptData, ok := contents["viz.html"]; ok {
   166  		if ds.Viz == nil {
   167  			ds.Viz = &dataset.Viz{}
   168  		}
   169  		ds.Viz.Text = string(vizScriptData)
   170  		ds.Viz.ScriptPath = ""
   171  	}
   172  
   173  	// Get ref to existing dataset
   174  	if refText, ok := contents["ref.txt"]; ok {
   175  		refStr := string(refText)
   176  		atPos := strings.Index(refStr, "@")
   177  		if atPos == -1 {
   178  			return fmt.Errorf("invalid dataset ref: no '@' found")
   179  		}
   180  		// Get name and peername
   181  		datasetName := refStr[:atPos]
   182  		sepPos := strings.Index(datasetName, "/")
   183  		if sepPos == -1 {
   184  			return fmt.Errorf("invalid dataset name: no '/' found")
   185  		}
   186  		ds.Peername = datasetName[:sepPos]
   187  		ds.Name = datasetName[sepPos+1:]
   188  	}
   189  	return nil
   190  }
   191  
   192  // UnzipGetContents is a generic zip-unpack to a map of filename: contents
   193  // with contents represented as strings
   194  func UnzipGetContents(data []byte) (map[string]string, error) {
   195  	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  	contents, err := unzipGetContents(zr)
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  
   204  	res := map[string]string{}
   205  	for k, val := range contents {
   206  		res[k] = string(val)
   207  	}
   208  	return res, nil
   209  }
   210  
   211  // unzipGetContents reads a zip file's contents and returns a map from filename to file data
   212  func unzipGetContents(zr *zip.Reader) (map[string][]byte, error) {
   213  	// Create a map from filenames in the zip to their json encoded contents.
   214  	contents := make(map[string][]byte)
   215  	for _, f := range zr.File {
   216  		rc, err := f.Open()
   217  		if err != nil {
   218  			return nil, err
   219  		}
   220  		data, err := ioutil.ReadAll(rc)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		contents[f.Name] = data
   225  	}
   226  	return contents, nil
   227  }