github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/archive/zip.go (about) 1 package archive 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "context" 7 "encoding/json" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "strings" 12 "time" 13 14 "github.com/qri-io/dataset" 15 "github.com/qri-io/qfs" 16 "github.com/qri-io/qri/base/component" 17 "github.com/qri-io/qri/base/linkfile" 18 "github.com/qri-io/qri/dsref" 19 ) 20 21 // WriteZip generates a zip archive of a dataset and writes it to w 22 func WriteZip(ctx context.Context, fs qfs.Filesystem, ds *dataset.Dataset, format, initID string, ref dsref.Ref, w io.Writer) error { 23 zw := zip.NewWriter(w) 24 defer zw.Close() 25 26 st := ds.Structure 27 28 if ref.Path == "" && ds.Path != "" { 29 ref.Path = ds.Path 30 } 31 32 // Iterate the individual components of the dataset 33 dsComp := component.ConvertDatasetToComponents(ds, fs) 34 for _, compName := range component.AllSubcomponentNames() { 35 aComp := dsComp.Base().GetSubcomponent(compName) 36 if aComp == nil { 37 continue 38 } 39 40 data, err := aComp.StructuredData() 41 if err != nil { 42 log.Error("component %q, geting structured data: %s", compName, err) 43 continue 44 } 45 46 // Specially serialize the body to a file in the zip 47 if compName == "body" && st != nil { 48 body, err := component.SerializeBody(data, st) 49 if err != nil { 50 log.Error("component %q, serializing body: %s", compName, err) 51 continue 52 } 53 54 w, err := zw.Create(fmt.Sprintf("%s.%s", compName, st.Format)) 55 if err != nil { 56 log.Error("component %q, creating zip writer: %s", compName, err) 57 continue 58 } 59 60 w.Write(body) 61 continue 62 } 63 64 // TODO(dustmop): The transform component outputs a json file, with a path string 65 // to the transform script in IPFS. Consider if Components should have a 66 // serialize method that gets the script for transform, and maybe the body contents, 67 // but a json struct for everything else. Follow up in another PR. 68 69 // For any other component, serialize it as json in the zip 70 w, err := zw.Create(fmt.Sprintf("%s.json", compName)) 71 if err != nil { 72 log.Error("component %q, creating zip writer: %s", compName, err) 73 continue 74 } 75 76 text, err := json.MarshalIndent(data, "", " ") 77 if err != nil { 78 log.Error("component %q, marshalling data: %s", compName, err) 79 continue 80 } 81 w.Write(text) 82 } 83 84 // Add a linkfile in the zip, which can be used to connect the dataset back to its history 85 w, err := zw.Create(linkfile.RefLinkTextFilename) 86 if err != nil { 87 log.Error(err) 88 } else { 89 linkfile.WriteRef(w, ref) 90 } 91 92 return nil 93 } 94 95 // TODO (b5) - rendered viz isn't always being properly added to the 96 // encoded DAG, causing this to hang indefinitely on a network lookup. 97 // Use a short timeout for now to prevent the process from running too 98 // long. We should come up with a more permanent fix for this. 99 func maybeWriteRenderedViz(ctx context.Context, fs qfs.Filesystem, zw *zip.Writer, vizPath string) error { 100 withTimeout, done := context.WithTimeout(ctx, time.Millisecond*250) 101 defer done() 102 rendered, err := fs.Get(withTimeout, vizPath) 103 if err != nil { 104 if strings.Contains(err.Error(), "not found") { 105 return nil 106 } 107 return err 108 } 109 110 target, err := zw.Create("index.html") 111 if err != nil { 112 return err 113 } 114 _, err = io.Copy(target, rendered) 115 return err 116 } 117 118 // UnzipDatasetBytes is a convenince wrapper for UnzipDataset 119 func UnzipDatasetBytes(zipData []byte, ds *dataset.Dataset) error { 120 return UnzipDataset(bytes.NewReader(zipData), int64(len(zipData)), ds) 121 } 122 123 // UnzipDataset reads a zip file from a filename and returns a full dataset with components 124 func UnzipDataset(r io.ReaderAt, size int64, ds *dataset.Dataset) error { 125 zr, err := zip.NewReader(r, size) 126 if err != nil { 127 return err 128 } 129 130 contents, err := unzipGetContents(zr) 131 if err != nil { 132 return err 133 } 134 135 fileData, ok := contents["dataset.json"] 136 if !ok { 137 return fmt.Errorf("no dataset.json found in the provided zip") 138 } 139 if err = json.Unmarshal(fileData, ds); err != nil { 140 return err 141 } 142 143 // TODO - do a smarter iteration for body format 144 if bodyData, ok := contents["body.json"]; ok { 145 ds.BodyBytes = bodyData 146 ds.BodyPath = "" 147 } 148 if bodyData, ok := contents["body.csv"]; ok { 149 ds.BodyBytes = bodyData 150 ds.BodyPath = "" 151 } 152 if bodyData, ok := contents["body.cbor"]; ok { 153 ds.BodyBytes = bodyData 154 ds.BodyPath = "" 155 } 156 157 if tfScriptData, ok := contents["transform.star"]; ok { 158 if ds.Transform == nil { 159 ds.Transform = &dataset.Transform{} 160 } 161 ds.Transform.Text = string(tfScriptData) 162 ds.Transform.ScriptPath = "" 163 } 164 165 if vizScriptData, ok := contents["viz.html"]; ok { 166 if ds.Viz == nil { 167 ds.Viz = &dataset.Viz{} 168 } 169 ds.Viz.Text = string(vizScriptData) 170 ds.Viz.ScriptPath = "" 171 } 172 173 // Get ref to existing dataset 174 if refText, ok := contents["ref.txt"]; ok { 175 refStr := string(refText) 176 atPos := strings.Index(refStr, "@") 177 if atPos == -1 { 178 return fmt.Errorf("invalid dataset ref: no '@' found") 179 } 180 // Get name and peername 181 datasetName := refStr[:atPos] 182 sepPos := strings.Index(datasetName, "/") 183 if sepPos == -1 { 184 return fmt.Errorf("invalid dataset name: no '/' found") 185 } 186 ds.Peername = datasetName[:sepPos] 187 ds.Name = datasetName[sepPos+1:] 188 } 189 return nil 190 } 191 192 // UnzipGetContents is a generic zip-unpack to a map of filename: contents 193 // with contents represented as strings 194 func UnzipGetContents(data []byte) (map[string]string, error) { 195 zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 196 if err != nil { 197 return nil, err 198 } 199 contents, err := unzipGetContents(zr) 200 if err != nil { 201 return nil, err 202 } 203 204 res := map[string]string{} 205 for k, val := range contents { 206 res[k] = string(val) 207 } 208 return res, nil 209 } 210 211 // unzipGetContents reads a zip file's contents and returns a map from filename to file data 212 func unzipGetContents(zr *zip.Reader) (map[string][]byte, error) { 213 // Create a map from filenames in the zip to their json encoded contents. 214 contents := make(map[string][]byte) 215 for _, f := range zr.File { 216 rc, err := f.Open() 217 if err != nil { 218 return nil, err 219 } 220 data, err := ioutil.ReadAll(rc) 221 if err != nil { 222 return nil, err 223 } 224 contents[f.Name] = data 225 } 226 return contents, nil 227 }