github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/archive/archive.go (about) 1 // Package archive creates and consumes high-fidelity conversions of dataset 2 // documents for export & import 3 package archive 4 5 import ( 6 "archive/zip" 7 "context" 8 "encoding/json" 9 "fmt" 10 "io" 11 "os" 12 "path" 13 "path/filepath" 14 "strings" 15 "time" 16 17 "github.com/ghodss/yaml" 18 logger "github.com/ipfs/go-log" 19 "github.com/qri-io/dataset" 20 "github.com/qri-io/dataset/dsio" 21 "github.com/qri-io/qfs" 22 "github.com/qri-io/qri/base" 23 "github.com/qri-io/qri/dsref" 24 ) 25 26 var log = logger.Logger("archive") 27 28 // Export generates a high-fidelity copy of a dataset that doesn't require qri 29 // software to read. 30 // TODO (b5) - this currently has a lot of overlap with "get" and "checkout" 31 // commands, we should emphasize those (more common) tools instead. See 32 // https://github.com/qri-io/qri/issues/1176 for discussion 33 func Export(ctx context.Context, fs qfs.Filesystem, ds *dataset.Dataset, refStr, targetDir, output, outputFormat string, zipped bool) (string, error) { 34 var err error 35 defer base.CloseDataset(ds) 36 37 format := outputFormat 38 if format == "" { 39 if zipped { 40 // Default format, if --zip flag is set, is zip 41 format = "zip" 42 } else { 43 // Default format is json, otherwise 44 format = "json" 45 } 46 } 47 48 var fileWritten string 49 if output == "" || isDirectory(output) { 50 // If output is blank or a directory, derive filename from repo name and commit timestamp. 51 baseName, err := GenerateFilename(ds, format) 52 if err != nil { 53 return "", err 54 } 55 fileWritten = path.Join(output, baseName) 56 } else { 57 // If output filename is not blank, check that the file extension matches the format. Or 58 // if format is not specified, use the file extension to derive the format. 59 ext := filepath.Ext(output) 60 if strings.HasPrefix(ext, ".") { 61 ext = ext[1:] 62 } 63 // If format was not supplied as a flag, and we're not outputting a zip, derive format 64 // from file extension. 65 if outputFormat == "" && !zipped { 66 format = ext 67 } 68 // Make sure the format doesn't contradict the file extension. 69 if ext != format { 70 return "", fmt.Errorf("file extension doesn't match format %s <> %s", ext, format) 71 } 72 fileWritten = output 73 } 74 75 // fileWritten represents the human-readable name of where the export is written to, while 76 // outputPath is an absolute path used in the implementation 77 var outputPath string 78 if path.IsAbs(fileWritten) { 79 outputPath = fileWritten 80 } else { 81 outputPath = path.Join(targetDir, fileWritten) 82 } 83 84 // If output is a format wrapped in a zip file, fixup the output name. 85 if zipped && format != "zip" { 86 outputPath = replaceExt(outputPath, ".zip") 87 fileWritten = replaceExt(fileWritten, ".zip") 88 } 89 90 // Make sure output doesn't already exist. 91 if _, err = os.Stat(outputPath); err == nil { 92 return "", fmt.Errorf(`already exists: "%s"`, fileWritten) 93 } 94 95 // Create output writer. 96 var writer io.Writer 97 writer, err = os.Create(outputPath) 98 if err != nil { 99 return "", err 100 } 101 102 // If outputting a wrapped zip file, create the zip wrapper. 103 if zipped && format != "zip" { 104 zipWriter := zip.NewWriter(writer) 105 106 writer, err = zipWriter.Create(fmt.Sprintf("dataset.%s", format)) 107 if err != nil { 108 return "", err 109 } 110 111 defer func() { 112 zipWriter.Close() 113 }() 114 } 115 116 // Create entry reader. 117 reader, err := dsio.NewEntryReader(ds.Structure, ds.BodyFile()) 118 if err != nil { 119 return "", err 120 } 121 122 switch format { 123 case "json": 124 125 // TODO (dlong): Look into combining this functionality (reading body, changing structure), 126 // with some of the functions in `base`. 127 bodyEntries, err := base.ReadEntries(reader) 128 if err != nil { 129 return "", err 130 } 131 ds.Body = bodyEntries 132 133 ds.Structure = &dataset.Structure{ 134 Format: "json", 135 Schema: ds.Structure.Schema, 136 Depth: ds.Structure.Depth, 137 ErrCount: ds.Structure.ErrCount, 138 } 139 // drop any transform stuff 140 ds.Transform = nil 141 142 if err := json.NewEncoder(writer).Encode(ds); err != nil { 143 return "", err 144 } 145 return fileWritten, nil 146 147 case "yaml": 148 149 bodyEntries, err := base.ReadEntries(reader) 150 if err != nil { 151 return "", err 152 } 153 ds.Body = bodyEntries 154 155 ds.Structure = &dataset.Structure{ 156 Format: "yaml", 157 Schema: ds.Structure.Schema, 158 Depth: ds.Structure.Depth, 159 ErrCount: ds.Structure.ErrCount, 160 } 161 // drop any transform stuff 162 ds.Transform = nil 163 dsBytes, err := yaml.Marshal(ds) 164 if err != nil { 165 return "", err 166 } 167 168 _, err = writer.Write(dsBytes) 169 if err != nil { 170 return "", err 171 } 172 return fileWritten, nil 173 174 case "xlsx": 175 st := &dataset.Structure{ 176 Format: "xlsx", 177 Schema: ds.Structure.Schema, 178 } 179 w, err := dsio.NewEntryWriter(st, writer) 180 if err != nil { 181 return "", err 182 } 183 184 if err := dsio.Copy(reader, w); err != nil { 185 return "", err 186 } 187 return fileWritten, w.Close() 188 189 case "zip": 190 ref, err := dsref.Parse(refStr) 191 if err != nil { 192 return "", err 193 } 194 blankInitID := "" 195 if err = WriteZip(ctx, fs, ds, "json", blankInitID, ref, writer); err != nil { 196 return "", err 197 } 198 199 return fileWritten, nil 200 201 default: 202 return "", fmt.Errorf("unknown file format \"%s\"", format) 203 } 204 } 205 206 func isDirectory(path string) bool { 207 st, err := os.Stat(path) 208 if err != nil { 209 return false 210 } 211 return st.Mode().IsDir() 212 } 213 214 func replaceExt(filename, newExt string) string { 215 ext := path.Ext(filename) 216 return filename[:len(filename)-len(ext)] + newExt 217 } 218 219 // GenerateFilename takes a dataset and generates a filename 220 // if no timestamp exists, it will default to the empty time.Time 221 // in the form [peername]-[datasetName]_-_[timestamp].[format] 222 func GenerateFilename(ds *dataset.Dataset, format string) (string, error) { 223 ts := time.Time{} 224 if ds.Commit != nil { 225 ts = ds.Commit.Timestamp 226 } 227 if format == "" { 228 if ds.Structure == nil || ds.Structure.Format == "" { 229 return "", fmt.Errorf("no format specified and no format present in the dataset Structure") 230 } 231 format = ds.Structure.Format 232 } 233 timeText := fmt.Sprintf("%04d-%02d-%02d-%02d-%02d-%02d", ts.Year(), ts.Month(), ts.Day(), 234 ts.Hour(), ts.Minute(), ts.Second()) 235 return fmt.Sprintf("%s-%s_-_%s.%s", ds.Peername, ds.Name, timeText, format), nil 236 }