github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/lib/file.go (about) 1 package lib 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "net/http" 8 "os" 9 "path" 10 "path/filepath" 11 "strings" 12 13 "github.com/qri-io/dataset" 14 "github.com/qri-io/qfs" 15 "github.com/qri-io/qri/base/archive" 16 "github.com/qri-io/qri/base/fill" 17 "gopkg.in/yaml.v2" 18 ) 19 20 // PathJoinPosix joins two paths, and makes it explicitly clear we want POSIX slashes 21 func PathJoinPosix(left, right string) string { 22 return path.Join(left, right) 23 } 24 25 // ReadDatasetFiles reads zero or more files, each representing a dataset or component of a 26 // dataset, and deserializes them, merging the results into a single dataset object. It is an 27 // error to provide any combination of files whose contents overlap (modify the same component). 28 func ReadDatasetFiles(pathList ...string) (*dataset.Dataset, error) { 29 // If there's only a single file provided, read it and return the dataset. 30 if len(pathList) == 1 { 31 ds, _, err := readSingleFile(pathList[0]) 32 return ds, err 33 } 34 35 // If there's multiple files provided, read each one and merge them. Any exclusive 36 // component is an error, any component showing up multiple times is an error. 37 foundKinds := make(map[string]bool) 38 ds := dataset.Dataset{} 39 for _, p := range pathList { 40 component, kind, err := readSingleFile(p) 41 if err != nil { 42 return nil, err 43 } 44 45 if kind == "zip" || kind == "ds" { 46 return nil, fmt.Errorf("conflict, cannot save a full dataset with other components") 47 } 48 if _, ok := foundKinds[kind]; ok { 49 return nil, fmt.Errorf("conflict, multiple components of kind \"%s\"", kind) 50 } 51 foundKinds[kind] = true 52 53 ds.Assign(component) 54 } 55 56 return &ds, nil 57 } 58 59 // readSingleFile reads a single file, either a full dataset or component, and returns it as 60 // a dataset and a string specifying the kind of component that was created 61 func readSingleFile(path string) (*dataset.Dataset, string, error) { 62 ds := dataset.Dataset{} 63 switch qfs.PathKind(path) { 64 case "http": 65 // currently the only supported type of file url is a zip archive 66 resp, err := http.Get(path) 67 if err != nil { 68 return nil, "", err 69 } 70 data, err := ioutil.ReadAll(resp.Body) 71 if err != nil { 72 return nil, "", err 73 } 74 resp.Body.Close() 75 err = archive.UnzipDatasetBytes(data, &ds) 76 return &ds, "zip", nil 77 78 case "ipfs": 79 return nil, "", fmt.Errorf("reading dataset files from IPFS currently unsupported") 80 81 case "local": 82 f, err := os.Open(path) 83 if err != nil { 84 return nil, "", err 85 } 86 87 fileExt := strings.ToLower(filepath.Ext(path)) 88 switch fileExt { 89 case ".yaml", ".yml": 90 data, err := ioutil.ReadAll(f) 91 if err != nil { 92 return nil, "", err 93 } 94 95 fields := make(map[string]interface{}) 96 if err = yaml.Unmarshal(data, fields); err != nil { 97 return nil, "", err 98 } 99 100 kind, err := fillDatasetOrComponent(fields, path, &ds) 101 return &ds, kind, err 102 103 case ".json": 104 fields := make(map[string]interface{}) 105 if err = json.NewDecoder(f).Decode(&fields); err != nil { 106 if strings.HasPrefix(err.Error(), "json: cannot unmarshal array") { 107 err = fmt.Errorf("json has top-level type \"array\", cannot be a dataset file") 108 } 109 return nil, "", err 110 } 111 kind, err := fillDatasetOrComponent(fields, path, &ds) 112 return &ds, kind, err 113 114 case ".zip": 115 data, err := ioutil.ReadAll(f) 116 if err != nil { 117 return nil, "", err 118 } 119 err = archive.UnzipDatasetBytes(data, &ds) 120 return &ds, "zip", err 121 122 case ".star": 123 // starlark files are assumed to be a transform script with no additional 124 // tranform component details: 125 ds.Transform = &dataset.Transform{ScriptPath: path} 126 ds.Transform.SetScriptFile(qfs.NewMemfileReader("transform.star", f)) 127 return &ds, "tf", nil 128 129 case ".html": 130 // html files are assumped to be a viz script with no additional viz 131 // component details 132 // TODO(dlong): Deprecate viz, assume "html" is a readme 133 ds.Viz = &dataset.Viz{ScriptPath: path} 134 ds.Viz.Format = "html" 135 ds.Viz.SetScriptFile(qfs.NewMemfileReader("viz.html", f)) 136 return &ds, "vz", nil 137 138 case ".md": 139 // md files are assumped to be a readme file 140 ds.Readme = &dataset.Readme{ScriptPath: path} 141 ds.Readme.Format = "md" 142 ds.Readme.SetScriptFile(qfs.NewMemfileReader("readme.md", f)) 143 return &ds, "rm", nil 144 145 default: 146 return nil, "", fmt.Errorf("error, unrecognized file extension: \"%s\"", fileExt) 147 } 148 default: 149 return nil, "", fmt.Errorf("error, unknown path kind: \"%s\"", qfs.PathKind(path)) 150 } 151 } 152 153 func fillDatasetOrComponent(fields map[string]interface{}, path string, ds *dataset.Dataset) (string, error) { 154 var target interface{} 155 target = ds 156 kind := "" 157 158 // Look for the component key in the file. 159 if kindStr, ok := fields["qri"].(string); ok && len(kindStr) >= 2 { 160 kind = kindStr[:2] 161 } 162 // If no key found, see if the path matches one of the recognized component filenames 163 if kind == "" { 164 basename := filepath.Base(path) 165 basename = strings.TrimSuffix(basename, filepath.Ext(basename)) 166 switch basename { 167 case "meta": 168 kind = "md" 169 case "structure": 170 kind = "st" 171 } 172 } 173 174 switch kind { 175 case "", "ds": 176 // nothing to do, default case is the Dataset itself 177 kind = "ds" 178 case "rm": 179 ds.Readme = &dataset.Readme{} 180 target = ds.Readme 181 case "md": 182 ds.Meta = &dataset.Meta{} 183 target = ds.Meta 184 case "cm": 185 ds.Commit = &dataset.Commit{} 186 target = ds.Commit 187 case "st": 188 ds.Structure = &dataset.Structure{} 189 target = ds.Structure 190 case "tf": 191 ds.Transform = &dataset.Transform{} 192 target = ds.Transform 193 default: 194 return "", fmt.Errorf("unknown component key %q", kind) 195 } 196 197 if err := fill.Struct(fields, target); err != nil { 198 return "", err 199 } 200 absDatasetPaths(path, ds) 201 return kind, nil 202 } 203 204 // absDatasetPaths converts any relative filepath references in a Dataset to 205 // their absolute counterpart 206 func absDatasetPaths(path string, dsp *dataset.Dataset) { 207 base := filepath.Dir(path) 208 if dsp.BodyPath != "" && qfs.PathKind(dsp.BodyPath) == "local" && !filepath.IsAbs(dsp.BodyPath) { 209 dsp.BodyPath = filepath.Join(base, dsp.BodyPath) 210 } 211 if dsp.Transform != nil && qfs.PathKind(dsp.Transform.ScriptPath) == "local" && !filepath.IsAbs(dsp.Transform.ScriptPath) { 212 dsp.Transform.ScriptPath = filepath.Join(base, dsp.Transform.ScriptPath) 213 } 214 if dsp.Viz != nil && qfs.PathKind(dsp.Viz.ScriptPath) == "local" && !filepath.IsAbs(dsp.Viz.ScriptPath) { 215 dsp.Viz.ScriptPath = filepath.Join(base, dsp.Viz.ScriptPath) 216 } 217 if dsp.Readme != nil && qfs.PathKind(dsp.Readme.ScriptPath) == "local" && !filepath.IsAbs(dsp.Readme.ScriptPath) { 218 dsp.Readme.ScriptPath = filepath.Join(base, dsp.Readme.ScriptPath) 219 } 220 }