github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/lib/file.go (about)

     1  package lib
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net/http"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"strings"
    12  
    13  	"github.com/qri-io/dataset"
    14  	"github.com/qri-io/qfs"
    15  	"github.com/qri-io/qri/base/archive"
    16  	"github.com/qri-io/qri/base/fill"
    17  	"gopkg.in/yaml.v2"
    18  )
    19  
    20  // PathJoinPosix joins two paths, and makes it explicitly clear we want POSIX slashes
    21  func PathJoinPosix(left, right string) string {
    22  	return path.Join(left, right)
    23  }
    24  
    25  // ReadDatasetFiles reads zero or more files, each representing a dataset or component of a
    26  // dataset, and deserializes them, merging the results into a single dataset object. It is an
    27  // error to provide any combination of files whose contents overlap (modify the same component).
    28  func ReadDatasetFiles(pathList ...string) (*dataset.Dataset, error) {
    29  	// If there's only a single file provided, read it and return the dataset.
    30  	if len(pathList) == 1 {
    31  		ds, _, err := readSingleFile(pathList[0])
    32  		return ds, err
    33  	}
    34  
    35  	// If there's multiple files provided, read each one and merge them. Any exclusive
    36  	// component is an error, any component showing up multiple times is an error.
    37  	foundKinds := make(map[string]bool)
    38  	ds := dataset.Dataset{}
    39  	for _, p := range pathList {
    40  		component, kind, err := readSingleFile(p)
    41  		if err != nil {
    42  			return nil, err
    43  		}
    44  
    45  		if kind == "zip" || kind == "ds" {
    46  			return nil, fmt.Errorf("conflict, cannot save a full dataset with other components")
    47  		}
    48  		if _, ok := foundKinds[kind]; ok {
    49  			return nil, fmt.Errorf("conflict, multiple components of kind \"%s\"", kind)
    50  		}
    51  		foundKinds[kind] = true
    52  
    53  		ds.Assign(component)
    54  	}
    55  
    56  	return &ds, nil
    57  }
    58  
    59  // readSingleFile reads a single file, either a full dataset or component, and returns it as
    60  // a dataset and a string specifying the kind of component that was created
    61  func readSingleFile(path string) (*dataset.Dataset, string, error) {
    62  	ds := dataset.Dataset{}
    63  	switch qfs.PathKind(path) {
    64  	case "http":
    65  		// currently the only supported type of file url is a zip archive
    66  		resp, err := http.Get(path)
    67  		if err != nil {
    68  			return nil, "", err
    69  		}
    70  		data, err := ioutil.ReadAll(resp.Body)
    71  		if err != nil {
    72  			return nil, "", err
    73  		}
    74  		resp.Body.Close()
    75  		err = archive.UnzipDatasetBytes(data, &ds)
    76  		return &ds, "zip", nil
    77  
    78  	case "ipfs":
    79  		return nil, "", fmt.Errorf("reading dataset files from IPFS currently unsupported")
    80  
    81  	case "local":
    82  		f, err := os.Open(path)
    83  		if err != nil {
    84  			return nil, "", err
    85  		}
    86  
    87  		fileExt := strings.ToLower(filepath.Ext(path))
    88  		switch fileExt {
    89  		case ".yaml", ".yml":
    90  			data, err := ioutil.ReadAll(f)
    91  			if err != nil {
    92  				return nil, "", err
    93  			}
    94  
    95  			fields := make(map[string]interface{})
    96  			if err = yaml.Unmarshal(data, fields); err != nil {
    97  				return nil, "", err
    98  			}
    99  
   100  			kind, err := fillDatasetOrComponent(fields, path, &ds)
   101  			return &ds, kind, err
   102  
   103  		case ".json":
   104  			fields := make(map[string]interface{})
   105  			if err = json.NewDecoder(f).Decode(&fields); err != nil {
   106  				if strings.HasPrefix(err.Error(), "json: cannot unmarshal array") {
   107  					err = fmt.Errorf("json has top-level type \"array\", cannot be a dataset file")
   108  				}
   109  				return nil, "", err
   110  			}
   111  			kind, err := fillDatasetOrComponent(fields, path, &ds)
   112  			return &ds, kind, err
   113  
   114  		case ".zip":
   115  			data, err := ioutil.ReadAll(f)
   116  			if err != nil {
   117  				return nil, "", err
   118  			}
   119  			err = archive.UnzipDatasetBytes(data, &ds)
   120  			return &ds, "zip", err
   121  
   122  		case ".star":
   123  			// starlark files are assumed to be a transform script with no additional
   124  			// tranform component details:
   125  			ds.Transform = &dataset.Transform{ScriptPath: path}
   126  			ds.Transform.SetScriptFile(qfs.NewMemfileReader("transform.star", f))
   127  			return &ds, "tf", nil
   128  
   129  		case ".html":
   130  			// html files are assumped to be a viz script with no additional viz
   131  			// component details
   132  			// TODO(dlong): Deprecate viz, assume "html" is a readme
   133  			ds.Viz = &dataset.Viz{ScriptPath: path}
   134  			ds.Viz.Format = "html"
   135  			ds.Viz.SetScriptFile(qfs.NewMemfileReader("viz.html", f))
   136  			return &ds, "vz", nil
   137  
   138  		case ".md":
   139  			// md files are assumped to be a readme file
   140  			ds.Readme = &dataset.Readme{ScriptPath: path}
   141  			ds.Readme.Format = "md"
   142  			ds.Readme.SetScriptFile(qfs.NewMemfileReader("readme.md", f))
   143  			return &ds, "rm", nil
   144  
   145  		default:
   146  			return nil, "", fmt.Errorf("error, unrecognized file extension: \"%s\"", fileExt)
   147  		}
   148  	default:
   149  		return nil, "", fmt.Errorf("error, unknown path kind: \"%s\"", qfs.PathKind(path))
   150  	}
   151  }
   152  
   153  func fillDatasetOrComponent(fields map[string]interface{}, path string, ds *dataset.Dataset) (string, error) {
   154  	var target interface{}
   155  	target = ds
   156  	kind := ""
   157  
   158  	// Look for the component key in the file.
   159  	if kindStr, ok := fields["qri"].(string); ok && len(kindStr) >= 2 {
   160  		kind = kindStr[:2]
   161  	}
   162  	// If no key found, see if the path matches one of the recognized component filenames
   163  	if kind == "" {
   164  		basename := filepath.Base(path)
   165  		basename = strings.TrimSuffix(basename, filepath.Ext(basename))
   166  		switch basename {
   167  		case "meta":
   168  			kind = "md"
   169  		case "structure":
   170  			kind = "st"
   171  		}
   172  	}
   173  
   174  	switch kind {
   175  	case "", "ds":
   176  		// nothing to do, default case is the Dataset itself
   177  		kind = "ds"
   178  	case "rm":
   179  		ds.Readme = &dataset.Readme{}
   180  		target = ds.Readme
   181  	case "md":
   182  		ds.Meta = &dataset.Meta{}
   183  		target = ds.Meta
   184  	case "cm":
   185  		ds.Commit = &dataset.Commit{}
   186  		target = ds.Commit
   187  	case "st":
   188  		ds.Structure = &dataset.Structure{}
   189  		target = ds.Structure
   190  	case "tf":
   191  		ds.Transform = &dataset.Transform{}
   192  		target = ds.Transform
   193  	default:
   194  		return "", fmt.Errorf("unknown component key %q", kind)
   195  	}
   196  
   197  	if err := fill.Struct(fields, target); err != nil {
   198  		return "", err
   199  	}
   200  	absDatasetPaths(path, ds)
   201  	return kind, nil
   202  }
   203  
   204  // absDatasetPaths converts any relative filepath references in a Dataset to
   205  // their absolute counterpart
   206  func absDatasetPaths(path string, dsp *dataset.Dataset) {
   207  	base := filepath.Dir(path)
   208  	if dsp.BodyPath != "" && qfs.PathKind(dsp.BodyPath) == "local" && !filepath.IsAbs(dsp.BodyPath) {
   209  		dsp.BodyPath = filepath.Join(base, dsp.BodyPath)
   210  	}
   211  	if dsp.Transform != nil && qfs.PathKind(dsp.Transform.ScriptPath) == "local" && !filepath.IsAbs(dsp.Transform.ScriptPath) {
   212  		dsp.Transform.ScriptPath = filepath.Join(base, dsp.Transform.ScriptPath)
   213  	}
   214  	if dsp.Viz != nil && qfs.PathKind(dsp.Viz.ScriptPath) == "local" && !filepath.IsAbs(dsp.Viz.ScriptPath) {
   215  		dsp.Viz.ScriptPath = filepath.Join(base, dsp.Viz.ScriptPath)
   216  	}
   217  	if dsp.Readme != nil && qfs.PathKind(dsp.Readme.ScriptPath) == "local" && !filepath.IsAbs(dsp.Readme.ScriptPath) {
   218  		dsp.Readme.ScriptPath = filepath.Join(base, dsp.Readme.ScriptPath)
   219  	}
   220  }