github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/component/list_dir.go (about)

     1  package component
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"path/filepath"
     7  	"sort"
     8  	"strings"
     9  
    10  	"github.com/qri-io/dataset"
    11  	"github.com/qri-io/qfs"
    12  	"github.com/qri-io/qri/base/fill"
    13  )
    14  
    15  var (
    16  	// ErrNoDatasetFiles indicates no data
    17  	ErrNoDatasetFiles = fmt.Errorf("no dataset files provided")
    18  )
    19  
    20  // ListDirectoryComponents lists the relevant files and reads them into a component collection
    21  // object. The resulting object has stat'ed each file, and has their mtimes, but no files
    22  // have been read from disk. Conflicting files (such as both a "body.csv" and "body.json") will
    23  // cause the "ProblemKind" and "ProblemMessage" fields to be set. Other conflicts may also exist,
    24  // such as "meta" being in both "dataset.json" and "meta.json", but this function does not detect
    25  // these kinds of problems because it does not read any files.
    26  func ListDirectoryComponents(dir string) (Component, error) {
    27  	knownFilenames := GetKnownFilenames()
    28  	topLevel := FilesysComponent{}
    29  
    30  	finfos, err := ioutil.ReadDir(dir)
    31  	if err != nil {
    32  		return nil, err
    33  	}
    34  	// Note that this traversal will be in a non-deterministic order, so nothing in this loop
    35  	// should depend on list order.
    36  	for _, fi := range finfos {
    37  		ext := filepath.Ext(fi.Name())
    38  		componentName := strings.ToLower(strings.TrimSuffix(fi.Name(), ext))
    39  		allowedExtensions, ok := knownFilenames[componentName]
    40  		if !ok {
    41  			// If a file in this directory is not a known filename, ignore it
    42  			continue
    43  		}
    44  		if !sliceContains(allowedExtensions, ext) {
    45  			// Also ignore the file if it has an unknown file extension
    46  			continue
    47  		}
    48  		absPath, _ := filepath.Abs(filepath.Join(dir, fi.Name()))
    49  		// Check for conflict between this file and those already observed
    50  		if holder := topLevel.GetSubcomponent(componentName); holder != nil {
    51  			elem := holder.Base()
    52  			elem.ProblemKind = "conflict"
    53  			// Collect a message containing the paths of conflicting files
    54  			msg := elem.ProblemMessage
    55  			if msg == "" {
    56  				msg = filepath.Base(elem.SourceFile)
    57  			}
    58  			// Sort the problem files so that the message is deterministic
    59  			conflictFiles := append(strings.Split(msg, " "), filepath.Base(absPath))
    60  			sort.Strings(conflictFiles)
    61  			elem.ProblemMessage = strings.Join(conflictFiles, " ")
    62  			continue
    63  		}
    64  		topLevel.SetSubcomponent(
    65  			componentName,
    66  			BaseComponent{
    67  				ModTime:    fi.ModTime(),
    68  				SourceFile: absPath,
    69  				Format:     normalizeExtensionFormat(ext),
    70  			},
    71  		)
    72  	}
    73  	if topLevel.IsEmpty() {
    74  		return nil, ErrNoDatasetFiles
    75  	}
    76  	return &topLevel, nil
    77  }
    78  
    79  // ExpandListedComponents will read whatever is necessary in order to discover all of the components
    80  // that exist within this observation. For example, if a "dataset" exists, it will be read to find
    81  // out if it contains a "meta", a "structure", etc. No other components are expanded, but this
    82  // may change in the future if we decide another component can contain some other component. If
    83  // the "dataset" file does not exist, an empty dataset component will be created.
    84  func ExpandListedComponents(container Component, resolver qfs.Filesystem) error {
    85  	filesysComponent, ok := container.(*FilesysComponent)
    86  	if !ok {
    87  		return fmt.Errorf("cannot expand non-filesys container")
    88  	}
    89  
    90  	ds := dataset.Dataset{}
    91  
    92  	dsComponent := filesysComponent.GetSubcomponent("dataset")
    93  	if dsComponent == nil {
    94  		dsComponent = filesysComponent.SetSubcomponent("dataset", BaseComponent{})
    95  	} else {
    96  		fields, err := dsComponent.Base().LoadFile()
    97  		if err != nil {
    98  			// TODO(dlong): Better
    99  			return err
   100  		}
   101  
   102  		if err := fill.Struct(fields, &ds); err != nil {
   103  			// TODO(dlong): Fix me
   104  			return err
   105  		}
   106  	}
   107  
   108  	dsCont := dsComponent.(*DatasetComponent)
   109  	dsCont.Value = &ds
   110  
   111  	if ds.Commit != nil {
   112  		comp := assignField(filesysComponent, "commit", dsComponent)
   113  		if comp != nil {
   114  			commit := comp.(*CommitComponent)
   115  			commit.Value = ds.Commit
   116  			commit.IsLoaded = true
   117  		}
   118  	}
   119  	if ds.Meta != nil {
   120  		comp := assignField(filesysComponent, "meta", dsComponent)
   121  		if comp != nil {
   122  			meta := comp.(*MetaComponent)
   123  			meta.Value = ds.Meta
   124  			meta.IsLoaded = true
   125  		}
   126  	}
   127  	var bodyStructure *dataset.Structure
   128  	if ds.Structure != nil {
   129  		comp := assignField(filesysComponent, "structure", dsComponent)
   130  		if comp != nil {
   131  			structure := comp.(*StructureComponent)
   132  			structure.Value = ds.Structure
   133  			structure.IsLoaded = true
   134  			bodyStructure = ds.Structure
   135  		}
   136  	}
   137  	if ds.Readme != nil {
   138  		comp := assignField(filesysComponent, "readme", dsComponent)
   139  		if comp != nil {
   140  			readme := comp.(*ReadmeComponent)
   141  			readme.Resolver = resolver
   142  			readme.Value = ds.Readme
   143  			readme.IsLoaded = true
   144  		}
   145  	}
   146  	if ds.Transform != nil {
   147  		comp := assignField(filesysComponent, "transform", dsComponent)
   148  		if comp != nil {
   149  			readme := comp.(*TransformComponent)
   150  			readme.Resolver = resolver
   151  			readme.Value = ds.Transform
   152  			readme.IsLoaded = true
   153  		}
   154  	}
   155  	if ds.Body != nil {
   156  		comp := assignField(filesysComponent, "body", dsComponent)
   157  		if comp != nil {
   158  			body := comp.(*BodyComponent)
   159  			body.Resolver = resolver
   160  			if bodyStructure != nil {
   161  				body.Structure = bodyStructure
   162  			}
   163  		}
   164  	}
   165  
   166  	stComp := filesysComponent.GetSubcomponent("structure")
   167  	bdComp := filesysComponent.GetSubcomponent("body")
   168  	if stComp != nil && bdComp != nil {
   169  		if structure, ok := stComp.(*StructureComponent); ok {
   170  			if body, ok := bdComp.(*BodyComponent); ok {
   171  				if structure.Value == nil || structure.Value.Schema == nil {
   172  					structure.SchemaInference = func(ds *dataset.Dataset) (map[string]interface{}, error) {
   173  						err := body.LoadAndFill(ds)
   174  						if err != nil {
   175  							return nil, err
   176  						}
   177  						return body.InferredSchema, nil
   178  					}
   179  				}
   180  			}
   181  		}
   182  	}
   183  
   184  	return nil
   185  }
   186  
   187  func assignField(target Component, componentName string, parent Component) Component {
   188  	found := target.Base().GetSubcomponent(componentName)
   189  	if found != nil {
   190  		addFile := filepath.Base(parent.Base().SourceFile)
   191  		existingFile := filepath.Base(found.Base().SourceFile)
   192  		found.Base().ProblemKind = "conflict"
   193  		found.Base().ProblemMessage = fmt.Sprintf("%s %s", existingFile, addFile)
   194  		return nil
   195  	}
   196  	return target.Base().SetSubcomponent(
   197  		componentName,
   198  		BaseComponent{
   199  			ModTime:    parent.Base().ModTime,
   200  			SourceFile: parent.Base().SourceFile,
   201  			Format:     parent.Base().Format,
   202  		},
   203  	)
   204  }
   205  
   206  // GetKnownFilenames returns a map containing all possible filenames (filebase and extension) for
   207  // any file that can represent a component of a dataset.
   208  func GetKnownFilenames() map[string][]string {
   209  	componentExtensionTypes := []string{".json", ".yml", ".yaml"}
   210  	bodyExtensionTypes := []string{".csv", ".json", ".cbor", ".xlsx"}
   211  	readmeExtensionTypes := []string{".md", ".html"}
   212  	return map[string][]string{
   213  		"dataset":   componentExtensionTypes,
   214  		"commit":    componentExtensionTypes,
   215  		"meta":      componentExtensionTypes,
   216  		"structure": componentExtensionTypes,
   217  		// TODO(dlong): Viz is deprecated
   218  		"viz":       {".html"},
   219  		"readme":    readmeExtensionTypes,
   220  		"transform": {".star"},
   221  		"body":      bodyExtensionTypes,
   222  	}
   223  }
   224  
   225  // IsKnownFilename returns whether the file is a known component filename.
   226  func IsKnownFilename(fullpath string, known map[string][]string) bool {
   227  	if known == nil {
   228  		known = GetKnownFilenames()
   229  	}
   230  	basename := filepath.Base(fullpath)
   231  	ext := filepath.Ext(basename)
   232  	onlybase := strings.ToLower(basename[:len(basename)-len(ext)])
   233  	allowedExtensions, ok := known[onlybase]
   234  	if !ok {
   235  		return false
   236  	}
   237  	for _, allow := range allowedExtensions {
   238  		if allow == ext {
   239  			return true
   240  		}
   241  	}
   242  	return false
   243  }
   244  
   245  func normalizeExtensionFormat(text string) string {
   246  	text = strings.TrimPrefix(text, ".")
   247  	if text == "yml" {
   248  		text = "yaml"
   249  	}
   250  	return text
   251  }
   252  
   253  func sliceContains(subject []string, needle string) bool {
   254  	for _, elem := range subject {
   255  		if elem == needle {
   256  			return true
   257  		}
   258  	}
   259  	return false
   260  }