github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/component/list_dir.go (about) 1 package component 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "path/filepath" 7 "sort" 8 "strings" 9 10 "github.com/qri-io/dataset" 11 "github.com/qri-io/qfs" 12 "github.com/qri-io/qri/base/fill" 13 ) 14 15 var ( 16 // ErrNoDatasetFiles indicates no data 17 ErrNoDatasetFiles = fmt.Errorf("no dataset files provided") 18 ) 19 20 // ListDirectoryComponents lists the relevant files and reads them into a component collection 21 // object. The resulting object has stat'ed each file, and has their mtimes, but no files 22 // have been read from disk. Conflicting files (such as both a "body.csv" and "body.json") will 23 // cause the "ProblemKind" and "ProblemMessage" fields to be set. Other conflicts may also exist, 24 // such as "meta" being in both "dataset.json" and "meta.json", but this function does not detect 25 // these kinds of problems because it does not read any files. 26 func ListDirectoryComponents(dir string) (Component, error) { 27 knownFilenames := GetKnownFilenames() 28 topLevel := FilesysComponent{} 29 30 finfos, err := ioutil.ReadDir(dir) 31 if err != nil { 32 return nil, err 33 } 34 // Note that this traversal will be in a non-deterministic order, so nothing in this loop 35 // should depend on list order. 
36 for _, fi := range finfos { 37 ext := filepath.Ext(fi.Name()) 38 componentName := strings.ToLower(strings.TrimSuffix(fi.Name(), ext)) 39 allowedExtensions, ok := knownFilenames[componentName] 40 if !ok { 41 // If a file in this directory is not a known filename, ignore it 42 continue 43 } 44 if !sliceContains(allowedExtensions, ext) { 45 // Also ignore the file if it has an unknown file extension 46 continue 47 } 48 absPath, _ := filepath.Abs(filepath.Join(dir, fi.Name())) 49 // Check for conflict between this file and those already observed 50 if holder := topLevel.GetSubcomponent(componentName); holder != nil { 51 elem := holder.Base() 52 elem.ProblemKind = "conflict" 53 // Collect a message containing the paths of conflicting files 54 msg := elem.ProblemMessage 55 if msg == "" { 56 msg = filepath.Base(elem.SourceFile) 57 } 58 // Sort the problem files so that the message is deterministic 59 conflictFiles := append(strings.Split(msg, " "), filepath.Base(absPath)) 60 sort.Strings(conflictFiles) 61 elem.ProblemMessage = strings.Join(conflictFiles, " ") 62 continue 63 } 64 topLevel.SetSubcomponent( 65 componentName, 66 BaseComponent{ 67 ModTime: fi.ModTime(), 68 SourceFile: absPath, 69 Format: normalizeExtensionFormat(ext), 70 }, 71 ) 72 } 73 if topLevel.IsEmpty() { 74 return nil, ErrNoDatasetFiles 75 } 76 return &topLevel, nil 77 } 78 79 // ExpandListedComponents will read whatever is necessary in order to discover all of the components 80 // that exist within this observation. For example, if a "dataset" exists, it will be read to find 81 // out if it contains a "meta", a "structure", etc. No other components are expanded, but this 82 // may change in the future if we decide another component can contain some other component. If 83 // the "dataset" file does not exist, an empty dataset component will be created. 
84 func ExpandListedComponents(container Component, resolver qfs.Filesystem) error { 85 filesysComponent, ok := container.(*FilesysComponent) 86 if !ok { 87 return fmt.Errorf("cannot expand non-filesys container") 88 } 89 90 ds := dataset.Dataset{} 91 92 dsComponent := filesysComponent.GetSubcomponent("dataset") 93 if dsComponent == nil { 94 dsComponent = filesysComponent.SetSubcomponent("dataset", BaseComponent{}) 95 } else { 96 fields, err := dsComponent.Base().LoadFile() 97 if err != nil { 98 // TODO(dlong): Better 99 return err 100 } 101 102 if err := fill.Struct(fields, &ds); err != nil { 103 // TODO(dlong): Fix me 104 return err 105 } 106 } 107 108 dsCont := dsComponent.(*DatasetComponent) 109 dsCont.Value = &ds 110 111 if ds.Commit != nil { 112 comp := assignField(filesysComponent, "commit", dsComponent) 113 if comp != nil { 114 commit := comp.(*CommitComponent) 115 commit.Value = ds.Commit 116 commit.IsLoaded = true 117 } 118 } 119 if ds.Meta != nil { 120 comp := assignField(filesysComponent, "meta", dsComponent) 121 if comp != nil { 122 meta := comp.(*MetaComponent) 123 meta.Value = ds.Meta 124 meta.IsLoaded = true 125 } 126 } 127 var bodyStructure *dataset.Structure 128 if ds.Structure != nil { 129 comp := assignField(filesysComponent, "structure", dsComponent) 130 if comp != nil { 131 structure := comp.(*StructureComponent) 132 structure.Value = ds.Structure 133 structure.IsLoaded = true 134 bodyStructure = ds.Structure 135 } 136 } 137 if ds.Readme != nil { 138 comp := assignField(filesysComponent, "readme", dsComponent) 139 if comp != nil { 140 readme := comp.(*ReadmeComponent) 141 readme.Resolver = resolver 142 readme.Value = ds.Readme 143 readme.IsLoaded = true 144 } 145 } 146 if ds.Transform != nil { 147 comp := assignField(filesysComponent, "transform", dsComponent) 148 if comp != nil { 149 readme := comp.(*TransformComponent) 150 readme.Resolver = resolver 151 readme.Value = ds.Transform 152 readme.IsLoaded = true 153 } 154 } 155 if ds.Body != nil 
{ 156 comp := assignField(filesysComponent, "body", dsComponent) 157 if comp != nil { 158 body := comp.(*BodyComponent) 159 body.Resolver = resolver 160 if bodyStructure != nil { 161 body.Structure = bodyStructure 162 } 163 } 164 } 165 166 stComp := filesysComponent.GetSubcomponent("structure") 167 bdComp := filesysComponent.GetSubcomponent("body") 168 if stComp != nil && bdComp != nil { 169 if structure, ok := stComp.(*StructureComponent); ok { 170 if body, ok := bdComp.(*BodyComponent); ok { 171 if structure.Value == nil || structure.Value.Schema == nil { 172 structure.SchemaInference = func(ds *dataset.Dataset) (map[string]interface{}, error) { 173 err := body.LoadAndFill(ds) 174 if err != nil { 175 return nil, err 176 } 177 return body.InferredSchema, nil 178 } 179 } 180 } 181 } 182 } 183 184 return nil 185 } 186 187 func assignField(target Component, componentName string, parent Component) Component { 188 found := target.Base().GetSubcomponent(componentName) 189 if found != nil { 190 addFile := filepath.Base(parent.Base().SourceFile) 191 existingFile := filepath.Base(found.Base().SourceFile) 192 found.Base().ProblemKind = "conflict" 193 found.Base().ProblemMessage = fmt.Sprintf("%s %s", existingFile, addFile) 194 return nil 195 } 196 return target.Base().SetSubcomponent( 197 componentName, 198 BaseComponent{ 199 ModTime: parent.Base().ModTime, 200 SourceFile: parent.Base().SourceFile, 201 Format: parent.Base().Format, 202 }, 203 ) 204 } 205 206 // GetKnownFilenames returns a map containing all possible filenames (filebase and extension) for 207 // any file that can represent a component of a dataset. 
func GetKnownFilenames() map[string][]string {
	// Keys are lowercase component names; values are the extensions (with leading dot) that a
	// file for that component may have. Every key gets its own slice, so a caller that modifies
	// one entry of the returned map cannot accidentally change another entry.
	componentExtensionTypes := []string{".json", ".yml", ".yaml"}
	cloneExts := func(exts []string) []string {
		return append([]string(nil), exts...)
	}
	return map[string][]string{
		"dataset":   cloneExts(componentExtensionTypes),
		"commit":    cloneExts(componentExtensionTypes),
		"meta":      cloneExts(componentExtensionTypes),
		"structure": cloneExts(componentExtensionTypes),
		// TODO(dlong): Viz is deprecated
		"viz":       {".html"},
		"readme":    {".md", ".html"},
		"transform": {".star"},
		"body":      {".csv", ".json", ".cbor", ".xlsx"},
	}
}

// IsKnownFilename returns whether the file is a known component filename. Only the final path
// element of fullpath is considered. Its name without the extension is lowercased before being
// looked up in known, while the extension itself must match exactly (case-sensitive). If known
// is nil, the map from GetKnownFilenames is used.
func IsKnownFilename(fullpath string, known map[string][]string) bool {
	if known == nil {
		known = GetKnownFilenames()
	}
	basename := filepath.Base(fullpath)
	ext := filepath.Ext(basename)
	componentName := strings.ToLower(strings.TrimSuffix(basename, ext))
	allowedExtensions, ok := known[componentName]
	return ok && sliceContains(allowedExtensions, ext)
}

// normalizeExtensionFormat converts a file extension into a format name by removing the leading
// dot and mapping "yml" to "yaml". Any other text is returned without further changes.
func normalizeExtensionFormat(text string) string {
	text = strings.TrimPrefix(text, ".")
	if text == "yml" {
		text = "yaml"
	}
	return text
}

// sliceContains reports whether needle is equal to any element of subject.
func sliceContains(subject []string, needle string) bool {
	for _, elem := range subject {
		if elem == needle {
			return true
		}
	}
	return false
}