github.com/mithrandie/csvq@v1.18.1/lib/query/file_info.go (about) 1 package query 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "path/filepath" 8 "reflect" 9 "strings" 10 11 "github.com/mithrandie/csvq/lib/file" 12 "github.com/mithrandie/csvq/lib/option" 13 "github.com/mithrandie/csvq/lib/parser" 14 15 "github.com/mithrandie/go-text" 16 "github.com/mithrandie/go-text/fixedlen" 17 "github.com/mithrandie/go-text/json" 18 ) 19 20 const ( 21 TableDelimiter = "DELIMITER" 22 TableDelimiterPositions = "DELIMITER_POSITIONS" 23 TableFormat = "FORMAT" 24 TableEncoding = "ENCODING" 25 TableLineBreak = "LINE_BREAK" 26 TableHeader = "HEADER" 27 TableEncloseAll = "ENCLOSE_ALL" 28 TableJsonEscape = "JSON_ESCAPE" 29 TablePrettyPrint = "PRETTY_PRINT" 30 ) 31 32 type ViewType int 33 34 const ( 35 ViewTypeFile ViewType = iota 36 ViewTypeTemporaryTable 37 ViewTypeStdin 38 ViewTypeRemoteObject 39 ViewTypeStringObject 40 ViewTypeInlineTable 41 ) 42 43 var FileAttributeList = []string{ 44 TableDelimiter, 45 TableDelimiterPositions, 46 TableFormat, 47 TableEncoding, 48 TableLineBreak, 49 TableHeader, 50 TableEncloseAll, 51 TableJsonEscape, 52 TablePrettyPrint, 53 } 54 55 type TableAttributeUnchangedError struct { 56 Path string 57 Message string 58 } 59 60 func NewTableAttributeUnchangedError(fpath string) error { 61 return &TableAttributeUnchangedError{ 62 Path: fpath, 63 Message: "table attributes of %s remain unchanged", 64 } 65 } 66 67 func (e TableAttributeUnchangedError) Error() string { 68 return fmt.Sprintf(e.Message, e.Path) 69 } 70 71 type FileInfo struct { 72 Path string 73 ArchivePath string 74 75 Format option.Format 76 Delimiter rune 77 DelimiterPositions fixedlen.DelimiterPositions 78 JsonQuery string 79 Encoding text.Encoding 80 LineBreak text.LineBreak 81 NoHeader bool 82 EncloseAll bool 83 JsonEscape json.EscapeType 84 PrettyPrint bool 85 86 SingleLine bool 87 88 Handler *file.Handler 89 90 ForUpdate bool 91 ViewType ViewType 92 93 restorePointHeader Header 94 restorePointRecordSet RecordSet 95 } 96 97 func NewFileInfo( 98 filename parser.Identifier, 99 repository string, 100 options option.ImportOptions, 101 defaultFormat option.Format, 102 ) (*FileInfo, error) { 103 fpath, format, err := SearchFilePath(filename, repository, options, defaultFormat) 104 if err != nil { 105 return nil, err 106 } 107 108 delimiter := options.Delimiter 109 encoding := options.Encoding 110 switch format { 111 case option.TSV: 112 delimiter = '\t' 113 case option.JSON, option.JSONL: 114 encoding = text.UTF8 115 } 116 117 return &FileInfo{ 118 Path: fpath, 119 Format: format, 120 Delimiter: delimiter, 121 Encoding: encoding, 122 ViewType: ViewTypeFile, 123 }, nil 124 } 125 126 func NewTemporaryTableFileInfo(name string) *FileInfo { 127 return &FileInfo{ 128 Path: name, 129 ViewType: ViewTypeTemporaryTable, 130 } 131 } 132 133 func NewStdinFileInfo(filePath string, importOptions option.ImportOptions, exportOptions option.ExportOptions) *FileInfo { 134 f := &FileInfo{ 135 Path: filePath, 136 ViewType: ViewTypeStdin, 137 } 138 f.SetAllDefaultFileInfoAttributes(importOptions, exportOptions) 139 return f 140 } 141 142 func NewInlineFileInfo(filePath string, importOptions option.ImportOptions, exportOptions option.ExportOptions) *FileInfo { 143 f := &FileInfo{ 144 Path: filePath, 145 ViewType: ViewTypeInlineTable, 146 } 147 f.SetAllDefaultFileInfoAttributes(importOptions, exportOptions) 148 return f 149 } 150 151 func (f *FileInfo) SetAllDefaultFileInfoAttributes(importOptions option.ImportOptions, exportOptions option.ExportOptions) { 152 f.Format = importOptions.Format 153 f.Delimiter = importOptions.Delimiter 154 f.Encoding = importOptions.Encoding 155 156 switch f.Format { 157 case option.TSV: 158 f.Delimiter = '\t' 159 case option.JSON, option.JSONL: 160 f.Encoding = text.UTF8 161 } 162 163 f.SetDefaultFileInfoAttributes(importOptions, exportOptions) 164 } 165 166 func (f *FileInfo) SetDefaultFileInfoAttributes(importOptions option.ImportOptions, exportOptions option.ExportOptions) { 167 f.DelimiterPositions = importOptions.DelimiterPositions 168 f.SingleLine = importOptions.SingleLine 169 f.JsonQuery = option.TrimSpace(importOptions.JsonQuery) 170 f.LineBreak = exportOptions.LineBreak 171 f.NoHeader = importOptions.NoHeader 172 f.EncloseAll = exportOptions.EncloseAll 173 f.JsonEscape = exportOptions.JsonEscape 174 } 175 176 func (f *FileInfo) IsUpdatable() bool { 177 return f.IsFile() || f.IsInMemoryTable() 178 } 179 180 func (f *FileInfo) SetDelimiter(s string) error { 181 delimiter, err := option.ParseDelimiter(s) 182 if err != nil { 183 return err 184 } 185 186 var format option.Format 187 if delimiter == '\t' { 188 format = option.TSV 189 } else { 190 format = option.CSV 191 } 192 193 if f.Delimiter == delimiter && f.Format == format { 194 return NewTableAttributeUnchangedError(f.Path) 195 } 196 197 f.Delimiter = delimiter 198 f.Format = format 199 return nil 200 } 201 202 func (f *FileInfo) SetDelimiterPositions(s string) error { 203 pos, singleLine, err := option.ParseDelimiterPositions(s) 204 if err != nil { 205 return err 206 } 207 delimiterPositions := fixedlen.DelimiterPositions(pos) 208 format := option.FIXED 209 210 if reflect.DeepEqual(f.DelimiterPositions, delimiterPositions) && 211 f.SingleLine == singleLine && 212 f.Format == format { 213 return NewTableAttributeUnchangedError(f.Path) 214 } 215 216 f.Format = format 217 f.DelimiterPositions = delimiterPositions 218 f.SingleLine = singleLine 219 220 return nil 221 } 222 223 func (f *FileInfo) SetFormat(s string) error { 224 format, escapeType, err := option.ParseFormat(s, f.JsonEscape) 225 if err != nil { 226 return err 227 } 228 229 if f.Format == format && 230 f.JsonEscape == escapeType { 231 return NewTableAttributeUnchangedError(f.Path) 232 } 233 234 delimiter := f.Delimiter 235 encoding := f.Encoding 236 237 switch format { 238 case option.TSV: 239 delimiter = '\t' 240 case option.JSON, option.JSONL: 241 encoding = text.UTF8 242 } 243 244 f.Format = format 245 f.JsonEscape = escapeType 246 f.Delimiter = delimiter 247 f.Encoding = encoding 248 return nil 249 } 250 251 func (f *FileInfo) SetEncoding(s string) error { 252 encoding, err := option.ParseEncoding(s) 253 if err != nil || encoding == text.AUTO { 254 return errors.New("encoding must be one of UTF8|UTF8M|UTF16|UTF16BE|UTF16LE|UTF16BEM|UTF16LEM|SJIS") 255 } 256 257 switch f.Format { 258 case option.JSON, option.JSONL: 259 if encoding != text.UTF8 { 260 return errors.New("json format is supported only UTF8") 261 } 262 } 263 264 if f.Encoding == encoding { 265 return NewTableAttributeUnchangedError(f.Path) 266 } 267 268 f.Encoding = encoding 269 return nil 270 } 271 272 func (f *FileInfo) SetLineBreak(s string) error { 273 lb, err := option.ParseLineBreak(s) 274 if err != nil { 275 return err 276 } 277 278 if f.LineBreak == lb { 279 return NewTableAttributeUnchangedError(f.Path) 280 } 281 282 f.LineBreak = lb 283 return nil 284 } 285 286 func (f *FileInfo) SetNoHeader(b bool) error { 287 if b == f.NoHeader { 288 return NewTableAttributeUnchangedError(f.Path) 289 } 290 f.NoHeader = b 291 return nil 292 } 293 294 func (f *FileInfo) SetEncloseAll(b bool) error { 295 if b == f.EncloseAll { 296 return NewTableAttributeUnchangedError(f.Path) 297 } 298 f.EncloseAll = b 299 return nil 300 } 301 302 func (f *FileInfo) SetJsonEscape(s string) error { 303 escape, err := option.ParseJsonEscapeType(s) 304 if err != nil { 305 return err 306 } 307 308 if escape == f.JsonEscape { 309 return NewTableAttributeUnchangedError(f.Path) 310 } 311 312 f.JsonEscape = escape 313 return nil 314 } 315 316 func (f *FileInfo) SetPrettyPrint(b bool) error { 317 if b == f.PrettyPrint { 318 return NewTableAttributeUnchangedError(f.Path) 319 } 320 f.PrettyPrint = b 321 return nil 322 } 323 324 func (f *FileInfo) IsFile() bool { 325 return f.ViewType == ViewTypeFile 326 } 327 328 func (f *FileInfo) IsTemporaryTable() bool { 329 return f.ViewType == ViewTypeTemporaryTable 330 } 331 332 func (f *FileInfo) IsStdin() bool { 333 return f.ViewType == ViewTypeStdin 334 } 335 336 func (f *FileInfo) IsInMemoryTable() bool { 337 return f.ViewType == ViewTypeStdin || f.ViewType == ViewTypeTemporaryTable 338 } 339 340 func (f *FileInfo) IsRemoteObject() bool { 341 return f.ViewType == ViewTypeRemoteObject 342 } 343 344 func (f *FileInfo) IsStringObject() bool { 345 return f.ViewType == ViewTypeStringObject 346 } 347 348 func (f *FileInfo) IsInlineTable() bool { 349 return f.ViewType == ViewTypeInlineTable 350 } 351 352 func (f *FileInfo) IdentifiedPath() string { 353 s := strings.ToUpper(f.Path) 354 if 0 < len(f.ArchivePath) { 355 s = s + " IN " + strings.ToUpper(f.ArchivePath) 356 } 357 return s 358 } 359 360 func (f *FileInfo) ExportOptions(tx *Transaction) option.ExportOptions { 361 ops := tx.Flags.ExportOptions.Copy() 362 ops.Format = f.Format 363 ops.Delimiter = f.Delimiter 364 ops.DelimiterPositions = f.DelimiterPositions 365 ops.SingleLine = f.SingleLine 366 ops.Encoding = f.Encoding 367 ops.LineBreak = f.LineBreak 368 ops.WithoutHeader = f.NoHeader 369 ops.EncloseAll = f.EncloseAll 370 ops.JsonEscape = f.JsonEscape 371 ops.PrettyPrint = f.PrettyPrint 372 return ops 373 } 374 375 func SearchFilePath(filename parser.Identifier, repository string, options option.ImportOptions, defaultFormat option.Format) (string, option.Format, error) { 376 var fpath string 377 var err error 378 379 format := options.Format 380 381 switch format { 382 case option.CSV, option.TSV: 383 fpath, err = SearchCSVFilePath(filename, repository) 384 case option.JSON: 385 fpath, err = SearchJsonFilePath(filename, repository) 386 case option.JSONL: 387 fpath, err = SearchJsonlFilePath(filename, repository) 388 case option.FIXED: 389 fpath, err = SearchFixedLengthFilePath(filename, repository) 390 case option.LTSV: 391 fpath, err = SearchLTSVFilePath(filename, repository) 392 default: // AutoSelect 393 if fpath, err = SearchFilePathFromAllTypes(filename, repository); err == nil { 394 switch strings.ToLower(filepath.Ext(fpath)) { 395 case option.CsvExt: 396 format = option.CSV 397 case option.TsvExt: 398 format = option.TSV 399 case option.JsonExt: 400 format = option.JSON 401 case option.JsonlExt: 402 format = option.JSONL 403 case option.LtsvExt: 404 format = option.LTSV 405 default: 406 format = defaultFormat 407 } 408 } 409 } 410 411 return fpath, format, err 412 } 413 414 func SearchCSVFilePath(filename parser.Identifier, repository string) (string, error) { 415 return SearchFilePathWithExtType(filename, repository, []string{option.CsvExt, option.TsvExt, option.TextExt}) 416 } 417 418 func SearchJsonFilePath(filename parser.Identifier, repository string) (string, error) { 419 return SearchFilePathWithExtType(filename, repository, []string{option.JsonExt}) 420 } 421 422 func SearchJsonlFilePath(filename parser.Identifier, repository string) (string, error) { 423 return SearchFilePathWithExtType(filename, repository, []string{option.JsonlExt}) 424 } 425 426 func SearchFixedLengthFilePath(filename parser.Identifier, repository string) (string, error) { 427 return SearchFilePathWithExtType(filename, repository, []string{option.TextExt}) 428 } 429 430 func SearchLTSVFilePath(filename parser.Identifier, repository string) (string, error) { 431 return SearchFilePathWithExtType(filename, repository, []string{option.LtsvExt, option.TextExt}) 432 } 433 434 func SearchFilePathFromAllTypes(filename parser.Identifier, repository string) (string, error) { 435 return SearchFilePathWithExtType(filename, repository, []string{option.CsvExt, option.TsvExt, option.JsonExt, option.JsonlExt, option.LtsvExt, option.TextExt}) 436 } 437 438 func SearchFilePathWithExtType(filename parser.Identifier, repository string, extTypes []string) (string, error) { 439 fpath := filename.Literal 440 if !filepath.IsAbs(fpath) { 441 if len(repository) < 1 { 442 repository, _ = os.Getwd() 443 } 444 fpath = filepath.Join(repository, fpath) 445 } 446 447 var info os.FileInfo 448 var err error 449 450 if info, err = os.Stat(fpath); err != nil { 451 pathes := make([]string, 0, len(extTypes)) 452 infoList := make([]os.FileInfo, 0, len(extTypes)) 453 for _, ext := range extTypes { 454 if i, err := os.Stat(fpath + ext); err == nil { 455 pathes = append(pathes, fpath+ext) 456 infoList = append(infoList, i) 457 } 458 } 459 switch { 460 case len(pathes) < 1: 461 return fpath, NewFileNotExistError(filename) 462 case 1 < len(pathes): 463 return fpath, NewFileNameAmbiguousError(filename) 464 } 465 fpath = pathes[0] 466 info = infoList[0] 467 } 468 469 fpath, err = filepath.Abs(fpath) 470 if err != nil { 471 return fpath, NewFileNotExistError(filename) 472 } 473 474 if info.IsDir() { 475 return fpath, NewFileUnableToReadError(filename) 476 } 477 478 return fpath, nil 479 } 480 481 func NewFileInfoForCreate(filename parser.Identifier, repository string, delimiter rune, encoding text.Encoding) (*FileInfo, error) { 482 fpath, err := CreateFilePath(filename, repository) 483 if err != nil { 484 return nil, NewIOError(filename, err.Error()) 485 } 486 487 var format option.Format 488 switch strings.ToLower(filepath.Ext(fpath)) { 489 case option.TsvExt: 490 delimiter = '\t' 491 format = option.TSV 492 case option.JsonExt: 493 encoding = text.UTF8 494 format = option.JSON 495 case option.JsonlExt: 496 encoding = text.UTF8 497 format = option.JSONL 498 case option.LtsvExt: 499 format = option.LTSV 500 case option.GfmExt: 501 format = option.GFM 502 case option.OrgExt: 503 format = option.ORG 504 default: 505 format = option.CSV 506 } 507 508 return &FileInfo{ 509 Path: fpath, 510 Delimiter: delimiter, 511 Format: format, 512 Encoding: encoding, 513 ViewType: ViewTypeFile, 514 }, nil 515 } 516 517 func CreateFilePath(filename parser.Identifier, repository string) (string, error) { 518 fpath := filename.Literal 519 if !filepath.IsAbs(fpath) { 520 if len(repository) < 1 { 521 repository, _ = os.Getwd() 522 } 523 fpath = filepath.Join(repository, fpath) 524 } 525 return filepath.Abs(fpath) 526 }