github.com/mithrandie/csvq@v1.18.1/lib/query/file_info.go (about)

     1  package query
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"reflect"
     9  	"strings"
    10  
    11  	"github.com/mithrandie/csvq/lib/file"
    12  	"github.com/mithrandie/csvq/lib/option"
    13  	"github.com/mithrandie/csvq/lib/parser"
    14  
    15  	"github.com/mithrandie/go-text"
    16  	"github.com/mithrandie/go-text/fixedlen"
    17  	"github.com/mithrandie/go-text/json"
    18  )
    19  
// Names of the table attributes. The same identifiers are collected, in this
// order, in FileAttributeList.
const (
	TableDelimiter          = "DELIMITER"
	TableDelimiterPositions = "DELIMITER_POSITIONS"
	TableFormat             = "FORMAT"
	TableEncoding           = "ENCODING"
	TableLineBreak          = "LINE_BREAK"
	TableHeader             = "HEADER"
	TableEncloseAll         = "ENCLOSE_ALL"
	TableJsonEscape         = "JSON_ESCAPE"
	TablePrettyPrint        = "PRETTY_PRINT"
)
    31  
// ViewType is the value stored in FileInfo.ViewType; the FileInfo.Is*
// predicates compare against the constants below.
type ViewType int

// ViewType values. Within this file, ViewTypeFile is assigned by NewFileInfo
// and NewFileInfoForCreate, ViewTypeTemporaryTable by
// NewTemporaryTableFileInfo, ViewTypeStdin by NewStdinFileInfo and
// ViewTypeInlineTable by NewInlineFileInfo. ViewTypeRemoteObject and
// ViewTypeStringObject are only tested for here, never assigned.
const (
	ViewTypeFile ViewType = iota
	ViewTypeTemporaryTable
	ViewTypeStdin
	ViewTypeRemoteObject
	ViewTypeStringObject
	ViewTypeInlineTable
)
    42  
// FileAttributeList enumerates every table attribute name declared in this
// file (the Table* constants).
var FileAttributeList = []string{
	TableDelimiter,
	TableDelimiterPositions,
	TableFormat,
	TableEncoding,
	TableLineBreak,
	TableHeader,
	TableEncloseAll,
	TableJsonEscape,
	TablePrettyPrint,
}
    54  
    55  type TableAttributeUnchangedError struct {
    56  	Path    string
    57  	Message string
    58  }
    59  
    60  func NewTableAttributeUnchangedError(fpath string) error {
    61  	return &TableAttributeUnchangedError{
    62  		Path:    fpath,
    63  		Message: "table attributes of %s remain unchanged",
    64  	}
    65  }
    66  
    67  func (e TableAttributeUnchangedError) Error() string {
    68  	return fmt.Sprintf(e.Message, e.Path)
    69  }
    70  
// FileInfo holds the location of a table together with the attributes that
// describe how its data is formatted.
type FileInfo struct {
	// Path is the resolved file path, or the table name for temporary tables
	// (see NewTemporaryTableFileInfo). When ArchivePath is non-empty,
	// IdentifiedPath appends it after " IN ".
	Path        string
	ArchivePath string

	// Format attributes. They are initialised from import/export options and
	// can be changed through the Set* methods; ExportOptions copies most of
	// them into an option.ExportOptions value.
	Format             option.Format
	Delimiter          rune
	DelimiterPositions fixedlen.DelimiterPositions
	JsonQuery          string
	Encoding           text.Encoding
	LineBreak          text.LineBreak
	NoHeader           bool
	EncloseAll         bool
	JsonEscape         json.EscapeType
	PrettyPrint        bool

	// SingleLine is set together with DelimiterPositions
	// (see SetDelimiterPositions).
	SingleLine bool

	// Handler is not used within this file.
	// NOTE(review): presumably the open file handle for the table; confirm.
	Handler *file.Handler

	// ForUpdate is not used within this file. ViewType selects which of the
	// Is* predicates report true.
	ForUpdate bool
	ViewType  ViewType

	// Not used within this file.
	// NOTE(review): presumably a snapshot used to restore the view; confirm.
	restorePointHeader    Header
	restorePointRecordSet RecordSet
}
    96  
    97  func NewFileInfo(
    98  	filename parser.Identifier,
    99  	repository string,
   100  	options option.ImportOptions,
   101  	defaultFormat option.Format,
   102  ) (*FileInfo, error) {
   103  	fpath, format, err := SearchFilePath(filename, repository, options, defaultFormat)
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	delimiter := options.Delimiter
   109  	encoding := options.Encoding
   110  	switch format {
   111  	case option.TSV:
   112  		delimiter = '\t'
   113  	case option.JSON, option.JSONL:
   114  		encoding = text.UTF8
   115  	}
   116  
   117  	return &FileInfo{
   118  		Path:      fpath,
   119  		Format:    format,
   120  		Delimiter: delimiter,
   121  		Encoding:  encoding,
   122  		ViewType:  ViewTypeFile,
   123  	}, nil
   124  }
   125  
   126  func NewTemporaryTableFileInfo(name string) *FileInfo {
   127  	return &FileInfo{
   128  		Path:     name,
   129  		ViewType: ViewTypeTemporaryTable,
   130  	}
   131  }
   132  
   133  func NewStdinFileInfo(filePath string, importOptions option.ImportOptions, exportOptions option.ExportOptions) *FileInfo {
   134  	f := &FileInfo{
   135  		Path:     filePath,
   136  		ViewType: ViewTypeStdin,
   137  	}
   138  	f.SetAllDefaultFileInfoAttributes(importOptions, exportOptions)
   139  	return f
   140  }
   141  
   142  func NewInlineFileInfo(filePath string, importOptions option.ImportOptions, exportOptions option.ExportOptions) *FileInfo {
   143  	f := &FileInfo{
   144  		Path:     filePath,
   145  		ViewType: ViewTypeInlineTable,
   146  	}
   147  	f.SetAllDefaultFileInfoAttributes(importOptions, exportOptions)
   148  	return f
   149  }
   150  
   151  func (f *FileInfo) SetAllDefaultFileInfoAttributes(importOptions option.ImportOptions, exportOptions option.ExportOptions) {
   152  	f.Format = importOptions.Format
   153  	f.Delimiter = importOptions.Delimiter
   154  	f.Encoding = importOptions.Encoding
   155  
   156  	switch f.Format {
   157  	case option.TSV:
   158  		f.Delimiter = '\t'
   159  	case option.JSON, option.JSONL:
   160  		f.Encoding = text.UTF8
   161  	}
   162  
   163  	f.SetDefaultFileInfoAttributes(importOptions, exportOptions)
   164  }
   165  
   166  func (f *FileInfo) SetDefaultFileInfoAttributes(importOptions option.ImportOptions, exportOptions option.ExportOptions) {
   167  	f.DelimiterPositions = importOptions.DelimiterPositions
   168  	f.SingleLine = importOptions.SingleLine
   169  	f.JsonQuery = option.TrimSpace(importOptions.JsonQuery)
   170  	f.LineBreak = exportOptions.LineBreak
   171  	f.NoHeader = importOptions.NoHeader
   172  	f.EncloseAll = exportOptions.EncloseAll
   173  	f.JsonEscape = exportOptions.JsonEscape
   174  }
   175  
   176  func (f *FileInfo) IsUpdatable() bool {
   177  	return f.IsFile() || f.IsInMemoryTable()
   178  }
   179  
   180  func (f *FileInfo) SetDelimiter(s string) error {
   181  	delimiter, err := option.ParseDelimiter(s)
   182  	if err != nil {
   183  		return err
   184  	}
   185  
   186  	var format option.Format
   187  	if delimiter == '\t' {
   188  		format = option.TSV
   189  	} else {
   190  		format = option.CSV
   191  	}
   192  
   193  	if f.Delimiter == delimiter && f.Format == format {
   194  		return NewTableAttributeUnchangedError(f.Path)
   195  	}
   196  
   197  	f.Delimiter = delimiter
   198  	f.Format = format
   199  	return nil
   200  }
   201  
   202  func (f *FileInfo) SetDelimiterPositions(s string) error {
   203  	pos, singleLine, err := option.ParseDelimiterPositions(s)
   204  	if err != nil {
   205  		return err
   206  	}
   207  	delimiterPositions := fixedlen.DelimiterPositions(pos)
   208  	format := option.FIXED
   209  
   210  	if reflect.DeepEqual(f.DelimiterPositions, delimiterPositions) &&
   211  		f.SingleLine == singleLine &&
   212  		f.Format == format {
   213  		return NewTableAttributeUnchangedError(f.Path)
   214  	}
   215  
   216  	f.Format = format
   217  	f.DelimiterPositions = delimiterPositions
   218  	f.SingleLine = singleLine
   219  
   220  	return nil
   221  }
   222  
   223  func (f *FileInfo) SetFormat(s string) error {
   224  	format, escapeType, err := option.ParseFormat(s, f.JsonEscape)
   225  	if err != nil {
   226  		return err
   227  	}
   228  
   229  	if f.Format == format &&
   230  		f.JsonEscape == escapeType {
   231  		return NewTableAttributeUnchangedError(f.Path)
   232  	}
   233  
   234  	delimiter := f.Delimiter
   235  	encoding := f.Encoding
   236  
   237  	switch format {
   238  	case option.TSV:
   239  		delimiter = '\t'
   240  	case option.JSON, option.JSONL:
   241  		encoding = text.UTF8
   242  	}
   243  
   244  	f.Format = format
   245  	f.JsonEscape = escapeType
   246  	f.Delimiter = delimiter
   247  	f.Encoding = encoding
   248  	return nil
   249  }
   250  
   251  func (f *FileInfo) SetEncoding(s string) error {
   252  	encoding, err := option.ParseEncoding(s)
   253  	if err != nil || encoding == text.AUTO {
   254  		return errors.New("encoding must be one of UTF8|UTF8M|UTF16|UTF16BE|UTF16LE|UTF16BEM|UTF16LEM|SJIS")
   255  	}
   256  
   257  	switch f.Format {
   258  	case option.JSON, option.JSONL:
   259  		if encoding != text.UTF8 {
   260  			return errors.New("json format is supported only UTF8")
   261  		}
   262  	}
   263  
   264  	if f.Encoding == encoding {
   265  		return NewTableAttributeUnchangedError(f.Path)
   266  	}
   267  
   268  	f.Encoding = encoding
   269  	return nil
   270  }
   271  
   272  func (f *FileInfo) SetLineBreak(s string) error {
   273  	lb, err := option.ParseLineBreak(s)
   274  	if err != nil {
   275  		return err
   276  	}
   277  
   278  	if f.LineBreak == lb {
   279  		return NewTableAttributeUnchangedError(f.Path)
   280  	}
   281  
   282  	f.LineBreak = lb
   283  	return nil
   284  }
   285  
   286  func (f *FileInfo) SetNoHeader(b bool) error {
   287  	if b == f.NoHeader {
   288  		return NewTableAttributeUnchangedError(f.Path)
   289  	}
   290  	f.NoHeader = b
   291  	return nil
   292  }
   293  
   294  func (f *FileInfo) SetEncloseAll(b bool) error {
   295  	if b == f.EncloseAll {
   296  		return NewTableAttributeUnchangedError(f.Path)
   297  	}
   298  	f.EncloseAll = b
   299  	return nil
   300  }
   301  
   302  func (f *FileInfo) SetJsonEscape(s string) error {
   303  	escape, err := option.ParseJsonEscapeType(s)
   304  	if err != nil {
   305  		return err
   306  	}
   307  
   308  	if escape == f.JsonEscape {
   309  		return NewTableAttributeUnchangedError(f.Path)
   310  	}
   311  
   312  	f.JsonEscape = escape
   313  	return nil
   314  }
   315  
   316  func (f *FileInfo) SetPrettyPrint(b bool) error {
   317  	if b == f.PrettyPrint {
   318  		return NewTableAttributeUnchangedError(f.Path)
   319  	}
   320  	f.PrettyPrint = b
   321  	return nil
   322  }
   323  
   324  func (f *FileInfo) IsFile() bool {
   325  	return f.ViewType == ViewTypeFile
   326  }
   327  
   328  func (f *FileInfo) IsTemporaryTable() bool {
   329  	return f.ViewType == ViewTypeTemporaryTable
   330  }
   331  
   332  func (f *FileInfo) IsStdin() bool {
   333  	return f.ViewType == ViewTypeStdin
   334  }
   335  
   336  func (f *FileInfo) IsInMemoryTable() bool {
   337  	return f.ViewType == ViewTypeStdin || f.ViewType == ViewTypeTemporaryTable
   338  }
   339  
   340  func (f *FileInfo) IsRemoteObject() bool {
   341  	return f.ViewType == ViewTypeRemoteObject
   342  }
   343  
   344  func (f *FileInfo) IsStringObject() bool {
   345  	return f.ViewType == ViewTypeStringObject
   346  }
   347  
   348  func (f *FileInfo) IsInlineTable() bool {
   349  	return f.ViewType == ViewTypeInlineTable
   350  }
   351  
   352  func (f *FileInfo) IdentifiedPath() string {
   353  	s := strings.ToUpper(f.Path)
   354  	if 0 < len(f.ArchivePath) {
   355  		s = s + " IN " + strings.ToUpper(f.ArchivePath)
   356  	}
   357  	return s
   358  }
   359  
   360  func (f *FileInfo) ExportOptions(tx *Transaction) option.ExportOptions {
   361  	ops := tx.Flags.ExportOptions.Copy()
   362  	ops.Format = f.Format
   363  	ops.Delimiter = f.Delimiter
   364  	ops.DelimiterPositions = f.DelimiterPositions
   365  	ops.SingleLine = f.SingleLine
   366  	ops.Encoding = f.Encoding
   367  	ops.LineBreak = f.LineBreak
   368  	ops.WithoutHeader = f.NoHeader
   369  	ops.EncloseAll = f.EncloseAll
   370  	ops.JsonEscape = f.JsonEscape
   371  	ops.PrettyPrint = f.PrettyPrint
   372  	return ops
   373  }
   374  
   375  func SearchFilePath(filename parser.Identifier, repository string, options option.ImportOptions, defaultFormat option.Format) (string, option.Format, error) {
   376  	var fpath string
   377  	var err error
   378  
   379  	format := options.Format
   380  
   381  	switch format {
   382  	case option.CSV, option.TSV:
   383  		fpath, err = SearchCSVFilePath(filename, repository)
   384  	case option.JSON:
   385  		fpath, err = SearchJsonFilePath(filename, repository)
   386  	case option.JSONL:
   387  		fpath, err = SearchJsonlFilePath(filename, repository)
   388  	case option.FIXED:
   389  		fpath, err = SearchFixedLengthFilePath(filename, repository)
   390  	case option.LTSV:
   391  		fpath, err = SearchLTSVFilePath(filename, repository)
   392  	default: // AutoSelect
   393  		if fpath, err = SearchFilePathFromAllTypes(filename, repository); err == nil {
   394  			switch strings.ToLower(filepath.Ext(fpath)) {
   395  			case option.CsvExt:
   396  				format = option.CSV
   397  			case option.TsvExt:
   398  				format = option.TSV
   399  			case option.JsonExt:
   400  				format = option.JSON
   401  			case option.JsonlExt:
   402  				format = option.JSONL
   403  			case option.LtsvExt:
   404  				format = option.LTSV
   405  			default:
   406  				format = defaultFormat
   407  			}
   408  		}
   409  	}
   410  
   411  	return fpath, format, err
   412  }
   413  
   414  func SearchCSVFilePath(filename parser.Identifier, repository string) (string, error) {
   415  	return SearchFilePathWithExtType(filename, repository, []string{option.CsvExt, option.TsvExt, option.TextExt})
   416  }
   417  
   418  func SearchJsonFilePath(filename parser.Identifier, repository string) (string, error) {
   419  	return SearchFilePathWithExtType(filename, repository, []string{option.JsonExt})
   420  }
   421  
   422  func SearchJsonlFilePath(filename parser.Identifier, repository string) (string, error) {
   423  	return SearchFilePathWithExtType(filename, repository, []string{option.JsonlExt})
   424  }
   425  
   426  func SearchFixedLengthFilePath(filename parser.Identifier, repository string) (string, error) {
   427  	return SearchFilePathWithExtType(filename, repository, []string{option.TextExt})
   428  }
   429  
   430  func SearchLTSVFilePath(filename parser.Identifier, repository string) (string, error) {
   431  	return SearchFilePathWithExtType(filename, repository, []string{option.LtsvExt, option.TextExt})
   432  }
   433  
   434  func SearchFilePathFromAllTypes(filename parser.Identifier, repository string) (string, error) {
   435  	return SearchFilePathWithExtType(filename, repository, []string{option.CsvExt, option.TsvExt, option.JsonExt, option.JsonlExt, option.LtsvExt, option.TextExt})
   436  }
   437  
   438  func SearchFilePathWithExtType(filename parser.Identifier, repository string, extTypes []string) (string, error) {
   439  	fpath := filename.Literal
   440  	if !filepath.IsAbs(fpath) {
   441  		if len(repository) < 1 {
   442  			repository, _ = os.Getwd()
   443  		}
   444  		fpath = filepath.Join(repository, fpath)
   445  	}
   446  
   447  	var info os.FileInfo
   448  	var err error
   449  
   450  	if info, err = os.Stat(fpath); err != nil {
   451  		pathes := make([]string, 0, len(extTypes))
   452  		infoList := make([]os.FileInfo, 0, len(extTypes))
   453  		for _, ext := range extTypes {
   454  			if i, err := os.Stat(fpath + ext); err == nil {
   455  				pathes = append(pathes, fpath+ext)
   456  				infoList = append(infoList, i)
   457  			}
   458  		}
   459  		switch {
   460  		case len(pathes) < 1:
   461  			return fpath, NewFileNotExistError(filename)
   462  		case 1 < len(pathes):
   463  			return fpath, NewFileNameAmbiguousError(filename)
   464  		}
   465  		fpath = pathes[0]
   466  		info = infoList[0]
   467  	}
   468  
   469  	fpath, err = filepath.Abs(fpath)
   470  	if err != nil {
   471  		return fpath, NewFileNotExistError(filename)
   472  	}
   473  
   474  	if info.IsDir() {
   475  		return fpath, NewFileUnableToReadError(filename)
   476  	}
   477  
   478  	return fpath, nil
   479  }
   480  
   481  func NewFileInfoForCreate(filename parser.Identifier, repository string, delimiter rune, encoding text.Encoding) (*FileInfo, error) {
   482  	fpath, err := CreateFilePath(filename, repository)
   483  	if err != nil {
   484  		return nil, NewIOError(filename, err.Error())
   485  	}
   486  
   487  	var format option.Format
   488  	switch strings.ToLower(filepath.Ext(fpath)) {
   489  	case option.TsvExt:
   490  		delimiter = '\t'
   491  		format = option.TSV
   492  	case option.JsonExt:
   493  		encoding = text.UTF8
   494  		format = option.JSON
   495  	case option.JsonlExt:
   496  		encoding = text.UTF8
   497  		format = option.JSONL
   498  	case option.LtsvExt:
   499  		format = option.LTSV
   500  	case option.GfmExt:
   501  		format = option.GFM
   502  	case option.OrgExt:
   503  		format = option.ORG
   504  	default:
   505  		format = option.CSV
   506  	}
   507  
   508  	return &FileInfo{
   509  		Path:      fpath,
   510  		Delimiter: delimiter,
   511  		Format:    format,
   512  		Encoding:  encoding,
   513  		ViewType:  ViewTypeFile,
   514  	}, nil
   515  }
   516  
   517  func CreateFilePath(filename parser.Identifier, repository string) (string, error) {
   518  	fpath := filename.Literal
   519  	if !filepath.IsAbs(fpath) {
   520  		if len(repository) < 1 {
   521  			repository, _ = os.Getwd()
   522  		}
   523  		fpath = filepath.Join(repository, fpath)
   524  	}
   525  	return filepath.Abs(fpath)
   526  }