github.com/tobgu/qframe@v0.4.0/config/csv/config.go (about)

     1  package csv
     2  
     3  import (
     4  	qfio "github.com/tobgu/qframe/internal/io"
     5  	"github.com/tobgu/qframe/types"
     6  )
     7  
// Config holds configuration for reading CSV files into QFrames.
// It is a named type whose underlying type is qfio.CSVConfig.
//
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it,
// use the functions returning ConfigFunc below.
type Config qfio.CSVConfig
    13  
// ConfigFunc is a function that operates on a Config object.
// It receives a pointer to the Config so that it can modify it in place.
type ConfigFunc func(*Config)
    16  
    17  // NewConfig creates a new Config object.
    18  // This function should never be called from outside QFrame.
    19  func NewConfig(ff []ConfigFunc) Config {
    20  	conf := Config{Delimiter: ','}
    21  	for _, f := range ff {
    22  		f(&conf)
    23  	}
    24  	return conf
    25  }
    26  
    27  // EmptyNull configures if empty strings should be considered as empty strings (default) or null.
    28  //
    29  // emptyNull - If set to true empty string will be translated to null.
    30  func EmptyNull(emptyNull bool) ConfigFunc {
    31  	return func(c *Config) {
    32  		c.EmptyNull = emptyNull
    33  	}
    34  }
    35  
    36  // MissingColumnNameAlias sets the name to be used for empty columns name with given string
    37  func MissingColumnNameAlias(MissingColumnNameAlias string) ConfigFunc {
    38  	return func(c *Config) {
    39  		c.MissingColumnNameAlias = MissingColumnNameAlias
    40  	}
    41  }
    42  
    43  // RenameDuplicateColumns configures if duplicate column names should have the column index appended to the column name to resolve the conflict.
    44  func RenameDuplicateColumns(RenameDuplicateColumns bool) ConfigFunc {
    45  	return func(c *Config) {
    46  		c.RenameDuplicateColumns = RenameDuplicateColumns
    47  	}
    48  }
    49  
    50  // IgnoreEmptyLines configures if a line without any characters should be ignored or interpreted
    51  // as a zero length string.
    52  //
    53  // IgnoreEmptyLines - If set to true empty lines will not produce any data.
    54  func IgnoreEmptyLines(ignoreEmptyLines bool) ConfigFunc {
    55  	return func(c *Config) {
    56  		c.IgnoreEmptyLines = ignoreEmptyLines
    57  	}
    58  }
    59  
    60  // Delimiter configures the delimiter/separator between columns.
    61  // Only byte representable delimiters are supported. Default is ','.
    62  //
    63  // delimiter - The delimiter to use.
    64  func Delimiter(delimiter byte) ConfigFunc {
    65  	return func(c *Config) {
    66  		c.Delimiter = delimiter
    67  	}
    68  }
    69  
    70  // Types is used set types for certain columns.
    71  // If types are not given a best effort attempt will be done to auto detected the type.
    72  //
    73  // typs - map column name -> type name. For a list of type names see package qframe/types.
    74  func Types(typs map[string]string) ConfigFunc {
    75  	return func(c *Config) {
    76  		c.Types = make(map[string]types.DataType, len(typs))
    77  		for k, v := range typs {
    78  			c.Types[k] = types.DataType(v)
    79  		}
    80  	}
    81  }
    82  
    83  // EnumValues is used to list the possible values and internal order of these values for an enum column.
    84  //
    85  // values - map column name -> list of valid values.
    86  //
    87  // Enum columns that do not specify the values are automatically assigned values based on the content
    88  // of the column. The ordering between these values is undefined. It hence doesn't make much sense to
    89  // sort a QFrame on an enum column unless the ordering has been specified.
    90  //
    91  // Note that the column must be listed as having an enum type (using Types above) for this option to take effect.
    92  func EnumValues(values map[string][]string) ConfigFunc {
    93  	return func(c *Config) {
    94  		c.EnumVals = make(map[string][]string)
    95  		for k, v := range values {
    96  			c.EnumVals[k] = v
    97  		}
    98  	}
    99  }
   100  
   101  // RowCountHint can be used to provide an indication of the number of rows
   102  // in the CSV. In some cases this will help allocating buffers more efficiently
   103  // and improve import times.
   104  //
   105  // rowCount - The number of rows.
   106  func RowCountHint(rowCount int) ConfigFunc {
   107  	return func(c *Config) {
   108  		c.RowCountHint = rowCount
   109  	}
   110  }
   111  
   112  // Headers can be used to specify the header names for a CSV file without header.
   113  //
   114  // header - Slice with column names.
   115  func Headers(headers []string) ConfigFunc {
   116  	return func(c *Config) {
   117  		c.Headers = headers
   118  	}
   119  }
   120  
// ToConfig holds configuration for writing CSV files.
// It is a named type whose underlying type is qfio.ToCsvConfig.
type ToConfig qfio.ToCsvConfig
   123  
// ToConfigFunc is a function that operates on a ToConfig object.
// It receives a pointer to the ToConfig so that it can modify it in place.
type ToConfigFunc func(*ToConfig)
   126  
   127  // NewConfig creates a new ToConfig object.
   128  // This function should never be called from outside QFrame.
   129  func NewToConfig(ff []ToConfigFunc) ToConfig {
   130  	conf := ToConfig{Header: true} //Default
   131  	for _, f := range ff {
   132  		f(&conf)
   133  	}
   134  	return conf
   135  }
   136  
   137  // Header indicates whether or not the CSV file should be written with a header.
   138  // Default is true.
   139  func Header(header bool) ToConfigFunc {
   140  	return func(c *ToConfig) {
   141  		c.Header = header
   142  	}
   143  }