storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/csv/args.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package csv
    18  
    19  import (
    20  	"encoding/xml"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"strings"
    25  	"unicode/utf8"
    26  )
    27  
const (
	// Values for the FileHeaderInfo option. none is the default assigned by
	// ReaderArgs.UnmarshalXML.
	// NOTE(review): use is not referenced in this file; presumably it is
	// consumed by the CSV reader elsewhere in the package — confirm.
	none = "none"
	use  = "use"

	// Defaults that the UnmarshalXML methods in this file assign before any
	// child element of the request is decoded.
	defaultRecordDelimiter      = "\n"
	defaultFieldDelimiter       = ","
	defaultQuoteCharacter       = `"`
	defaultQuoteEscapeCharacter = `"`
	defaultCommentCharacter     = "#"

	// asneeded is the default QuoteFields value assigned by
	// WriterArgs.UnmarshalXML.
	asneeded = "asneeded"
)
    40  
// ReaderArgs - represents elements inside <InputSerialization><CSV> in request XML.
//
// Values are populated by UnmarshalXML, which first assigns defaults and then
// overrides them with whatever child elements the request contains.
type ReaderArgs struct {
	// FileHeaderInfo is stored lower-cased by UnmarshalXML; defaults to "none".
	FileHeaderInfo string `xml:"FileHeaderInfo"`
	// RecordDelimiter defaults to "\n"; UnmarshalXML stores the value as given.
	RecordDelimiter string `xml:"RecordDelimiter"`
	// FieldDelimiter defaults to ","; UnmarshalXML stores the value as given.
	FieldDelimiter string `xml:"FieldDelimiter"`
	// QuoteCharacter defaults to `"`; UnmarshalXML rejects values longer than
	// one rune but accepts an empty value.
	QuoteCharacter string `xml:"QuoteCharacter"`
	// QuoteEscapeCharacter defaults to `"`; an empty value falls back to the
	// default and values longer than one rune are rejected by UnmarshalXML.
	QuoteEscapeCharacter string `xml:"QuoteEscapeCharacter"`
	// CommentCharacter is read from the <Comments> element; defaults to "#".
	CommentCharacter string `xml:"Comments"`
	// AllowQuotedRecordDelimiter defaults to false.
	AllowQuotedRecordDelimiter bool `xml:"AllowQuotedRecordDelimiter"`
	// unmarshaled is set to true only after UnmarshalXML completes without
	// error; IsEmpty reports its negation.
	unmarshaled bool
}
    52  
    53  // IsEmpty - returns whether reader args is empty or not.
    54  func (args *ReaderArgs) IsEmpty() bool {
    55  	return !args.unmarshaled
    56  }
    57  
    58  // UnmarshalXML - decodes XML data.
    59  func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) {
    60  	args.FileHeaderInfo = none
    61  	args.RecordDelimiter = defaultRecordDelimiter
    62  	args.FieldDelimiter = defaultFieldDelimiter
    63  	args.QuoteCharacter = defaultQuoteCharacter
    64  	args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
    65  	args.CommentCharacter = defaultCommentCharacter
    66  	args.AllowQuotedRecordDelimiter = false
    67  
    68  	for {
    69  		// Read tokens from the XML document in a stream.
    70  		t, err := d.Token()
    71  		if err != nil {
    72  			if err == io.EOF {
    73  				break
    74  			}
    75  			return err
    76  		}
    77  
    78  		switch se := t.(type) {
    79  		case xml.StartElement:
    80  			tagName := se.Name.Local
    81  			switch tagName {
    82  			case "AllowQuotedRecordDelimiter":
    83  				var b bool
    84  				if err = d.DecodeElement(&b, &se); err != nil {
    85  					return err
    86  				}
    87  				args.AllowQuotedRecordDelimiter = b
    88  			default:
    89  				var s string
    90  				if err = d.DecodeElement(&s, &se); err != nil {
    91  					return err
    92  				}
    93  				switch tagName {
    94  				case "FileHeaderInfo":
    95  					args.FileHeaderInfo = strings.ToLower(s)
    96  				case "RecordDelimiter":
    97  					args.RecordDelimiter = s
    98  				case "FieldDelimiter":
    99  					args.FieldDelimiter = s
   100  				case "QuoteCharacter":
   101  					if utf8.RuneCountInString(s) > 1 {
   102  						return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   103  					}
   104  					args.QuoteCharacter = s
   105  				case "QuoteEscapeCharacter":
   106  					switch utf8.RuneCountInString(s) {
   107  					case 0:
   108  						args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
   109  					case 1:
   110  						args.QuoteEscapeCharacter = s
   111  					default:
   112  						return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", s)
   113  					}
   114  				case "Comments":
   115  					args.CommentCharacter = s
   116  				default:
   117  					return errors.New("unrecognized option")
   118  				}
   119  			}
   120  		}
   121  	}
   122  
   123  	args.unmarshaled = true
   124  	return nil
   125  }
   126  
// WriterArgs - represents elements inside <OutputSerialization><CSV/> in request XML.
//
// Values are populated by UnmarshalXML, which first assigns defaults and then
// overrides them with whatever child elements the request contains.
type WriterArgs struct {
	// QuoteFields is stored lower-cased by UnmarshalXML; defaults to "asneeded".
	QuoteFields string `xml:"QuoteFields"`
	// RecordDelimiter defaults to "\n"; UnmarshalXML stores the value as given.
	RecordDelimiter string `xml:"RecordDelimiter"`
	// FieldDelimiter defaults to ","; UnmarshalXML stores the value as given.
	FieldDelimiter string `xml:"FieldDelimiter"`
	// QuoteCharacter defaults to `"`; UnmarshalXML stores an empty value as
	// "\x00" and rejects values longer than one rune.
	QuoteCharacter string `xml:"QuoteCharacter"`
	// QuoteEscapeCharacter defaults to `"`; an empty value falls back to the
	// default and values longer than one rune are rejected by UnmarshalXML.
	QuoteEscapeCharacter string `xml:"QuoteEscapeCharacter"`
	// unmarshaled is set to true only after UnmarshalXML completes without
	// error; IsEmpty reports its negation.
	unmarshaled bool
}
   136  
   137  // IsEmpty - returns whether writer args is empty or not.
   138  func (args *WriterArgs) IsEmpty() bool {
   139  	return !args.unmarshaled
   140  }
   141  
   142  // UnmarshalXML - decodes XML data.
   143  func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   144  
   145  	args.QuoteFields = asneeded
   146  	args.RecordDelimiter = defaultRecordDelimiter
   147  	args.FieldDelimiter = defaultFieldDelimiter
   148  	args.QuoteCharacter = defaultQuoteCharacter
   149  	args.QuoteEscapeCharacter = defaultQuoteCharacter
   150  
   151  	for {
   152  		// Read tokens from the XML document in a stream.
   153  		t, err := d.Token()
   154  		if err != nil {
   155  			if err == io.EOF {
   156  				break
   157  			}
   158  			return err
   159  		}
   160  
   161  		switch se := t.(type) {
   162  		case xml.StartElement:
   163  			var s string
   164  			if err = d.DecodeElement(&s, &se); err != nil {
   165  				return err
   166  			}
   167  			switch se.Name.Local {
   168  			case "QuoteFields":
   169  				args.QuoteFields = strings.ToLower(s)
   170  			case "RecordDelimiter":
   171  				args.RecordDelimiter = s
   172  			case "FieldDelimiter":
   173  				args.FieldDelimiter = s
   174  			case "QuoteCharacter":
   175  				switch utf8.RuneCountInString(s) {
   176  				case 0:
   177  					args.QuoteCharacter = "\x00"
   178  				case 1:
   179  					args.QuoteCharacter = s
   180  				default:
   181  					return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   182  				}
   183  			case "QuoteEscapeCharacter":
   184  				switch utf8.RuneCountInString(s) {
   185  				case 0:
   186  					args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
   187  				case 1:
   188  					args.QuoteEscapeCharacter = s
   189  				default:
   190  					return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   191  				}
   192  			default:
   193  				return errors.New("unrecognized option")
   194  			}
   195  		}
   196  	}
   197  
   198  	args.unmarshaled = true
   199  	return nil
   200  }