github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/csv/args.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package csv
    19  
    20  import (
    21  	"encoding/xml"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"strings"
    26  	"unicode/utf8"
    27  )
    28  
    29  const (
    30  	none = "none"
    31  	use  = "use"
    32  
    33  	defaultRecordDelimiter      = "\n"
    34  	defaultFieldDelimiter       = ","
    35  	defaultQuoteCharacter       = `"`
    36  	defaultQuoteEscapeCharacter = `"`
    37  	defaultCommentCharacter     = "#"
    38  
    39  	asneeded = "asneeded"
    40  )
    41  
    42  // ReaderArgs - represents elements inside <InputSerialization><CSV> in request XML.
    43  type ReaderArgs struct {
    44  	FileHeaderInfo             string `xml:"FileHeaderInfo"`
    45  	RecordDelimiter            string `xml:"RecordDelimiter"`
    46  	FieldDelimiter             string `xml:"FieldDelimiter"`
    47  	QuoteCharacter             string `xml:"QuoteCharacter"`
    48  	QuoteEscapeCharacter       string `xml:"QuoteEscapeCharacter"`
    49  	CommentCharacter           string `xml:"Comments"`
    50  	AllowQuotedRecordDelimiter bool   `xml:"AllowQuotedRecordDelimiter"`
    51  	unmarshaled                bool
    52  }
    53  
    54  // IsEmpty - returns whether reader args is empty or not.
    55  func (args *ReaderArgs) IsEmpty() bool {
    56  	return !args.unmarshaled
    57  }
    58  
    59  // UnmarshalXML - decodes XML data.
    60  func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) {
    61  	args.FileHeaderInfo = none
    62  	args.RecordDelimiter = defaultRecordDelimiter
    63  	args.FieldDelimiter = defaultFieldDelimiter
    64  	args.QuoteCharacter = defaultQuoteCharacter
    65  	args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
    66  	args.CommentCharacter = defaultCommentCharacter
    67  	args.AllowQuotedRecordDelimiter = false
    68  
    69  	for {
    70  		// Read tokens from the XML document in a stream.
    71  		t, err := d.Token()
    72  		if err != nil {
    73  			if err == io.EOF {
    74  				break
    75  			}
    76  			return err
    77  		}
    78  
    79  		if se, ok := t.(xml.StartElement); ok {
    80  			tagName := se.Name.Local
    81  			switch tagName {
    82  			case "AllowQuotedRecordDelimiter":
    83  				var b bool
    84  				if err = d.DecodeElement(&b, &se); err != nil {
    85  					return err
    86  				}
    87  				args.AllowQuotedRecordDelimiter = b
    88  			default:
    89  				var s string
    90  				if err = d.DecodeElement(&s, &se); err != nil {
    91  					return err
    92  				}
    93  				switch tagName {
    94  				case "FileHeaderInfo":
    95  					args.FileHeaderInfo = strings.ToLower(s)
    96  				case "RecordDelimiter":
    97  					args.RecordDelimiter = s
    98  				case "FieldDelimiter":
    99  					args.FieldDelimiter = s
   100  				case "QuoteCharacter":
   101  					if utf8.RuneCountInString(s) > 1 {
   102  						return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   103  					}
   104  					args.QuoteCharacter = s
   105  				case "QuoteEscapeCharacter":
   106  					switch utf8.RuneCountInString(s) {
   107  					case 0:
   108  						args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
   109  					case 1:
   110  						args.QuoteEscapeCharacter = s
   111  					default:
   112  						return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", s)
   113  					}
   114  				case "Comments":
   115  					args.CommentCharacter = s
   116  				default:
   117  					return errors.New("unrecognized option")
   118  				}
   119  			}
   120  		}
   121  	}
   122  
   123  	args.unmarshaled = true
   124  	return nil
   125  }
   126  
   127  // WriterArgs - represents elements inside <OutputSerialization><CSV/> in request XML.
   128  type WriterArgs struct {
   129  	QuoteFields          string `xml:"QuoteFields"`
   130  	RecordDelimiter      string `xml:"RecordDelimiter"`
   131  	FieldDelimiter       string `xml:"FieldDelimiter"`
   132  	QuoteCharacter       string `xml:"QuoteCharacter"`
   133  	QuoteEscapeCharacter string `xml:"QuoteEscapeCharacter"`
   134  	unmarshaled          bool
   135  }
   136  
   137  // IsEmpty - returns whether writer args is empty or not.
   138  func (args *WriterArgs) IsEmpty() bool {
   139  	return !args.unmarshaled
   140  }
   141  
   142  // UnmarshalXML - decodes XML data.
   143  func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   144  	args.QuoteFields = asneeded
   145  	args.RecordDelimiter = defaultRecordDelimiter
   146  	args.FieldDelimiter = defaultFieldDelimiter
   147  	args.QuoteCharacter = defaultQuoteCharacter
   148  	args.QuoteEscapeCharacter = defaultQuoteCharacter
   149  
   150  	for {
   151  		// Read tokens from the XML document in a stream.
   152  		t, err := d.Token()
   153  		if err != nil {
   154  			if err == io.EOF {
   155  				break
   156  			}
   157  			return err
   158  		}
   159  
   160  		if se, ok := t.(xml.StartElement); ok {
   161  			var s string
   162  			if err = d.DecodeElement(&s, &se); err != nil {
   163  				return err
   164  			}
   165  			switch se.Name.Local {
   166  			case "QuoteFields":
   167  				args.QuoteFields = strings.ToLower(s)
   168  			case "RecordDelimiter":
   169  				args.RecordDelimiter = s
   170  			case "FieldDelimiter":
   171  				args.FieldDelimiter = s
   172  			case "QuoteCharacter":
   173  				switch utf8.RuneCountInString(s) {
   174  				case 0:
   175  					args.QuoteCharacter = "\x00"
   176  				case 1:
   177  					args.QuoteCharacter = s
   178  				default:
   179  					return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   180  				}
   181  			case "QuoteEscapeCharacter":
   182  				switch utf8.RuneCountInString(s) {
   183  				case 0:
   184  					args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
   185  				case 1:
   186  					args.QuoteEscapeCharacter = s
   187  				default:
   188  					return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
   189  				}
   190  			default:
   191  				return errors.New("unrecognized option")
   192  			}
   193  		}
   194  	}
   195  
   196  	args.unmarshaled = true
   197  	return nil
   198  }