github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/csv/recordtransform.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package csv
    19  
    20  import (
    21  	"bytes"
    22  	"io"
    23  )
    24  
    25  // recordTransform will convert records to always have newline records.
    26  type recordTransform struct {
    27  	reader io.Reader
    28  	// recordDelimiter can be up to 2 characters.
    29  	recordDelimiter []byte
    30  	oneByte         []byte
    31  	useOneByte      bool
    32  }
    33  
    34  func (rr *recordTransform) Read(p []byte) (n int, err error) {
    35  	if rr.useOneByte {
    36  		p[0] = rr.oneByte[0]
    37  		rr.useOneByte = false
    38  		n, err = rr.reader.Read(p[1:])
    39  		n++
    40  	} else {
    41  		n, err = rr.reader.Read(p)
    42  	}
    43  
    44  	if err != nil {
    45  		return n, err
    46  	}
    47  
    48  	// Do nothing if record-delimiter is already newline.
    49  	if string(rr.recordDelimiter) == "\n" {
    50  		return n, nil
    51  	}
    52  
    53  	// Change record delimiters to newline.
    54  	if len(rr.recordDelimiter) == 1 {
    55  		for idx := 0; idx < len(p); {
    56  			i := bytes.Index(p[idx:], rr.recordDelimiter)
    57  			if i < 0 {
    58  				break
    59  			}
    60  			idx += i
    61  			p[idx] = '\n'
    62  		}
    63  		return n, nil
    64  	}
    65  
    66  	// 2 characters...
    67  	for idx := 0; idx < len(p); {
    68  		i := bytes.Index(p[idx:], rr.recordDelimiter)
    69  		if i < 0 {
    70  			break
    71  		}
    72  		idx += i
    73  
    74  		p[idx] = '\n'
    75  		p = append(p[:idx+1], p[idx+2:]...)
    76  		n--
    77  	}
    78  
    79  	if p[n-1] != rr.recordDelimiter[0] {
    80  		return n, nil
    81  	}
    82  
    83  	if _, err = rr.reader.Read(rr.oneByte); err != nil {
    84  		return n, err
    85  	}
    86  
    87  	if rr.oneByte[0] == rr.recordDelimiter[1] {
    88  		p[n-1] = '\n'
    89  		return n, nil
    90  	}
    91  
    92  	rr.useOneByte = true
    93  	return n, nil
    94  }