github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/simdj/reader.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package simdj
    19  
    20  import (
    21  	"fmt"
    22  	"io"
    23  	"sync"
    24  	"sync/atomic"
    25  
    26  	"github.com/minio/minio/internal/s3select/json"
    27  	"github.com/minio/minio/internal/s3select/sql"
    28  	"github.com/minio/simdjson-go"
    29  )
    30  
// Reader - JSON record reader for S3Select.
//
// Records arrive on the decoded channel and are handed out one at a time by
// Read. A Reader built by NewReader also owns a background goroutine
// (startReader) that fills decoded from input; a Reader built by
// NewElementReader only consumes a caller-supplied channel.
type Reader struct {
	// args holds the reader arguments supplied to the constructor.
	args *json.ReaderArgs
	// input carries parsed streams consumed by startReader.
	input chan simdjson.Stream
	// decoded carries the objects that Read returns as records.
	decoded chan simdjson.Object

	// err will only be returned after decoded has been closed.
	err *error
	// readCloser is closed by Close when non-nil.
	readCloser io.ReadCloser
	// onReaderExit is deferred by startReader and runs when it returns.
	onReaderExit func()

	// exitReader is closed by Close to tell startReader to stop.
	exitReader chan struct{}
	// readerWg is waited on by Close after exitReader has been closed.
	readerWg sync.WaitGroup
}
    45  
    46  // Read - reads single record.
    47  func (r *Reader) Read(dst sql.Record) (sql.Record, error) {
    48  	v, ok := <-r.decoded
    49  	if !ok {
    50  		if r.err != nil && *r.err != nil {
    51  			return nil, errJSONParsingError(*r.err)
    52  		}
    53  		return nil, io.EOF
    54  	}
    55  	dstRec, ok := dst.(*Record)
    56  	if !ok {
    57  		dstRec = &Record{}
    58  	}
    59  	dstRec.object = v
    60  	return dstRec, nil
    61  }
    62  
    63  // Close - closes underlying reader.
    64  func (r *Reader) Close() error {
    65  	// Close the input.
    66  	// Potentially racy if the stream decoder is still reading.
    67  	if r.readCloser != nil {
    68  		r.readCloser.Close()
    69  	}
    70  	if r.exitReader != nil {
    71  		close(r.exitReader)
    72  		r.readerWg.Wait()
    73  		r.exitReader = nil
    74  		r.input = nil
    75  	}
    76  	return nil
    77  }
    78  
    79  // startReader will start a reader that accepts input from r.input.
    80  // Input should be root -> object input. Each root indicates a record.
    81  // If r.input is closed, it is assumed that no more input will come.
    82  // When this function returns r.readerWg will be decremented and r.decoded will be closed.
    83  // On errors, r.err will be set. This should only be accessed after r.decoded has been closed.
    84  func (r *Reader) startReader() {
    85  	defer r.onReaderExit()
    86  	var tmpObj simdjson.Object
    87  	for {
    88  		var in simdjson.Stream
    89  		select {
    90  		case in = <-r.input:
    91  		case <-r.exitReader:
    92  			return
    93  		}
    94  		if in.Error != nil && in.Error != io.EOF {
    95  			r.err = &in.Error
    96  			return
    97  		}
    98  		if in.Value == nil {
    99  			if in.Error == io.EOF {
   100  				return
   101  			}
   102  			continue
   103  		}
   104  		i := in.Value.Iter()
   105  	readloop:
   106  		for {
   107  			var next simdjson.Iter
   108  			typ, err := i.AdvanceIter(&next)
   109  			if err != nil {
   110  				r.err = &err
   111  				return
   112  			}
   113  			switch typ {
   114  			case simdjson.TypeNone:
   115  				break readloop
   116  			case simdjson.TypeRoot:
   117  				typ, obj, err := next.Root(nil)
   118  				if err != nil {
   119  					r.err = &err
   120  					return
   121  				}
   122  				if typ != simdjson.TypeObject {
   123  					if typ == simdjson.TypeNone {
   124  						continue
   125  					}
   126  					err = fmt.Errorf("unexpected json type below root :%v", typ)
   127  					r.err = &err
   128  					return
   129  				}
   130  
   131  				o, err := obj.Object(&tmpObj)
   132  				if err != nil {
   133  					r.err = &err
   134  					return
   135  				}
   136  				select {
   137  				case <-r.exitReader:
   138  					return
   139  				case r.decoded <- *o:
   140  				}
   141  			default:
   142  				err = fmt.Errorf("unexpected root json type:%v", typ)
   143  				r.err = &err
   144  				return
   145  			}
   146  		}
   147  		if in.Error == io.EOF {
   148  			return
   149  		}
   150  	}
   151  }
   152  
   153  // NewReader - creates new JSON reader using readCloser.
   154  func NewReader(readCloser io.ReadCloser, args *json.ReaderArgs) *Reader {
   155  	r := Reader{
   156  		args:       args,
   157  		readCloser: &safeCloser{r: io.Reader(readCloser)},
   158  		decoded:    make(chan simdjson.Object, 1000),
   159  		input:      make(chan simdjson.Stream, 2),
   160  		exitReader: make(chan struct{}),
   161  	}
   162  	r.onReaderExit = func() {
   163  		close(r.decoded)
   164  		readCloser.Close()
   165  		for range r.input {
   166  			// Read until EOF trickles through.
   167  			// Otherwise, we risk the decoder hanging.
   168  		}
   169  		r.readerWg.Done()
   170  	}
   171  
   172  	// We cannot reuse as we are sending parsed objects elsewhere.
   173  	simdjson.ParseNDStream(readCloser, r.input, nil)
   174  	r.readerWg.Add(1)
   175  	go r.startReader()
   176  	return &r
   177  }
   178  
   179  // NewElementReader - creates new JSON reader using readCloser.
   180  func NewElementReader(ch chan simdjson.Object, err *error, args *json.ReaderArgs) *Reader {
   181  	return &Reader{
   182  		args:       args,
   183  		decoded:    ch,
   184  		err:        err,
   185  		readCloser: nil,
   186  	}
   187  }
   188  
   189  // safeCloser will wrap a Reader as a ReadCloser.
   190  // It is safe to call Close while the reader is being used.
   191  type safeCloser struct {
   192  	closed uint32
   193  	r      io.Reader
   194  }
   195  
   196  func (s *safeCloser) Read(p []byte) (n int, err error) {
   197  	if atomic.LoadUint32(&s.closed) == 1 {
   198  		return 0, io.EOF
   199  	}
   200  	n, err = s.r.Read(p)
   201  	if atomic.LoadUint32(&s.closed) == 1 {
   202  		return 0, io.EOF
   203  	}
   204  	return n, err
   205  }
   206  
   207  func (s *safeCloser) Close() error {
   208  	atomic.CompareAndSwapUint32(&s.closed, 0, 1)
   209  	return nil
   210  }