github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/simdj/reader.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package simdj 19 20 import ( 21 "fmt" 22 "io" 23 "sync" 24 "sync/atomic" 25 26 "github.com/minio/minio/internal/s3select/json" 27 "github.com/minio/minio/internal/s3select/sql" 28 "github.com/minio/simdjson-go" 29 ) 30 31 // Reader - JSON record reader for S3Select. 32 type Reader struct { 33 args *json.ReaderArgs 34 input chan simdjson.Stream 35 decoded chan simdjson.Object 36 37 // err will only be returned after decoded has been closed. 38 err *error 39 readCloser io.ReadCloser 40 onReaderExit func() 41 42 exitReader chan struct{} 43 readerWg sync.WaitGroup 44 } 45 46 // Read - reads single record. 47 func (r *Reader) Read(dst sql.Record) (sql.Record, error) { 48 v, ok := <-r.decoded 49 if !ok { 50 if r.err != nil && *r.err != nil { 51 return nil, errJSONParsingError(*r.err) 52 } 53 return nil, io.EOF 54 } 55 dstRec, ok := dst.(*Record) 56 if !ok { 57 dstRec = &Record{} 58 } 59 dstRec.object = v 60 return dstRec, nil 61 } 62 63 // Close - closes underlying reader. 64 func (r *Reader) Close() error { 65 // Close the input. 66 // Potentially racy if the stream decoder is still reading. 67 if r.readCloser != nil { 68 r.readCloser.Close() 69 } 70 if r.exitReader != nil { 71 close(r.exitReader) 72 r.readerWg.Wait() 73 r.exitReader = nil 74 r.input = nil 75 } 76 return nil 77 } 78 79 // startReader will start a reader that accepts input from r.input. 80 // Input should be root -> object input. Each root indicates a record. 81 // If r.input is closed, it is assumed that no more input will come. 82 // When this function returns r.readerWg will be decremented and r.decoded will be closed. 83 // On errors, r.err will be set. This should only be accessed after r.decoded has been closed. 84 func (r *Reader) startReader() { 85 defer r.onReaderExit() 86 var tmpObj simdjson.Object 87 for { 88 var in simdjson.Stream 89 select { 90 case in = <-r.input: 91 case <-r.exitReader: 92 return 93 } 94 if in.Error != nil && in.Error != io.EOF { 95 r.err = &in.Error 96 return 97 } 98 if in.Value == nil { 99 if in.Error == io.EOF { 100 return 101 } 102 continue 103 } 104 i := in.Value.Iter() 105 readloop: 106 for { 107 var next simdjson.Iter 108 typ, err := i.AdvanceIter(&next) 109 if err != nil { 110 r.err = &err 111 return 112 } 113 switch typ { 114 case simdjson.TypeNone: 115 break readloop 116 case simdjson.TypeRoot: 117 typ, obj, err := next.Root(nil) 118 if err != nil { 119 r.err = &err 120 return 121 } 122 if typ != simdjson.TypeObject { 123 if typ == simdjson.TypeNone { 124 continue 125 } 126 err = fmt.Errorf("unexpected json type below root :%v", typ) 127 r.err = &err 128 return 129 } 130 131 o, err := obj.Object(&tmpObj) 132 if err != nil { 133 r.err = &err 134 return 135 } 136 select { 137 case <-r.exitReader: 138 return 139 case r.decoded <- *o: 140 } 141 default: 142 err = fmt.Errorf("unexpected root json type:%v", typ) 143 r.err = &err 144 return 145 } 146 } 147 if in.Error == io.EOF { 148 return 149 } 150 } 151 } 152 153 // NewReader - creates new JSON reader using readCloser. 154 func NewReader(readCloser io.ReadCloser, args *json.ReaderArgs) *Reader { 155 r := Reader{ 156 args: args, 157 readCloser: &safeCloser{r: io.Reader(readCloser)}, 158 decoded: make(chan simdjson.Object, 1000), 159 input: make(chan simdjson.Stream, 2), 160 exitReader: make(chan struct{}), 161 } 162 r.onReaderExit = func() { 163 close(r.decoded) 164 readCloser.Close() 165 for range r.input { 166 // Read until EOF trickles through. 167 // Otherwise, we risk the decoder hanging. 168 } 169 r.readerWg.Done() 170 } 171 172 // We cannot reuse as we are sending parsed objects elsewhere. 173 simdjson.ParseNDStream(readCloser, r.input, nil) 174 r.readerWg.Add(1) 175 go r.startReader() 176 return &r 177 } 178 179 // NewElementReader - creates new JSON reader using readCloser. 180 func NewElementReader(ch chan simdjson.Object, err *error, args *json.ReaderArgs) *Reader { 181 return &Reader{ 182 args: args, 183 decoded: ch, 184 err: err, 185 readCloser: nil, 186 } 187 } 188 189 // safeCloser will wrap a Reader as a ReadCloser. 190 // It is safe to call Close while the reader is being used. 191 type safeCloser struct { 192 closed uint32 193 r io.Reader 194 } 195 196 func (s *safeCloser) Read(p []byte) (n int, err error) { 197 if atomic.LoadUint32(&s.closed) == 1 { 198 return 0, io.EOF 199 } 200 n, err = s.r.Read(p) 201 if atomic.LoadUint32(&s.closed) == 1 { 202 return 0, io.EOF 203 } 204 return n, err 205 } 206 207 func (s *safeCloser) Close() error { 208 atomic.CompareAndSwapUint32(&s.closed, 0, 1) 209 return nil 210 }