github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/simdjson_amd64.go (about)

     1  //go:build !appengine && !noasm && gc
     2  // +build !appengine,!noasm,gc
     3  
     4  /*
     5   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     6   *
     7   * Licensed under the Apache License, Version 2.0 (the "License");
     8   * you may not use this file except in compliance with the License.
     9   * You may obtain a copy of the License at
    10   *
    11   *     http://www.apache.org/licenses/LICENSE-2.0
    12   *
    13   * Unless required by applicable law or agreed to in writing, software
    14   * distributed under the License is distributed on an "AS IS" BASIS,
    15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16   * See the License for the specific language governing permissions and
    17   * limitations under the License.
    18   */
    19  
    20  package simdjson
    21  
    22  import (
    23  	"bufio"
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"runtime"
    29  	"sync"
    30  
    31  	"github.com/klauspost/cpuid/v2"
    32  )
    33  
    34  var wantFeatures = cpuid.CombineFeatures(cpuid.AVX2, cpuid.CLMUL)
    35  
    36  // SupportedCPU will return whether the CPU is supported.
    37  func SupportedCPU() bool {
    38  	return cpuid.CPU.HasAll(wantFeatures)
    39  }
    40  
    41  func newInternalParsedJson(reuse *ParsedJson, opts []ParserOption) (*internalParsedJson, error) {
    42  	if !SupportedCPU() {
    43  		return nil, errors.New("Host CPU does not meet target specs")
    44  	}
    45  	var pj *internalParsedJson
    46  	if reuse != nil && reuse.internal != nil {
    47  		pj = reuse.internal
    48  		pj.ParsedJson = *reuse
    49  		pj.ParsedJson.internal = nil
    50  		reuse = &ParsedJson{}
    51  	}
    52  	if pj == nil {
    53  		pj = &internalParsedJson{}
    54  	}
    55  	pj.copyStrings = true
    56  	for _, opt := range opts {
    57  		if err := opt(pj); err != nil {
    58  			return nil, err
    59  		}
    60  	}
    61  	return pj, nil
    62  }
    63  
    64  // Parse an object or array from a block of data and return the parsed JSON.
    65  // An optional block of previously parsed json can be supplied to reduce allocations.
    66  func Parse(b []byte, reuse *ParsedJson, opts ...ParserOption) (*ParsedJson, error) {
    67  	pj, err := newInternalParsedJson(reuse, opts)
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  	err = pj.parseMessage(b, false)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	parsed := &pj.ParsedJson
    76  	parsed.internal = pj
    77  	return parsed, nil
    78  }
    79  
    80  // ParseND will parse newline delimited JSON objects or arrays.
    81  // An optional block of previously parsed json can be supplied to reduce allocations.
    82  func ParseND(b []byte, reuse *ParsedJson, opts ...ParserOption) (*ParsedJson, error) {
    83  	pj, err := newInternalParsedJson(reuse, opts)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  	err = pj.parseMessage(bytes.TrimSpace(b), true)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	return &pj.ParsedJson, nil
    92  }
    93  
    94  // A Stream is used to stream back results.
    95  // Either Error or Value will be set on returned results.
    96  type Stream struct {
    97  	Value *ParsedJson
    98  	Error error
    99  }
   100  
   101  // ParseNDStream will parse a stream and return parsed JSON to the supplied result channel.
   102  // The method will return immediately.
   103  // Each element is contained within a root tag.
   104  //
   105  //	<root>Element 1</root><root>Element 2</root>...
   106  //
   107  // Each result will contain an unspecified number of full elements,
   108  // so it can be assumed that each result starts and ends with a root tag.
   109  // The parser will keep parsing until writes to the result stream blocks.
   110  // A stream is finished when a non-nil Error is returned.
   111  // If the stream was parsed until the end the Error value will be io.EOF
   112  // The channel will be closed after an error has been returned.
   113  // An optional channel for returning consumed results can be provided.
   114  // There is no guarantee that elements will be consumed, so always use
   115  // non-blocking writes to the reuse channel.
   116  func ParseNDStream(r io.Reader, res chan<- Stream, reuse <-chan *ParsedJson) {
   117  	if !SupportedCPU() {
   118  		go func() {
   119  			res <- Stream{
   120  				Value: nil,
   121  				Error: fmt.Errorf("Host CPU does not meet target specs"),
   122  			}
   123  			close(res)
   124  		}()
   125  		return
   126  	}
   127  	const tmpSize = 10 << 20
   128  	buf := bufio.NewReaderSize(r, tmpSize)
   129  	tmpPool := sync.Pool{New: func() interface{} {
   130  		return make([]byte, tmpSize+1024)
   131  	}}
   132  	conc := (runtime.GOMAXPROCS(0) + 1) / 2
   133  	queue := make(chan chan Stream, conc)
   134  	go func() {
   135  		// Forward finished items in order.
   136  		defer close(res)
   137  		end := false
   138  		for items := range queue {
   139  			i := <-items
   140  			select {
   141  			case res <- i:
   142  			default:
   143  				if !end {
   144  					// Block if we haven't returned an error
   145  					res <- i
   146  				}
   147  			}
   148  			if i.Error != nil {
   149  				end = true
   150  			}
   151  		}
   152  	}()
   153  	go func() {
   154  		defer close(queue)
   155  		for {
   156  			tmp := tmpPool.Get().([]byte)
   157  			tmp = tmp[:tmpSize]
   158  			n, err := buf.Read(tmp)
   159  			if err != nil && err != io.EOF {
   160  				queueError(queue, err)
   161  				return
   162  			}
   163  			tmp = tmp[:n]
   164  			// Read until Newline
   165  			if err != io.EOF {
   166  				b, err2 := buf.ReadBytes('\n')
   167  				if err2 != nil && err2 != io.EOF {
   168  					queueError(queue, err2)
   169  					return
   170  				}
   171  				tmp = append(tmp, b...)
   172  				// Forward io.EOF
   173  				err = err2
   174  			}
   175  
   176  			if len(tmp) > 0 {
   177  				result := make(chan Stream, 0)
   178  				queue <- result
   179  				go func() {
   180  					var pj internalParsedJson
   181  					pj.copyStrings = true
   182  					select {
   183  					case v := <-reuse:
   184  						if cap(v.Message) >= tmpSize+1024 {
   185  							tmpPool.Put(v.Message)
   186  							v.Message = nil
   187  						}
   188  						pj.ParsedJson = *v
   189  
   190  					default:
   191  					}
   192  					parseErr := pj.parseMessage(tmp, true)
   193  					if parseErr != nil {
   194  						result <- Stream{
   195  							Value: nil,
   196  							Error: fmt.Errorf("parsing input: %w", parseErr),
   197  						}
   198  						return
   199  					}
   200  					parsed := pj.ParsedJson
   201  					result <- Stream{
   202  						Value: &parsed,
   203  						Error: nil,
   204  					}
   205  				}()
   206  			} else {
   207  				tmpPool.Put(tmp)
   208  			}
   209  			if err != nil {
   210  				// Should only really be io.EOF
   211  				queueError(queue, err)
   212  				return
   213  			}
   214  		}
   215  	}()
   216  }
   217  
   218  func queueError(queue chan chan Stream, err error) {
   219  	result := make(chan Stream, 0)
   220  	queue <- result
   221  	result <- Stream{
   222  		Value: nil,
   223  		Error: err,
   224  	}
   225  }