github.com/dgraph-io/simdjson-go@v0.3.0/simdjson_amd64.go (about)

     1  //+build !appengine
     2  //+build !noasm
     3  //+build gc
     4  
     5  /*
     6   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     7   *
     8   * Licensed under the Apache License, Version 2.0 (the "License");
     9   * you may not use this file except in compliance with the License.
    10   * You may obtain a copy of the License at
    11   *
    12   *     http://www.apache.org/licenses/LICENSE-2.0
    13   *
    14   * Unless required by applicable law or agreed to in writing, software
    15   * distributed under the License is distributed on an "AS IS" BASIS,
    16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    17   * See the License for the specific language governing permissions and
    18   * limitations under the License.
    19   */
    20  
    21  package simdjson
    22  
    23  import (
    24  	"bufio"
    25  	"bytes"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"runtime"
    30  	"sync"
    31  
    32  	"github.com/klauspost/cpuid/v2"
    33  )
    34  
    35  // SupportedCPU will return whether the CPU is supported.
    36  func SupportedCPU() bool {
    37  	return cpuid.CPU.Supports(cpuid.AVX2, cpuid.CLMUL)
    38  }
    39  
    40  // Parse a block of data and return the parsed JSON.
    41  // An optional block of previously parsed json can be supplied to reduce allocations.
    42  func Parse(b []byte, reuse *ParsedJson) (*ParsedJson, error) {
    43  	if !SupportedCPU() {
    44  		return nil, errors.New("Host CPU does not meet target specs")
    45  	}
    46  	var pj *internalParsedJson
    47  	if reuse != nil && reuse.internal != nil {
    48  		pj = reuse.internal
    49  		pj.ParsedJson = *reuse
    50  		pj.ParsedJson.internal = nil
    51  		reuse = &ParsedJson{}
    52  	}
    53  	if pj == nil {
    54  		pj = &internalParsedJson{}
    55  	}
    56  	err := pj.parseMessage(b)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	parsed := &pj.ParsedJson
    61  	parsed.internal = pj
    62  	return parsed, nil
    63  }
    64  
    65  // ParseND will parse newline delimited JSON.
    66  // An optional block of previously parsed json can be supplied to reduce allocations.
    67  func ParseND(b []byte, reuse *ParsedJson) (*ParsedJson, error) {
    68  	if !SupportedCPU() {
    69  		return nil, errors.New("Host CPU does not meet target specs")
    70  	}
    71  	var pj internalParsedJson
    72  	if reuse != nil {
    73  		pj.ParsedJson = *reuse
    74  	}
    75  	b = bytes.TrimSpace(b)
    76  
    77  	err := pj.parseMessageNdjson(b)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	return &pj.ParsedJson, nil
    82  }
    83  
    84  // A Stream is used to stream back results.
    85  // Either Error or Value will be set on returned results.
    86  type Stream struct {
    87  	Value *ParsedJson
    88  	Error error
    89  }
    90  
    91  // ParseNDStream will parse a stream and return parsed JSON to the supplied result channel.
    92  // The method will return immediately.
    93  // Each element is contained within a root tag.
    94  //   <root>Element 1</root><root>Element 2</root>...
    95  // Each result will contain an unspecified number of full elements,
    96  // so it can be assumed that each result starts and ends with a root tag.
    97  // The parser will keep parsing until writes to the result stream blocks.
    98  // A stream is finished when a non-nil Error is returned.
    99  // If the stream was parsed until the end the Error value will be io.EOF
   100  // The channel will be closed after an error has been returned.
   101  // An optional channel for returning consumed results can be provided.
   102  // There is no guarantee that elements will be consumed, so always use
   103  // non-blocking writes to the reuse channel.
   104  func ParseNDStream(r io.Reader, res chan<- Stream, reuse <-chan *ParsedJson) {
   105  	if !SupportedCPU() {
   106  		go func() {
   107  			res <- Stream{
   108  				Value: nil,
   109  				Error: fmt.Errorf("Host CPU does not meet target specs"),
   110  			}
   111  			close(res)
   112  		}()
   113  		return
   114  	}
   115  	const tmpSize = 10 << 20
   116  	buf := bufio.NewReaderSize(r, tmpSize)
   117  	tmpPool := sync.Pool{New: func() interface{} {
   118  		return make([]byte, tmpSize+1024)
   119  	}}
   120  	conc := (runtime.GOMAXPROCS(0) + 1) / 2
   121  	queue := make(chan chan Stream, conc)
   122  	go func() {
   123  		// Forward finished items in order.
   124  		defer close(res)
   125  		end := false
   126  		for items := range queue {
   127  			i := <-items
   128  			select {
   129  			case res <- i:
   130  			default:
   131  				if !end {
   132  					// Block if we haven't returned an error
   133  					res <- i
   134  				}
   135  			}
   136  			if i.Error != nil {
   137  				end = true
   138  			}
   139  		}
   140  	}()
   141  	go func() {
   142  		defer close(queue)
   143  		for {
   144  			tmp := tmpPool.Get().([]byte)
   145  			tmp = tmp[:tmpSize]
   146  			n, err := buf.Read(tmp)
   147  			if err != nil && err != io.EOF {
   148  				queueError(queue, err)
   149  				return
   150  			}
   151  			tmp = tmp[:n]
   152  			// Read until Newline
   153  			if err != io.EOF {
   154  				b, err2 := buf.ReadBytes('\n')
   155  				if err2 != nil && err2 != io.EOF {
   156  					queueError(queue, err2)
   157  					return
   158  				}
   159  				tmp = append(tmp, b...)
   160  				// Forward io.EOF
   161  				err = err2
   162  			}
   163  
   164  			if len(tmp) > 0 {
   165  				result := make(chan Stream, 0)
   166  				queue <- result
   167  				go func() {
   168  					var pj internalParsedJson
   169  					select {
   170  					case v := <-reuse:
   171  						if cap(v.Message) >= tmpSize+1024 {
   172  							tmpPool.Put(v.Message)
   173  							v.Message = nil
   174  						}
   175  						pj.ParsedJson = *v
   176  
   177  					default:
   178  					}
   179  					parseErr := pj.parseMessageNdjson(tmp)
   180  					if parseErr != nil {
   181  						result <- Stream{
   182  							Value: nil,
   183  							Error: fmt.Errorf("parsing input: %w", parseErr),
   184  						}
   185  						return
   186  					}
   187  					parsed := pj.ParsedJson
   188  					result <- Stream{
   189  						Value: &parsed,
   190  						Error: nil,
   191  					}
   192  				}()
   193  			} else {
   194  				tmpPool.Put(tmp)
   195  			}
   196  			if err != nil {
   197  				// Should only really be io.EOF
   198  				queueError(queue, err)
   199  				return
   200  			}
   201  		}
   202  	}()
   203  }
   204  
   205  func queueError(queue chan chan Stream, err error) {
   206  	result := make(chan Stream, 0)
   207  	queue <- result
   208  	result <- Stream{
   209  		Value: nil,
   210  		Error: err,
   211  	}
   212  }