gitee.com/sy_183/go-common@v1.0.5-0.20231205030221-958cfe129b47/parser/line-parser.go (about)

     1  package parser
     2  
     3  import (
     4  	"bytes"
     5  	"gitee.com/sy_183/go-common/errors"
     6  	"gitee.com/sy_183/go-common/slice/unsafe"
     7  )
     8  
// DefaultMaxLineSize is the line size limit, in bytes, that Parse applies when
// LineParser.MaxLineSize is zero or negative.
const DefaultMaxLineSize = 2048
    10  
// LineParser splits a byte stream, handed over chunk by chunk through Parse,
// into lines terminated by '\n'. A '\r' directly before the '\n' is dropped.
//
// The zero value is ready to use.
type LineParser struct {
	MaxLineSize int      // maximum line length in bytes; Parse replaces values <= 0 with DefaultMaxLineSize
	cache       [][]byte // chunks of the line being assembled, in arrival order; stored as passed to Parse, not copied
	cacheSize   int      // total number of bytes held in cache
	skip        bool     // true while the rest of an over-long line is being discarded up to its '\n'
	line        string   // most recently completed line; returned by Line
}
    18  
    19  func (lp *LineParser) reset() {
    20  	lp.cache = lp.cache[:0]
    21  	lp.cacheSize = 0
    22  }
    23  
    24  func (lp *LineParser) Parse(data []byte) (ok bool, remain []byte, err error) {
    25  	if lp.skip {
    26  		if i := bytes.IndexByte(data, '\n'); i >= 0 {
    27  			data = data[i+1:]
    28  			lp.skip = false
    29  		} else {
    30  			return false, nil, nil
    31  		}
    32  	}
    33  	if len(data) == 0 {
    34  		return
    35  	}
    36  	if lp.MaxLineSize <= 0 {
    37  		lp.MaxLineSize = DefaultMaxLineSize
    38  	}
    39  	limit := lp.MaxLineSize - lp.cacheSize
    40  	if i := bytes.IndexByte(data, '\n'); i >= 0 && i <= limit {
    41  		if i > 0 {
    42  			var chunk []byte
    43  			if data[i-1] == '\r' {
    44  				// 换行符之前包含一个回车符
    45  				chunk = data[:i-1]
    46  			} else {
    47  				// 换行符之前不包含回车符
    48  				chunk = data[:i]
    49  			}
    50  			if len(chunk) > 0 {
    51  				lp.cache = append(lp.cache, chunk)
    52  				lp.cacheSize += len(chunk)
    53  			}
    54  		} else if len(lp.cache) > 0 {
    55  			last := lp.cache[len(lp.cache)-1]
    56  			if last[len(last)-1] == '\r' {
    57  				// 上一次解析的数据中最后一个字符为回车符,需要去除掉
    58  				if last = last[:len(last)-1]; len(last) == 0 {
    59  					lp.cache = lp.cache[:len(lp.cache)-1]
    60  				} else {
    61  					lp.cache[len(lp.cache)-1] = last
    62  				}
    63  				lp.cacheSize--
    64  			}
    65  		}
    66  		switch len(lp.cache) {
    67  		case 0:
    68  			lp.line = ""
    69  		case 1:
    70  			lp.line = unsafe.String(lp.cache[0])
    71  		default:
    72  			lp.line = unsafe.String(bytes.Join(lp.cache, nil))
    73  		}
    74  		lp.reset()
    75  		return true, data[i+1:], nil
    76  	} else if (i >= 0 && i > limit) || (i < 0 && len(data) >= limit) {
    77  		// 不管是否找到了换行符,如果此时解析行的长度超过了限制,则返回错误
    78  		var parsedLineSize int
    79  		if i >= 0 {
    80  			parsedLineSize = lp.cacheSize + i + 1
    81  			remain = data[i+1:]
    82  		} else {
    83  			parsedLineSize = lp.cacheSize + len(data)
    84  			lp.skip = true
    85  		}
    86  		lp.reset()
    87  		return false, remain, errors.NewSizeOutOfRange("单行数据", 0, int64(lp.MaxLineSize), int64(parsedLineSize), false)
    88  	} else {
    89  		// 没找到换行符,并且此时解析行的长度没超过限制,将数据添加到缓存
    90  		lp.cache = append(lp.cache, data)
    91  		lp.cacheSize += len(data)
    92  	}
    93  	return false, nil, nil
    94  }
    95  
    96  func (lp *LineParser) ParseP(data []byte, remainP *[]byte) (ok bool, err error) {
    97  	ok, *remainP, err = lp.Parse(data)
    98  	return
    99  }
   100  
// Line returns the line completed by the most recent call to Parse that
// reported ok == true, without its line terminator. It returns "" if no line
// has been completed yet or after Reset.
func (lp *LineParser) Line() string {
	return lp.line
}
   104  
   105  func (lp *LineParser) Reset() {
   106  	lp.reset()
   107  	lp.skip = false
   108  	lp.line = ""
   109  }