github.com/camlistore/go4@v0.0.0-20200104003542-c7e774b10ea0/strutil/strutil.go (about)

     1  /*
     2  Copyright 2013 The Perkeep Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package strutil contains string and byte processing functions.
    18  package strutil // import "go4.org/strutil"
    19  
    20  import (
    21  	"strings"
    22  	"unicode"
    23  	"unicode/utf8"
    24  )
    25  
    26  // Fork of Go's implementation in pkg/strings/strings.go:
    27  // Generic split: splits after each instance of sep,
    28  // including sepSave bytes of sep in the subarrays.
    29  func genSplit(dst []string, s, sep string, sepSave, n int) []string {
    30  	if n == 0 {
    31  		return nil
    32  	}
    33  	if sep == "" {
    34  		panic("sep is empty")
    35  	}
    36  	if n < 0 {
    37  		n = strings.Count(s, sep) + 1
    38  	}
    39  	c := sep[0]
    40  	start := 0
    41  	na := 0
    42  	for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
    43  		if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
    44  			dst = append(dst, s[start:i+sepSave])
    45  			na++
    46  			start = i + len(sep)
    47  			i += len(sep) - 1
    48  		}
    49  	}
    50  	dst = append(dst, s[start:])
    51  	return dst
    52  }
    53  
    54  // AppendSplitN is like strings.SplitN but appends to and returns dst.
    55  // Unlike strings.SplitN, an empty separator is not supported.
    56  // The count n determines the number of substrings to return:
    57  //   n > 0: at most n substrings; the last substring will be the unsplit remainder.
    58  //   n == 0: the result is nil (zero substrings)
    59  //   n < 0: all substrings
    60  func AppendSplitN(dst []string, s, sep string, n int) []string {
    61  	return genSplit(dst, s, sep, 0, n)
    62  }
    63  
    64  // equalFoldRune compares a and b runes whether they fold equally.
    65  //
    66  // The code comes from strings.EqualFold, but shortened to only one rune.
    67  func equalFoldRune(sr, tr rune) bool {
    68  	if sr == tr {
    69  		return true
    70  	}
    71  	// Make sr < tr to simplify what follows.
    72  	if tr < sr {
    73  		sr, tr = tr, sr
    74  	}
    75  	// Fast check for ASCII.
    76  	if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
    77  		// ASCII, and sr is upper case.  tr must be lower case.
    78  		if tr == sr+'a'-'A' {
    79  			return true
    80  		}
    81  		return false
    82  	}
    83  
    84  	// General case.  SimpleFold(x) returns the next equivalent rune > x
    85  	// or wraps around to smaller values.
    86  	r := unicode.SimpleFold(sr)
    87  	for r != sr && r < tr {
    88  		r = unicode.SimpleFold(r)
    89  	}
    90  	if r == tr {
    91  		return true
    92  	}
    93  	return false
    94  }
    95  
    96  // HasPrefixFold is like strings.HasPrefix but uses Unicode case-folding,
    97  // matching case insensitively.
    98  func HasPrefixFold(s, prefix string) bool {
    99  	if prefix == "" {
   100  		return true
   101  	}
   102  	for _, pr := range prefix {
   103  		if s == "" {
   104  			return false
   105  		}
   106  		// step with s, too
   107  		sr, size := utf8.DecodeRuneInString(s)
   108  		if sr == utf8.RuneError {
   109  			return false
   110  		}
   111  		s = s[size:]
   112  		if !equalFoldRune(sr, pr) {
   113  			return false
   114  		}
   115  	}
   116  	return true
   117  }
   118  
   119  // HasSuffixFold is like strings.HasSuffix but uses Unicode case-folding,
   120  // matching case insensitively.
   121  func HasSuffixFold(s, suffix string) bool {
   122  	if suffix == "" {
   123  		return true
   124  	}
   125  	// count the runes and bytes in s, but only till rune count of suffix
   126  	bo, so := len(s), len(suffix)
   127  	for bo > 0 && so > 0 {
   128  		r, size := utf8.DecodeLastRuneInString(s[:bo])
   129  		if r == utf8.RuneError {
   130  			return false
   131  		}
   132  		bo -= size
   133  
   134  		sr, size := utf8.DecodeLastRuneInString(suffix[:so])
   135  		if sr == utf8.RuneError {
   136  			return false
   137  		}
   138  		so -= size
   139  
   140  		if !equalFoldRune(r, sr) {
   141  			return false
   142  		}
   143  	}
   144  	return so == 0
   145  }
   146  
   147  // ContainsFold is like strings.Contains but uses Unicode case-folding.
   148  func ContainsFold(s, substr string) bool {
   149  	if substr == "" {
   150  		return true
   151  	}
   152  	if s == "" {
   153  		return false
   154  	}
   155  	firstRune := rune(substr[0])
   156  	if firstRune >= utf8.RuneSelf {
   157  		firstRune, _ = utf8.DecodeRuneInString(substr)
   158  	}
   159  	for i, rune := range s {
   160  		if equalFoldRune(rune, firstRune) && HasPrefixFold(s[i:], substr) {
   161  			return true
   162  		}
   163  	}
   164  	return false
   165  }
   166  
   167  // IsPlausibleJSON reports whether s likely contains a JSON object, without
   168  // actually parsing it. It's meant to be a light heuristic.
   169  func IsPlausibleJSON(s string) bool {
   170  	return startsWithOpenBrace(s) && endsWithCloseBrace(s)
   171  }
   172  
   173  func isASCIIWhite(b byte) bool { return b == ' ' || b == '\n' || b == '\r' || b == '\t' }
   174  
   175  func startsWithOpenBrace(s string) bool {
   176  	for len(s) > 0 {
   177  		switch {
   178  		case s[0] == '{':
   179  			return true
   180  		case isASCIIWhite(s[0]):
   181  			s = s[1:]
   182  		default:
   183  			return false
   184  		}
   185  	}
   186  	return false
   187  }
   188  
   189  func endsWithCloseBrace(s string) bool {
   190  	for len(s) > 0 {
   191  		last := len(s) - 1
   192  		switch {
   193  		case s[last] == '}':
   194  			return true
   195  		case isASCIIWhite(s[last]):
   196  			s = s[:last]
   197  		default:
   198  			return false
   199  		}
   200  	}
   201  	return false
   202  }