github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/ss/fields.go (about)

     1  package ss
     2  
     3  import (
     4  	"strconv"
     5  	"strings"
     6  	"unicode"
     7  	"unicode/utf8"
     8  )
     9  
    10  // FieldsX splits the string s around each instance of one or more consecutive white space
    11  // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
    12  // empty slice if s contains only white space.
    13  // The count determines the number of substrings to return:
    14  //
    15  //	n > 0: at most n substrings; the last substring will be the unsplit remainder.
    16  //	n == 0: the result is nil (zero substrings)
    17  //	n < 0: all substrings
    18  //
    19  // Code are copy from strings.Fields and add count parameter to control the max fields.
    20  func FieldsX(s, keepStart, keepEnd string, count int) []string { // nolint gocognit
    21  	if count == 0 {
    22  		return nil
    23  	}
    24  
    25  	// First count the fields.
    26  	// This is an exact count if s is ASCII, otherwise it is an approximation.
    27  	n, setBits := countFieldsX(s, keepStart, keepEnd, count)
    28  
    29  	if setBits >= utf8.RuneSelf {
    30  		// Some runes in the input string are not ASCII.
    31  		return FieldsFuncX(s, keepStart, keepEnd, count, unicode.IsSpace)
    32  	}
    33  
    34  	// ASCII fast path
    35  	a := make([]string, n)
    36  	na := 0
    37  	fieldStart := 0
    38  	i := 0
    39  
    40  	// Skip spaces in the front of the input.
    41  	for i < len(s) && asciiSpace[s[i]] != 0 {
    42  		i++
    43  	}
    44  
    45  	fieldStart = i
    46  	inRange := false
    47  
    48  	for i < len(s) && (count < 0 || na < count) {
    49  		si := string(s[i])
    50  		if !inRange && si == keepStart {
    51  			inRange = true
    52  			i++
    53  
    54  			continue
    55  		}
    56  
    57  		if inRange {
    58  			if si == keepEnd {
    59  				inRange = false
    60  			}
    61  
    62  			i++
    63  
    64  			continue
    65  		}
    66  
    67  		if asciiSpace[s[i]] == 0 {
    68  			i++
    69  
    70  			continue
    71  		}
    72  
    73  		if na == count-1 {
    74  			a[na] = s[fieldStart:]
    75  		} else {
    76  			a[na] = s[fieldStart:i]
    77  		}
    78  
    79  		na++
    80  		i++
    81  
    82  		// Skip spaces in between fields.
    83  		for i < len(s) && asciiSpace[s[i]] != 0 {
    84  			i++
    85  		}
    86  
    87  		fieldStart = i
    88  	}
    89  
    90  	if fieldStart < len(s) && (count < 0 || na < count) { // Last field might end at EOF.
    91  		a[na] = s[fieldStart:]
    92  	}
    93  
    94  	return fixLastField(a)
    95  }
    96  
    97  func countFieldsX(s, keepStart, keepEnd string, count int) (int, uint8) {
    98  	// setBits is used to track which bits are set in the bytes of s.
    99  	setBits := uint8(0)
   100  	n := 0
   101  	wasSpace := 1
   102  	inRange := false
   103  
   104  	for i := 0; i < len(s); i++ {
   105  		r := s[i]
   106  		setBits |= r
   107  
   108  		si := string(s[i])
   109  		if !inRange && si == keepStart {
   110  			inRange = true
   111  		}
   112  
   113  		isSpace := 0
   114  
   115  		if inRange {
   116  			if si == keepEnd {
   117  				inRange = false
   118  			}
   119  		} else {
   120  			isSpace = int(asciiSpace[r])
   121  		}
   122  
   123  		n += wasSpace & ^isSpace
   124  		wasSpace = isSpace
   125  	}
   126  
   127  	if count < 0 || n < count {
   128  		return n, setBits
   129  	}
   130  
   131  	return count, setBits
   132  }
   133  
   134  // FieldsFuncX splits the string s at each run of Unicode code points c satisfying f(c)
   135  // and returns an array of slices of s. If all code points in s satisfy f(c) or the
   136  // string is empty, an empty slice is returned.
   137  // FieldsFuncN makes no guarantees about the order in which it calls f(c).
   138  // If f does not return consistent results for a given c, FieldsFuncN may crash.
   139  func FieldsFuncX(s, keepStart, keepEnd string, count int, f func(rune) bool) []string { // nolint funlen
   140  	// A span is used to record a slice of s of the form s[start:end].
   141  	// The start index is inclusive and the end index is exclusive.
   142  	type span struct {
   143  		start int
   144  		end   int
   145  	}
   146  
   147  	spans := make([]span, 0, 32)
   148  
   149  	// Find the field start and end indices.
   150  	wasField := false
   151  	fromIndex := 0
   152  	ending := false
   153  	inRange := false
   154  
   155  	for i, r := range s {
   156  		si := string(r)
   157  
   158  		if !inRange && si == keepStart {
   159  			inRange = true
   160  		}
   161  
   162  		isSep := !inRange && f(r)
   163  
   164  		if inRange && si == keepEnd {
   165  			inRange = false
   166  		}
   167  
   168  		if isSep {
   169  			if wasField {
   170  				spans = append(spans, span{start: fromIndex, end: i})
   171  				wasField = false
   172  
   173  				if count > 0 && len(spans) == count-1 {
   174  					ending = true
   175  				}
   176  			}
   177  
   178  			continue
   179  		}
   180  
   181  		if ending {
   182  			wasField = true
   183  			fromIndex = i
   184  
   185  			break
   186  		}
   187  
   188  		if !wasField {
   189  			wasField = true
   190  			fromIndex = i
   191  
   192  			if count == 1 { // nolint gomnd
   193  				break
   194  			}
   195  		}
   196  	}
   197  
   198  	// Last field might end at EOF.
   199  	if wasField {
   200  		spans = append(spans, span{fromIndex, len(s)})
   201  	}
   202  
   203  	// Create strings from recorded field indices.
   204  	a := make([]string, len(spans))
   205  	for i, span := range spans {
   206  		a[i] = s[span.start:span.end]
   207  	}
   208  
   209  	return fixLastFieldFunc(a, f)
   210  }
   211  
   212  // PickFirst ignores the error and returns s
   213  func PickFirst(s string, _ interface{}) string {
   214  	return s
   215  }
   216  
   217  // ExpandRange expands a string like 1-3 to [1,2,3]
   218  func ExpandRange(f string) []string {
   219  	hyphenPos := strings.Index(f, "-")
   220  	if hyphenPos <= 0 || hyphenPos == len(f)-1 {
   221  		return []string{f}
   222  	}
   223  
   224  	from := strings.TrimSpace(f[0:hyphenPos])
   225  	to := strings.TrimSpace(f[hyphenPos+1:])
   226  
   227  	fromI := 0
   228  	toI := 0
   229  
   230  	var err error
   231  
   232  	if fromI, err = strconv.Atoi(from); err != nil {
   233  		return []string{f}
   234  	}
   235  
   236  	if toI, err = strconv.Atoi(to); err != nil {
   237  		return []string{f}
   238  	}
   239  
   240  	parts := make([]string, 0)
   241  
   242  	if fromI < toI {
   243  		for i := fromI; i <= toI; i++ {
   244  			parts = append(parts, strconv.Itoa(i))
   245  		}
   246  	} else {
   247  		for i := fromI; i >= toI; i-- {
   248  			parts = append(parts, strconv.Itoa(i))
   249  		}
   250  	}
   251  
   252  	return parts
   253  }
   254  
   255  // FieldsN splits the string s around each instance of one or more consecutive white space
   256  // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
   257  // empty slice if s contains only white space.
   258  // The count determines the number of substrings to return:
   259  //
   260  //	n > 0: at most n substrings; the last substring will be the unsplit remainder.
   261  //	n == 0: the result is nil (zero substrings)
   262  //	n < 0: all substrings
   263  //
   264  // Code are copy from strings.Fields and add count parameter to control the max fields.
   265  func FieldsN(s string, count int) []string {
   266  	if count == 0 {
   267  		return nil
   268  	}
   269  
   270  	// First count the fields.
   271  	// This is an exact count if s is ASCII, otherwise it is an approximation.
   272  	n, setBits := countFields(s, count)
   273  
   274  	if setBits >= utf8.RuneSelf {
   275  		// Some runes in the input string are not ASCII.
   276  		return FieldsFuncN(s, count, unicode.IsSpace)
   277  	}
   278  
   279  	// ASCII fast path
   280  	a := make([]string, n)
   281  	na := 0
   282  	fieldStart := 0
   283  	i := 0
   284  
   285  	// Skip spaces in the front of the input.
   286  	for i < len(s) && asciiSpace[s[i]] != 0 {
   287  		i++
   288  	}
   289  
   290  	fieldStart = i
   291  
   292  	for i < len(s) && (count < 0 || na < count) {
   293  		if asciiSpace[s[i]] == 0 {
   294  			i++
   295  			continue
   296  		}
   297  
   298  		if na == count-1 {
   299  			a[na] = s[fieldStart:]
   300  		} else {
   301  			a[na] = s[fieldStart:i]
   302  		}
   303  
   304  		na++
   305  		i++
   306  
   307  		// Skip spaces in between fields.
   308  		for i < len(s) && asciiSpace[s[i]] != 0 {
   309  			i++
   310  		}
   311  
   312  		fieldStart = i
   313  	}
   314  
   315  	if fieldStart < len(s) && (count < 0 || na < count) { // Last field might end at EOF.
   316  		a[na] = s[fieldStart:]
   317  	}
   318  
   319  	return fixLastField(a)
   320  }
   321  
   322  func fixLastField(a []string) []string {
   323  	lastIndex := len(a) - 1 // nolint gomnd
   324  	last := a[lastIndex]
   325  	stopPos := 0
   326  
   327  	for i := 0; i < len(last); i++ {
   328  		isSep := asciiSpace[last[i]] == 1 // nolint gomnd
   329  		if isSep {
   330  			if stopPos == 0 {
   331  				stopPos = i
   332  			}
   333  		} else {
   334  			stopPos = 0
   335  		}
   336  	}
   337  
   338  	if stopPos > 0 {
   339  		a[lastIndex] = last[0:stopPos]
   340  	}
   341  
   342  	return a
   343  }
   344  
   345  func countFields(s string, count int) (int, uint8) {
   346  	// setBits is used to track which bits are set in the bytes of s.
   347  	setBits := uint8(0)
   348  	n := 0
   349  	wasSpace := 1
   350  
   351  	for i := 0; i < len(s); i++ {
   352  		r := s[i]
   353  		setBits |= r
   354  		isSpace := int(asciiSpace[r])
   355  		n += wasSpace & ^isSpace
   356  		wasSpace = isSpace
   357  	}
   358  
   359  	if count < 0 || n < count {
   360  		return n, setBits
   361  	}
   362  
   363  	return count, setBits
   364  }
   365  
   366  var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} // nolint gochecknoglobals
   367  
   368  // FieldsFuncN splits the string s at each run of Unicode code points c satisfying f(c)
   369  // and returns an array of slices of s. If all code points in s satisfy f(c) or the
   370  // string is empty, an empty slice is returned.
   371  // FieldsFuncN makes no guarantees about the order in which it calls f(c).
   372  // If f does not return consistent results for a given c, FieldsFuncN may crash.
   373  func FieldsFuncN(s string, n int, f func(rune) bool) []string {
   374  	// A span is used to record a slice of s of the form s[start:end].
   375  	// The start index is inclusive and the end index is exclusive.
   376  	type span struct {
   377  		start int
   378  		end   int
   379  	}
   380  
   381  	spans := make([]span, 0, 32)
   382  
   383  	// Find the field start and end indices.
   384  	wasField := false
   385  	fromIndex := 0
   386  	ending := false
   387  
   388  	for i, r := range s {
   389  		isSep := f(r)
   390  
   391  		if isSep {
   392  			if wasField {
   393  				spans = append(spans, span{start: fromIndex, end: i})
   394  				wasField = false
   395  
   396  				if n > 0 && len(spans) == n-1 {
   397  					ending = true
   398  				}
   399  			}
   400  
   401  			continue
   402  		}
   403  
   404  		if ending {
   405  			wasField = true
   406  			fromIndex = i
   407  
   408  			break
   409  		}
   410  
   411  		if !wasField {
   412  			wasField = true
   413  			fromIndex = i
   414  
   415  			if n == 1 { // nolint gomnd
   416  				break
   417  			}
   418  		}
   419  	}
   420  
   421  	// Last field might end at EOF.
   422  	if wasField {
   423  		spans = append(spans, span{fromIndex, len(s)})
   424  	}
   425  
   426  	// Create strings from recorded field indices.
   427  	a := make([]string, len(spans))
   428  	for i, span := range spans {
   429  		a[i] = s[span.start:span.end]
   430  	}
   431  
   432  	return fixLastFieldFunc(a, f)
   433  }
   434  
   435  func fixLastFieldFunc(a []string, f func(rune) bool) []string {
   436  	if len(a) == 0 {
   437  		return nil
   438  	}
   439  
   440  	lastIndex := len(a) - 1 // nolint gomnd
   441  	last := a[lastIndex]
   442  	stopPos := 0
   443  
   444  	for i, r := range last {
   445  		isSep := f(r)
   446  		if isSep {
   447  			if stopPos == 0 {
   448  				stopPos = i
   449  			}
   450  		} else {
   451  			stopPos = 0
   452  		}
   453  	}
   454  
   455  	if stopPos > 0 {
   456  		a[lastIndex] = last[0:stopPos]
   457  	}
   458  
   459  	return a
   460  }