// FieldsX splits s around runs of white space like FieldsN, but never splits
// inside a kept range: a range opens at a character equal to keepStart and
// closes at the next character equal to keepEnd. The delimiters stay part of
// the field they belong to. Only single-character delimiters can match (each
// character of s is compared against the whole keepStart/keepEnd string);
// keepStart and keepEnd may be the same character, e.g. a double quote.
//
// The count determines the number of substrings to return:
//
//	n > 0: at most n substrings; the last substring will be the unsplit remainder.
//	n == 0: the result is nil (zero substrings)
//	n < 0: all substrings
//
// Trailing white space is removed from the last substring. An input that is
// empty or contains only white space yields nil.
//
// The code is copied from strings.Fields, with a count parameter added to limit
// the number of fields.
func FieldsX(s, keepStart, keepEnd string, count int) []string { // nolint gocognit
	if count == 0 {
		return nil
	}

	// First count the fields.
	// This is an exact count if s is ASCII, otherwise it is an approximation.
	n, setBits := countFieldsX(s, keepStart, keepEnd, count)

	if setBits >= utf8.RuneSelf {
		// Some runes in the input string are not ASCII.
		return FieldsFuncX(s, keepStart, keepEnd, count, unicode.IsSpace)
	}

	// ASCII fast path. The result is appended to rather than indexed, so the
	// counting pass above only acts as a capacity hint.
	a := make([]string, 0, n)
	i := 0

	// Skip spaces in the front of the input.
	for i < len(s) && asciiSpace[s[i]] != 0 {
		i++
	}

	fieldStart := i
	inRange := false

	for i < len(s) && (count < 0 || len(a) < count) {
		c := string(s[i])

		switch {
		case inRange:
			// Everything inside a kept range belongs to the field.
			if c == keepEnd {
				inRange = false
			}

			i++

			continue
		case c == keepStart:
			inRange = true
			i++

			continue
		case asciiSpace[s[i]] == 0:
			i++

			continue
		}

		// s[i] is a separator outside any kept range, so a field ends here.
		// The count-th field swallows the rest of the input instead.
		if len(a) == count-1 {
			a = append(a, s[fieldStart:])
		} else {
			a = append(a, s[fieldStart:i])
		}

		i++

		// Skip spaces in between fields.
		for i < len(s) && asciiSpace[s[i]] != 0 {
			i++
		}

		fieldStart = i
	}

	if fieldStart < len(s) && (count < 0 || len(a) < count) { // Last field might end at EOF.
		a = append(a, s[fieldStart:])
	}

	return fixLastField(a)
}

// countFieldsX returns the number of fields FieldsX will produce for s, capped
// at count when count is positive, together with the bitwise OR of all bytes
// of s. The caller uses the OR-ed bits to detect non-ASCII input; the field
// count is only exact for ASCII input.
func countFieldsX(s, keepStart, keepEnd string, count int) (int, uint8) {
	// setBits is used to track which bits are set in the bytes of s.
	setBits := uint8(0)
	n := 0
	wasSpace := 1
	inRange := false

	for i := 0; i < len(s); i++ {
		b := s[i]
		setBits |= b

		isSpace := 0

		// Same state machine as the FieldsX loop: the character that opens a
		// range is never also tested as its closing delimiter, so identical
		// start/end delimiters work.
		switch c := string(b); {
		case inRange:
			if c == keepEnd {
				inRange = false
			}
		case c == keepStart:
			inRange = true
		default:
			isSpace = int(asciiSpace[b])
		}

		// A field starts at every space -> non-space transition.
		n += wasSpace & ^isSpace
		wasSpace = isSpace
	}

	if count < 0 || n < count {
		return n, setBits
	}

	return count, setBits
}

// FieldsFuncX splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. Code points between a rune equal to keepStart
// and the next rune equal to keepEnd (delimiters included) are never treated as
// separators, and f is not called for them.
// If count > 0, at most count fields are returned and the last one is the unsplit
// remainder; for count <= 0 all fields are returned. Trailing separators are removed
// from the last field. If all code points in s satisfy f(c) or the string is empty,
// nil is returned.
// FieldsFuncX makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFuncX may crash.
func FieldsFuncX(s, keepStart, keepEnd string, count int, f func(rune) bool) []string { // nolint funlen
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}

	spans := make([]span, 0, 32)

	// Find the field start and end indices.
	wasField := false
	fromIndex := 0
	ending := false
	inRange := false

	for i, r := range s {
		c := string(r)
		isSep := false

		switch {
		case inRange:
			if c == keepEnd {
				inRange = false
			}
		case c == keepStart:
			// The opening delimiter is not also tested as a closing one, so
			// keepStart == keepEnd (e.g. quotes) works.
			inRange = true
		default:
			isSep = f(r)
		}

		if isSep {
			if wasField {
				spans = append(spans, span{start: fromIndex, end: i})
				wasField = false

				if count > 0 && len(spans) == count-1 {
					// Only one field is left: it takes the remainder.
					ending = true
				}
			}

			continue
		}

		if ending {
			wasField = true
			fromIndex = i

			break
		}

		if !wasField {
			wasField = true
			fromIndex = i

			if count == 1 { // nolint gomnd
				break
			}
		}
	}

	// Last field might end at EOF.
	if wasField {
		spans = append(spans, span{fromIndex, len(s)})
	}

	// Create strings from recorded field indices.
	a := make([]string, len(spans))
	for i, sp := range spans {
		a[i] = s[sp.start:sp.end]
	}

	return fixLastFieldFunc(a, f)
}

// PickFirst ignores its second argument (typically an error) and returns s.
func PickFirst(s string, _ interface{}) string {
	return s
}

// ExpandRange expands a string like "1-3" to ["1", "2", "3"]. A descending
// range such as "3-1" expands to ["3", "2", "1"]. White space around either
// bound is ignored. If f has no hyphen, starts or ends with its first hyphen,
// or either bound is not an integer, f is returned unchanged as the only element.
func ExpandRange(f string) []string {
	hyphenPos := strings.Index(f, "-")
	if hyphenPos <= 0 || hyphenPos == len(f)-1 {
		return []string{f}
	}

	fromI, err := strconv.Atoi(strings.TrimSpace(f[:hyphenPos]))
	if err != nil {
		return []string{f}
	}

	toI, err := strconv.Atoi(strings.TrimSpace(f[hyphenPos+1:]))
	if err != nil {
		return []string{f}
	}

	step := 1
	if fromI > toI {
		step = -1
	}

	var parts []string

	// Stop on equality instead of comparing with <= / >=, so the counter can
	// never wrap around when a bound is the largest or smallest int.
	for i := fromI; ; i += step {
		parts = append(parts, strconv.Itoa(i))

		if i == toI {
			break
		}
	}

	return parts
}

// FieldsN splits the string s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or
// nil if s is empty or contains only white space.
// The count determines the number of substrings to return:
//
//	n > 0: at most n substrings; the last substring will be the unsplit remainder.
//	n == 0: the result is nil (zero substrings)
//	n < 0: all substrings
//
// Trailing white space is removed from the last substring.
//
// The code is copied from strings.Fields, with a count parameter added to limit
// the number of fields.
func FieldsN(s string, count int) []string {
	if count == 0 {
		return nil
	}

	// First count the fields.
	// This is an exact count if s is ASCII, otherwise it is an approximation.
	n, setBits := countFields(s, count)

	if setBits >= utf8.RuneSelf {
		// Some runes in the input string are not ASCII.
		return FieldsFuncN(s, count, unicode.IsSpace)
	}

	// ASCII fast path
	a := make([]string, 0, n)
	i := 0

	// Skip spaces in the front of the input.
	for i < len(s) && asciiSpace[s[i]] != 0 {
		i++
	}

	fieldStart := i

	for i < len(s) && (count < 0 || len(a) < count) {
		if asciiSpace[s[i]] == 0 {
			i++
			continue
		}

		// The count-th field swallows the rest of the input.
		if len(a) == count-1 {
			a = append(a, s[fieldStart:])
		} else {
			a = append(a, s[fieldStart:i])
		}

		i++

		// Skip spaces in between fields.
		for i < len(s) && asciiSpace[s[i]] != 0 {
			i++
		}

		fieldStart = i
	}

	if fieldStart < len(s) && (count < 0 || len(a) < count) { // Last field might end at EOF.
		a = append(a, s[fieldStart:])
	}

	return fixLastField(a)
}

// fixLastField strips trailing ASCII white space from the last element of a,
// which may be an unsplit remainder, and returns a. It returns nil when a is
// empty.
func fixLastField(a []string) []string {
	if len(a) == 0 {
		return nil
	}

	lastIndex := len(a) - 1
	last := a[lastIndex]

	end := len(last)
	for end > 0 && asciiSpace[last[end-1]] != 0 {
		end--
	}

	a[lastIndex] = last[:end]

	return a
}

// countFields returns the number of white-space separated fields in s, capped
// at count when count is positive, together with the bitwise OR of all bytes
// of s. The field count is only exact for ASCII input.
func countFields(s string, count int) (int, uint8) {
	// setBits is used to track which bits are set in the bytes of s.
	setBits := uint8(0)
	n := 0
	wasSpace := 1

	for i := 0; i < len(s); i++ {
		b := s[i]
		setBits |= b
		isSpace := int(asciiSpace[b])
		// A field starts at every space -> non-space transition.
		n += wasSpace & ^isSpace
		wasSpace = isSpace
	}

	if count < 0 || n < count {
		return n, setBits
	}

	return count, setBits
}

// asciiSpace marks the ASCII bytes that count as white space with 1.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} // nolint gochecknoglobals

// FieldsFuncN splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If n > 0, at most n fields are returned and the
// last one is the unsplit remainder; for n <= 0 all fields are returned. Trailing
// separators are removed from the last field. If all code points in s satisfy f(c) or
// the string is empty, nil is returned.
// FieldsFuncN makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFuncN may crash.
func FieldsFuncN(s string, n int, f func(rune) bool) []string {
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}

	spans := make([]span, 0, 32)

	// Find the field start and end indices.
	wasField := false
	fromIndex := 0
	ending := false

	for i, r := range s {
		if f(r) {
			if wasField {
				spans = append(spans, span{start: fromIndex, end: i})
				wasField = false

				if n > 0 && len(spans) == n-1 {
					// Only one field is left: it takes the remainder.
					ending = true
				}
			}

			continue
		}

		if ending {
			wasField = true
			fromIndex = i

			break
		}

		if !wasField {
			wasField = true
			fromIndex = i

			if n == 1 { // nolint gomnd
				break
			}
		}
	}

	// Last field might end at EOF.
	if wasField {
		spans = append(spans, span{fromIndex, len(s)})
	}

	// Create strings from recorded field indices.
	a := make([]string, len(spans))
	for i, sp := range spans {
		a[i] = s[sp.start:sp.end]
	}

	return fixLastFieldFunc(a, f)
}

// fixLastFieldFunc strips the trailing run of code points satisfying f from the
// last element of a, which may be an unsplit remainder, and returns a. It
// returns nil when a is empty.
func fixLastFieldFunc(a []string, f func(rune) bool) []string {
	if len(a) == 0 {
		return nil
	}

	lastIndex := len(a) - 1
	a[lastIndex] = strings.TrimRightFunc(a[lastIndex], f)

	return a
}