// NaturalLess reports whether s1 sorts before s2 under natural ordering. This means that "a2" < "a12".
//
// It is shorthand for NaturalCmp(s1, s2, caseInsensitive) < 0, so the same rules apply: non-digit sequences and
// numbers are compared separately. The former are compared byte-wise, while the latter are compared numerically
// (except that the number of leading zeros is used as a tie-breaker, so "2" < "02").
//
// Limitations:
//   - only ASCII digits (0-9) are considered.
//
// Original algorithm: https://github.com/fvbommel/util/blob/master/sortorder/natsort.go
func NaturalLess(s1, s2 string, caseInsensitive bool) bool {
	return NaturalCmp(s1, s2, caseInsensitive) < 0
}

// NaturalCmp compares two strings using natural ordering. This means that "a2" < "a12". It returns -1 if s1 sorts
// before s2, 1 if s1 sorts after s2, and 0 if they are identical.
//
// Non-digit sequences and numbers are compared separately. The former are compared byte-wise, while the latter are
// compared numerically (except that the number of leading zeros is used as a tie-breaker, so "2" < "02"). A digit
// always sorts before a non-digit byte.
//
// When caseInsensitive is true, the ASCII letters a-z are folded to upper case before non-digit bytes are compared.
// If the two strings have the same length and no difference is found that way, they are compared again
// case-sensitively, so 0 is still only returned for identical strings.
//
// Limitations:
//   - only ASCII digits (0-9) are considered.
//   - only ASCII letters (a-z) are case-folded.
//
// Original algorithm: https://github.com/fvbommel/util/blob/master/sortorder/natsort.go
func NaturalCmp(s1, s2 string, caseInsensitive bool) int {
	i1 := 0
	i2 := 0
	for i1 < len(s1) && i2 < len(s2) {
		c1 := s1[i1]
		c2 := s2[i2]
		d1 := c1 >= '0' && c1 <= '9'
		d2 := c2 >= '0' && c2 <= '9'
		switch {
		case d1 != d2: // Digits before other characters.
			if d1 { // True if LHS is a digit, false if the RHS is one.
				return -1
			}
			return 1
		case !d1: // && !d2, because d1 == d2
			// UTF-8 compares byte-wise-lexicographically, no need to decode code-points.
			if caseInsensitive {
				if c1 >= 'a' && c1 <= 'z' {
					c1 -= 'a' - 'A'
				}
				if c2 >= 'a' && c2 <= 'z' {
					c2 -= 'a' - 'A'
				}
			}
			if c1 != c2 {
				if c1 < c2 {
					return -1
				}
				return 1
			}
			i1++
			i2++
		default: // Digits
			// Eat zeros.
			for i1 < len(s1) && s1[i1] == '0' {
				i1++
			}
			for i2 < len(s2) && s2[i2] == '0' {
				i2++
			}
			// Eat all digits.
			nz1, nz2 := i1, i2
			for i1 < len(s1) && s1[i1] >= '0' && s1[i1] <= '9' {
				i1++
			}
			for i2 < len(s2) && s2[i2] >= '0' && s2[i2] <= '9' {
				i2++
			}
			// If lengths of numbers with non-zero prefix differ, the shorter one is less.
			if len1, len2 := i1-nz1, i2-nz2; len1 != len2 {
				if len1 < len2 {
					return -1
				}
				return 1
			}
			// If they're not equal, string comparison is correct.
			if nr1, nr2 := s1[nz1:i1], s2[nz2:i2]; nr1 != nr2 {
				if nr1 < nr2 {
					return -1
				}
				return 1
			}
			// Otherwise, the one with less zeros is less. Because everything up to the number is equal, comparing the
			// index after the zeros is sufficient.
			if nz1 != nz2 {
				if nz1 < nz2 {
					return -1
				}
				return 1
			}
		}
		// They're identical so far, so continue comparing.
	}
	// So far they are identical. At least one is ended. If the other continues, it sorts last. If they are the same
	// length and the caseInsensitive flag was set, compare again, but without the flag.
	switch {
	case len(s1) == len(s2):
		if caseInsensitive {
			return NaturalCmp(s1, s2, false)
		}
		return 0
	case len(s1) < len(s2):
		return -1
	default:
		return 1
	}
}

// SortStringsNaturalAscending sorts a slice of strings in place, in least to most order, using the case-insensitive
// form of NaturalCmp.
130 func SortStringsNaturalAscending(in []string) { 131 slices.SortFunc(in, func(a, b string) int { return NaturalCmp(a, b, true) }) 132 } 133 134 // SortStringsNaturalDescending sorts a slice of strings using NaturalLess in most to least order. 135 func SortStringsNaturalDescending(in []string) { 136 slices.SortFunc(in, func(a, b string) int { return NaturalCmp(b, a, true) }) 137 }