github.com/richardwilkes/toolbox@v1.121.0/txt/natural_sort.go (about)

     1  // Copyright (c) 2016-2024 by Richard A. Wilkes. All rights reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the Mozilla Public
     4  // License, version 2.0. If a copy of the MPL was not distributed with
     5  // this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     6  //
     7  // This Source Code Form is "Incompatible With Secondary Licenses", as
     8  // defined by the Mozilla Public License, version 2.0.
     9  
    10  package txt
    11  
    12  import (
    13  	"slices"
    14  )
    15  
    16  // NaturalLess compares two strings using natural ordering. This means that "a2" < "a12".
    17  //
    18  // Non-digit sequences and numbers are compared separately. The former are compared byte-wise, while the latter are
    19  // compared numerically (except that the number of leading zeros is used as a tie-breaker, so "2" < "02").
    20  //
    21  // Limitations:
    22  //   - only ASCII digits (0-9) are considered.
    23  //
    24  // Original algorithm: https://github.com/fvbommel/util/blob/master/sortorder/natsort.go
    25  func NaturalLess(s1, s2 string, caseInsensitive bool) bool {
    26  	return NaturalCmp(s1, s2, caseInsensitive) < 0
    27  }
    28  
    29  // NaturalCmp compares two strings using natural ordering. This means that "a2" < "a12".
    30  //
    31  // Non-digit sequences and numbers are compared separately. The former are compared byte-wise, while the latter are
    32  // compared numerically (except that the number of leading zeros is used as a tie-breaker, so "2" < "02").
    33  //
    34  // Limitations:
    35  //   - only ASCII digits (0-9) are considered.
    36  //
    37  // Original algorithm: https://github.com/fvbommel/util/blob/master/sortorder/natsort.go
    38  func NaturalCmp(s1, s2 string, caseInsensitive bool) int {
    39  	i1 := 0
    40  	i2 := 0
    41  	for i1 < len(s1) && i2 < len(s2) {
    42  		c1 := s1[i1]
    43  		c2 := s2[i2]
    44  		d1 := c1 >= '0' && c1 <= '9'
    45  		d2 := c2 >= '0' && c2 <= '9'
    46  		switch {
    47  		case d1 != d2: // Digits before other characters.
    48  			if d1 { // True if LHS is a digit, false if the RHS is one.
    49  				return -1
    50  			}
    51  			return 1
    52  		case !d1: // && !d2, because d1 == d2
    53  			// UTF-8 compares byte-wise-lexicographically, no need to decode code-points.
    54  			if caseInsensitive {
    55  				if c1 >= 'a' && c1 <= 'z' {
    56  					c1 -= 'a' - 'A'
    57  				}
    58  				if c2 >= 'a' && c2 <= 'z' {
    59  					c2 -= 'a' - 'A'
    60  				}
    61  			}
    62  			if c1 != c2 {
    63  				if c1 < c2 {
    64  					return -1
    65  				}
    66  				return 1
    67  			}
    68  			i1++
    69  			i2++
    70  		default: // Digits
    71  			// Eat zeros.
    72  			for i1 < len(s1) && s1[i1] == '0' {
    73  				i1++
    74  			}
    75  			for i1 < len(s1) && s1[i1] == '0' {
    76  				i1++
    77  			}
    78  			for i2 < len(s2) && s2[i2] == '0' {
    79  				i2++
    80  			}
    81  			// Eat all digits.
    82  			nz1, nz2 := i1, i2
    83  			for i1 < len(s1) && s1[i1] >= '0' && s1[i1] <= '9' {
    84  				i1++
    85  			}
    86  			for i2 < len(s2) && s2[i2] >= '0' && s2[i2] <= '9' {
    87  				i2++
    88  			}
    89  			// If lengths of numbers with non-zero prefix differ, the shorter one is less.
    90  			if len1, len2 := i1-nz1, i2-nz2; len1 != len2 {
    91  				if len1 < len2 {
    92  					return -1
    93  				}
    94  				return 1
    95  			}
    96  			// If they're not equal, string comparison is correct.
    97  			if nr1, nr2 := s1[nz1:i1], s2[nz2:i2]; nr1 != nr2 {
    98  				if nr1 < nr2 {
    99  					return -1
   100  				}
   101  				return 1
   102  			}
   103  			// Otherwise, the one with less zeros is less. Because everything up to the number is equal, comparing the
   104  			// index after the zeros is sufficient.
   105  			if nz1 != nz2 {
   106  				if nz1 < nz2 {
   107  					return -1
   108  				}
   109  				return 1
   110  			}
   111  		}
   112  		// They're identical so far, so continue comparing.
   113  	}
   114  	// So far they are identical. At least one is ended. If the other continues, it sorts last. If the are the same
   115  	// length and the caseInsensitive flag was set, compare again, but without the flag.
   116  	switch {
   117  	case len(s1) == len(s2):
   118  		if caseInsensitive {
   119  			return NaturalCmp(s1, s2, false)
   120  		}
   121  		return 0
   122  	case len(s1) < len(s2):
   123  		return -1
   124  	default:
   125  		return 1
   126  	}
   127  }
   128  
   129  // SortStringsNaturalAscending sorts a slice of strings using NaturalLess in least to most order.
   130  func SortStringsNaturalAscending(in []string) {
   131  	slices.SortFunc(in, func(a, b string) int { return NaturalCmp(a, b, true) })
   132  }
   133  
   134  // SortStringsNaturalDescending sorts a slice of strings using NaturalLess in most to least order.
   135  func SortStringsNaturalDescending(in []string) {
   136  	slices.SortFunc(in, func(a, b string) int { return NaturalCmp(b, a, true) })
   137  }