github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/stringspb/strings.go

github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/stringspb/strings.go (about)

     1  // Package stringspb has advanced string formatting and struct-dumping.
     2  package stringspb
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/json"
     7  	"fmt"
     8  	"regexp"
     9  	"strings"
    10  	"unicode/utf8"
    11  )
    12  
// nonAscii matches every run of one or more characters that are not
// ASCII letters, ASCII digits, '.' or '_'.
var nonAscii = regexp.MustCompile(`[^a-zA-Z0-9\.\_]+`)
// mutatedVowels transliterates the German umlauts ä, ö, ü to "ae", "oe", "ue".
// The upper-case forms Ä, Ö, Ü are mapped to the same lower-case digraphs.
var mutatedVowels = strings.NewReplacer("ä", "ae", "ö", "oe", "ü", "ue", "Ä", "ae", "Ö", "oe", "Ü", "ue")
    15  
    16  // normalize spaces
    17  var replNewLines = strings.NewReplacer("\r\n", " ", "\r", " ", "\n", " ")
    18  var replTabs = strings.NewReplacer("\t", " ")
    19  var doubleSpaces = regexp.MustCompile("([ ]+)")
    20  
    21  // All kinds of newlines, tabs and double spaces
    22  // are reduced to single space.
    23  // It paves the way for later beautification.
    24  func NormalizeInnerWhitespace(s string) string {
    25  	s = replNewLines.Replace(s)
    26  	s = replTabs.Replace(s)
    27  	s = doubleSpaces.ReplaceAllString(s, " ")
    28  	return s
    29  }
    30  
// StringNormalize is a thin alias for LowerCasedUnderscored:
// it passes s through and returns the result unchanged.
func StringNormalize(s string) string {
	return LowerCasedUnderscored(s)
}
    34  
    35  // LowerCasedUnderscored gives us a condensed filename
    36  // cleansed of all non Ascii characters
    37  // where word boundaries are encoded by "_"
    38  //
    39  // whenever we want a transformation of user input
    40  // into innoccuous lower case - sortable - searchable
    41  // ascii - the we should look to this func
    42  
    43  // in addition - extensions are respected and cleansed
    44  func LowerCasedUnderscored(s string) string {
    45  
    46  	//log.Printf("%v\n", s)
    47  
    48  	s = mutatedVowels.Replace(s)
    49  
    50  	s = strings.TrimSpace(s)
    51  	s = strings.Trim(s, `"' `)
    52  
    53  	replaced := nonAscii.ReplaceAllString(s, "_")
    54  
    55  	replaced = strings.Trim(replaced, `_`)
    56  	replaced = strings.ToLower(replaced)
    57  
    58  	// clean the  file extension
    59  	replaced = strings.Replace(replaced, "_.", ".", -1)
    60  	replaced = strings.Replace(replaced, "._", ".", -1)
    61  
    62  	//log.Printf("%v\n", replaced)
    63  
    64  	return replaced
    65  }
    66  
    67  func Reverse(s string) string {
    68  	rn := []rune(s)
    69  	rev := make([]rune, len(rn))
    70  	for idx, cp := range rn {
    71  		pos := len(rn) - idx - 1
    72  		rev[pos] = cp
    73  	}
    74  
    75  	return string(rev)
    76  }
    77  
    78  // ToLen chops or extends string to the exactly desired length
    79  // format strings like %4v do not restrict.
    80  func ToLenR(s string, nx int) string {
    81  	s = Reverse(s)
    82  	s = ToLen(s, nx)
    83  	s = Reverse(s)
    84  	return s
    85  }
    86  
    87  func ToLen(s string, nx int) string {
    88  
    89  	ret := make([]rune, 0, nx)
    90  	cntr := 0
    91  
    92  	for idx, cp := range s {
    93  		ret = append(ret, cp)
    94  		cntr++
    95  		if idx > nx-2 {
    96  			break
    97  		}
    98  	}
    99  
   100  	for cntr < nx {
   101  		ret = append(ret, ' ')
   102  		cntr++
   103  	}
   104  
   105  	return string(ret)
   106  
   107  }
   108  
   109  //  followed by ... and n trailing characters
   110  func Ellipsoider(s string, nx int) string {
   111  
   112  	if len(s) == 0 {
   113  		return ""
   114  		// return "[empty]"
   115  	}
   116  
   117  	if len(s) <= 2*nx {
   118  		return s
   119  	}
   120  
   121  	// len(s) > 2*nx
   122  	const ellip = "..."
   123  	return fmt.Sprintf("%v%v%v", ToLen(s, nx-1), ellip, s[len(s)-nx+1:])
   124  
   125  }
   126  
   127  // SplitByWhitespace splits by *any* combination of \t \n or space
   128  func SplitByWhitespace(s1 string) (s2 []string) {
   129  
   130  	return strings.Fields(s1) // 2015-06: RTFM
   131  
   132  	s1 = strings.TrimSpace(s1)
   133  	s2 = regexp.MustCompile(`[\s]+`).Split(s1, -1) // 2015-06: nice but needless
   134  	return
   135  }
   136  
   137  // Breaker breaks a string into n equal sized substrings
   138  func Breaker(s string, nx int) []string {
   139  
   140  	if len(s) == 0 {
   141  		return make([]string, 0)
   142  	}
   143  
   144  	rows := len(s) / nx
   145  	if (len(s) % nx) != 0 {
   146  		rows++
   147  	}
   148  	var ret []string = make([]string, rows)
   149  	for i := 0; i < rows; i++ {
   150  		if i < rows-1 {
   151  			ret[i] = s[i*nx : (i+1)*nx]
   152  
   153  		} else {
   154  			ret[i] = s[i*nx:]
   155  
   156  		}
   157  	}
   158  	return ret
   159  
   160  }
   161  
   162  /*
   163  	IncrementString takes the last Character or Symbol
   164  	and "increments" it.
   165  
   166  	This is for all datastore indexes where we want to
   167  	filter by
   168  		field >= someString
   169  		field <  nextBiggerString
   170  
   171  
   172  	Note: We assume that s is already converted to lower case,
   173  
   174  	If we wanted maintain case sensitive filtering,
   175  	then we would do something like
   176  		uTFCodePointUpperCase :=  uTFCodePointLastChar - 'A' + 'a'
   177  
   178  	And then we would construct four filters
   179  		.Filter("title >=", "cowgirls")
   180  		.Filter("title < ", "cowgirlt")
   181  		.Filter("title >=", "Cowgirls")
   182  		.Filter("title < ", "Cowgirlt")
   183  
   184  */
   185  func IncrementString(s string) string {
   186  
   187  	if s == "" {
   188  		panic("Increment String is undefined for an empty string")
   189  	}
   190  
   191  	uTFCodePointLastChar, itsSize := utf8.DecodeLastRuneInString(s)
   192  	if uTFCodePointLastChar == utf8.RuneError {
   193  		panic(fmt.Sprint("Following string is invalid utf8: %q", s))
   194  	}
   195  	sReduced := s[:len(s)-itsSize]
   196  
   197  	uTFCodePointLastChar++
   198  	oneHigherChar := fmt.Sprintf("%c", uTFCodePointLastChar)
   199  
   200  	return sReduced + oneHigherChar
   201  
   202  }
   203  
   204  // IndentedDump is the long awaited spew alternative, that is *safe*.
   205  // It takes any structure and converts it to a hierarchical string.
   206  // It has no external dependencies.
   207  //
   208  // Big disadvantage: no unexported fields.
   209  // For unexported fields fall back to
   210  //		fmt.Println(spew.Sdump(nd))
   211  //
   212  // http://play.golang.org/p/AQASTC4mBl suggests,
   213  // that strings are copied upon call and upon return
   214  //
   215  // Brad Fitz at google groups reccommends return a value
   216  // https://groups.google.com/forum/#!topic/golang-nuts/AdO_d4E_x6k
   217  func IndentedDump(v interface{}) string {
   218  
   219  	// firstColLeftMostPrefix := " "
   220  	// byts, err := json.MarshalIndent(v, firstColLeftMostPrefix, "\t")
   221  	// if err != nil {
   222  	// 	s := fmt.Sprintf("error indent: %v\n", err)
   223  	// 	return s
   224  	// }
   225  
   226  	// var reverseJSONTagEscaping = strings.NewReplacer(`\u003c`, "<", `\u003e`, ">", `\n`, "\n")
   227  	// s := reverseJSONTagEscaping.Replace(string(byts))
   228  
   229  	bts := IndentedDumpBytes(v)
   230  	return string(bts)
   231  }
   232  
   233  func IndentedDumpBytes(v interface{}) []byte {
   234  
   235  	firstColLeftMostPrefix := " "
   236  	byts, err := json.MarshalIndent(v, firstColLeftMostPrefix, "\t")
   237  	if err != nil {
   238  		s := fmt.Sprintf("error indent: %v\n", err)
   239  		return []byte(s)
   240  	}
   241  
   242  	byts = bytes.Replace(byts, []byte(`\u003c`), []byte("<"), -1)
   243  	byts = bytes.Replace(byts, []byte(`\u003e`), []byte(">"), -1)
   244  	byts = bytes.Replace(byts, []byte(`\n`), []byte("\n"), -1)
   245  
   246  	return byts
   247  }
   248  
   249  func SliceDumpI(sl [][]int) {
   250  	for i := 0; i < len(sl); i++ {
   251  		fmt.Printf("%2v: ", i)
   252  		for j := 0; j < len(sl[i]); j++ {
   253  			fmt.Printf("%2v %2v; ", j, sl[i][j])
   254  		}
   255  		fmt.Printf("\n")
   256  	}
   257  }
   258  
// init is intentionally empty. The commented-out lines are sample calls
// of LowerCasedUnderscored, kept for manual experimentation.
func init() {
	// log.Println(LowerCasedUnderscored(`" geh du alter Äsel äh? - "" `))
	// log.Println(LowerCasedUnderscored(` 'Theo - wir fahrn nach Łódź .PnG'`))
}