// nonAscii matches every run of characters that is not an ASCII letter,
// an ASCII digit, a dot or an underscore.
var nonAscii = regexp.MustCompile(`[^a-zA-Z0-9\.\_]+`)

// mutatedVowels transliterates German umlauts into two-letter digraphs.
// Upper case umlauts are mapped to lower case digraphs too.
var mutatedVowels = strings.NewReplacer("ä", "ae", "ö", "oe", "ü", "ue", "Ä", "ae", "Ö", "oe", "Ü", "ue")

// Replacers and patterns used by NormalizeInnerWhitespace.
var (
	replNewLines = strings.NewReplacer("\r\n", " ", "\r", " ", "\n", " ")
	replTabs     = strings.NewReplacer("\t", " ")
	doubleSpaces = regexp.MustCompile("([ ]+)")
)

// NormalizeInnerWhitespace reduces all kinds of newlines, tabs and runs of
// spaces to a single space each. Leading and trailing whitespace is
// collapsed, not removed. It paves the way for later beautification.
func NormalizeInnerWhitespace(s string) string {
	s = replNewLines.Replace(s)
	s = replTabs.Replace(s)
	return doubleSpaces.ReplaceAllString(s, " ")
}

// StringNormalize is an alias for LowerCasedUnderscored.
func StringNormalize(s string) string {
	return LowerCasedUnderscored(s)
}

// LowerCasedUnderscored gives us a condensed filename, cleansed of all
// non-ASCII characters, where word boundaries are encoded by "_".
//
// Whenever we want a transformation of user input into innocuous,
// lower case, sortable, searchable ASCII, we should look to this func.
//
// The steps are: umlauts are transliterated, surrounding whitespace and
// quotes are trimmed, every run of characters other than ASCII letters,
// digits, '.' and '_' becomes a single "_", outer underscores are trimmed
// and the result is lower-cased. Finally underscores directly next to a dot
// are dropped, which keeps file extensions clean ("name_.png" => "name.png").
func LowerCasedUnderscored(s string) string {
	s = mutatedVowels.Replace(s)

	s = strings.TrimSpace(s)
	s = strings.Trim(s, `"' `)

	replaced := nonAscii.ReplaceAllString(s, "_")
	replaced = strings.Trim(replaced, `_`)
	replaced = strings.ToLower(replaced)

	// clean the file extension
	replaced = strings.Replace(replaced, "_.", ".", -1)
	replaced = strings.Replace(replaced, "._", ".", -1)

	return replaced
}

// Reverse returns s with the order of its code points reversed.
// The reversal works on runes, not on bytes, so multi-byte characters
// stay intact.
func Reverse(s string) string {
	rn := []rune(s)
	for i, j := 0, len(rn)-1; i < j; i, j = i+1, j-1 {
		rn[i], rn[j] = rn[j], rn[i]
	}
	return string(rn)
}

// ToLenR is the right-aligned counterpart of ToLen: it keeps the last nx
// characters of s, or pads s with spaces on the left until it is nx
// characters long.
func ToLenR(s string, nx int) string {
	s = Reverse(s)
	s = ToLen(s, nx)
	s = Reverse(s)
	return s
}

// ToLen chops or extends a string to exactly nx characters (runes).
// Longer strings are cut after nx characters, shorter strings are padded
// with spaces on the right. Format strings like %4v do not restrict the
// length, which is why this func exists.
//
// For nx <= 0 the empty string is returned.
func ToLen(s string, nx int) string {
	if nx <= 0 {
		return ""
	}

	ret := make([]rune, 0, nx)
	for _, cp := range s {
		// Count the runes already taken. The index that range yields
		// is a byte offset and must not be compared against nx.
		if len(ret) == nx {
			break
		}
		ret = append(ret, cp)
	}

	for len(ret) < nx {
		ret = append(ret, ' ')
	}

	return string(ret)
}

// Ellipsoider shortens s to its first nx-1 characters, followed by "..."
// and its last nx-1 characters.
//
// Strings with at most 2*nx characters are returned unchanged; the empty
// string yields the empty string. Lengths are counted in runes, so
// multi-byte characters are never cut in half. For nx <= 1 no characters
// are kept on either side, and a string longer than 2*nx characters is
// reduced to "...".
func Ellipsoider(s string, nx int) string {
	if len(s) == 0 {
		return ""
	}

	rn := []rune(s)
	if len(rn) <= 2*nx {
		return s
	}

	const ellip = "..."
	keep := nx - 1
	if keep < 0 {
		keep = 0
	}
	// len(rn) > 2*nx guarantees that head and tail do not overlap.
	return string(rn[:keep]) + ellip + string(rn[len(rn)-keep:])
}

// SplitByWhitespace splits by *any* combination of \t \n or space.
// It is a thin wrapper around strings.Fields.
func SplitByWhitespace(s1 string) (s2 []string) {
	return strings.Fields(s1)
}

// Breaker breaks a string into consecutive substrings of nx bytes each.
// The last substring holds the remainder and may be shorter.
//
// Sizes are measured in bytes; a multi-byte character can therefore be
// split across two substrings. An empty s yields an empty, non-nil slice.
// For nx <= 0 no breaking is possible and s is returned as the only element.
func Breaker(s string, nx int) []string {
	if len(s) == 0 {
		return make([]string, 0)
	}
	if nx <= 0 {
		return []string{s}
	}

	rows := len(s) / nx
	if len(s)%nx != 0 {
		rows++
	}

	ret := make([]string, 0, rows)
	for len(s) > nx {
		ret = append(ret, s[:nx])
		s = s[nx:]
	}
	return append(ret, s)
}

// IncrementString takes the last character or symbol of s
// and "increments" it to the next higher code point.
//
// This is for all datastore indexes where we want to filter by
//
//	field >= someString
//	field <  nextBiggerString
//
// Note: We assume that s is already converted to lower case.
// If we wanted to maintain case sensitive filtering,
// we would construct four filters:
//
//	.Filter("title >=", "cowgirls")
//	.Filter("title < ", "cowgirlt")
//	.Filter("title >=", "Cowgirls")
//	.Filter("title < ", "Cowgirlt")
//
// IncrementString panics if s is empty or if s does not end in a valid
// UTF-8 sequence.
func IncrementString(s string) string {
	if s == "" {
		panic("Increment String is undefined for an empty string")
	}

	last, size := utf8.DecodeLastRuneInString(s)
	// An invalid encoding is reported as RuneError with size 1.
	// A correctly encoded U+FFFD has size 3 and is a legitimate last rune.
	if last == utf8.RuneError && size <= 1 {
		panic(fmt.Sprintf("Following string is invalid utf8: %q", s))
	}

	return s[:len(s)-size] + string(last+1)
}

// IndentedDump is the long awaited spew alternative, that is *safe*.
// It takes any structure and converts it to a hierarchical string,
// using only the standard library. See IndentedDumpBytes for details.
//
// Big disadvantage: no unexported fields.
// For unexported fields fall back to
//
//	fmt.Println(spew.Sdump(nd))
func IndentedDump(v interface{}) string {
	return string(IndentedDumpBytes(v))
}

// IndentedDumpBytes marshals v to tab-indented JSON and then reverts some
// of the JSON escaping for readability: \u003c and \u003e become < and >,
// and every escaped \n becomes a real line break. The result is meant for
// human eyes and is not guaranteed to be valid JSON anymore.
//
// If v cannot be marshalled, the returned bytes contain the error message
// instead of a dump.
func IndentedDumpBytes(v interface{}) []byte {
	firstColLeftMostPrefix := " "
	byts, err := json.MarshalIndent(v, firstColLeftMostPrefix, "\t")
	if err != nil {
		s := fmt.Sprintf("error indent: %v\n", err)
		return []byte(s)
	}

	byts = bytes.Replace(byts, []byte(`\u003c`), []byte("<"), -1)
	byts = bytes.Replace(byts, []byte(`\u003e`), []byte(">"), -1)
	byts = bytes.Replace(byts, []byte(`\n`), []byte("\n"), -1)

	return byts
}

// SliceDumpI prints a two-dimensional int slice to stdout:
// one output line per row, each row listing its "index value;" pairs.
func SliceDumpI(sl [][]int) {
	for i, row := range sl {
		fmt.Printf("%2v: ", i)
		for j, val := range row {
			fmt.Printf("%2v %2v; ", j, val)
		}
		fmt.Printf("\n")
	}
}

func init() {
	// Manual smoke tests, kept for reference:
	// log.Println(LowerCasedUnderscored(`" geh du alter Äsel äh? - "" `))
	// log.Println(LowerCasedUnderscored(` 'Theo - wir fahrn nach Łódź .PnG'`))
}