github.com/rudderlabs/rudder-go-kit@v0.30.0/sanitize/sanitize_test.go (about) 1 package sanitize 2 3 import ( 4 "fmt" 5 "testing" 6 "unicode" 7 8 "github.com/stretchr/testify/require" 9 ) 10 11 var out string 12 13 func BenchmarkMessageID(b *testing.B) { 14 dirtyMessageID := "\u0000 Test foo_bar-baz \u034F 123-222 " 15 properMessageID := "123e4567-e89b-12d3-a456-426614174000" 16 17 b.Run("in-place for loop - dirty", func(b *testing.B) { 18 for i := 0; i < b.N; i++ { 19 out = sanitizeMessageIDForLoop(dirtyMessageID) 20 } 21 }) 22 23 b.Run("in-place for loop - proper", func(b *testing.B) { 24 for i := 0; i < b.N; i++ { 25 out = sanitizeMessageIDForLoop(properMessageID) 26 } 27 }) 28 29 b.Run("strings map - dirty", func(b *testing.B) { 30 for i := 0; i < b.N; i++ { 31 out = Unicode(dirtyMessageID) 32 } 33 }) 34 35 b.Run("strings map - proper", func(b *testing.B) { 36 for i := 0; i < b.N; i++ { 37 out = Unicode(properMessageID) 38 } 39 }) 40 } 41 42 // incorrect implementation of sanitizeMessageID, but used for benchmarking 43 func sanitizeMessageIDForLoop(messageID string) string { 44 for i, r := range messageID { 45 if unicode.IsPrint(r) { 46 continue 47 } 48 if !unicode.Is(invisibleRangeTable, r) { 49 continue 50 } 51 52 messageID = messageID[:i] + messageID[i+1:] 53 } 54 return messageID 55 } 56 57 func TestSanitizeMessageID(t *testing.T) { 58 testcases := []struct { 59 in string 60 out string 61 }{ 62 {"\u0000 Test \u0000foo_bar-baz 123-222 \u0000", " Test foo_bar-baz 123-222 "}, 63 {"\u0000", ""}, 64 {"\u0000 ", " "}, 65 {"\u0000 \u0000", " "}, 66 {"\u00A0\t\n\r\u034F", ""}, 67 {"τυχαίο;", "τυχαίο;"}, 68 } 69 70 for _, tc := range testcases { 71 cleanMessageID := Unicode(tc.in) 72 require.Equal(t, tc.out, cleanMessageID, fmt.Sprintf("%#v -> %#v", tc.in, tc.out)) 73 } 74 75 for _, r := range invisibleRunes { 76 cleanMessageID := Unicode(string(r)) 77 require.Empty(t, cleanMessageID, fmt.Sprintf("%U", r)) 78 } 79 }