github.com/rudderlabs/rudder-go-kit@v0.30.0/sanitize/sanitize_test.go

github.com/rudderlabs/rudder-go-kit@v0.30.0/sanitize/sanitize_test.go (about)

     1  package sanitize
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"unicode"
     7  
     8  	"github.com/stretchr/testify/require"
     9  )
    10  
    11  var out string
    12  
    13  func BenchmarkMessageID(b *testing.B) {
    14  	dirtyMessageID := "\u0000 Test foo_bar-baz \u034F 123-222 "
    15  	properMessageID := "123e4567-e89b-12d3-a456-426614174000"
    16  
    17  	b.Run("in-place for loop - dirty", func(b *testing.B) {
    18  		for i := 0; i < b.N; i++ {
    19  			out = sanitizeMessageIDForLoop(dirtyMessageID)
    20  		}
    21  	})
    22  
    23  	b.Run("in-place for loop - proper", func(b *testing.B) {
    24  		for i := 0; i < b.N; i++ {
    25  			out = sanitizeMessageIDForLoop(properMessageID)
    26  		}
    27  	})
    28  
    29  	b.Run("strings map - dirty", func(b *testing.B) {
    30  		for i := 0; i < b.N; i++ {
    31  			out = Unicode(dirtyMessageID)
    32  		}
    33  	})
    34  
    35  	b.Run("strings map - proper", func(b *testing.B) {
    36  		for i := 0; i < b.N; i++ {
    37  			out = Unicode(properMessageID)
    38  		}
    39  	})
    40  }
    41  
    42  // incorrect implementation of sanitizeMessageID, but used for benchmarking
    43  func sanitizeMessageIDForLoop(messageID string) string {
    44  	for i, r := range messageID {
    45  		if unicode.IsPrint(r) {
    46  			continue
    47  		}
    48  		if !unicode.Is(invisibleRangeTable, r) {
    49  			continue
    50  		}
    51  
    52  		messageID = messageID[:i] + messageID[i+1:]
    53  	}
    54  	return messageID
    55  }
    56  
    57  func TestSanitizeMessageID(t *testing.T) {
    58  	testcases := []struct {
    59  		in  string
    60  		out string
    61  	}{
    62  		{"\u0000 Test \u0000foo_bar-baz 123-222 \u0000", " Test foo_bar-baz 123-222 "},
    63  		{"\u0000", ""},
    64  		{"\u0000 ", " "},
    65  		{"\u0000 \u0000", " "},
    66  		{"\u00A0\t\n\r\u034F", ""},
    67  		{"τυχαίο;", "τυχαίο;"},
    68  	}
    69  
    70  	for _, tc := range testcases {
    71  		cleanMessageID := Unicode(tc.in)
    72  		require.Equal(t, tc.out, cleanMessageID, fmt.Sprintf("%#v -> %#v", tc.in, tc.out))
    73  	}
    74  
    75  	for _, r := range invisibleRunes {
    76  		cleanMessageID := Unicode(string(r))
    77  		require.Empty(t, cleanMessageID, fmt.Sprintf("%U", r))
    78  	}
    79  }