github.com/ezoic/ws@v1.0.4-0.20220713205711-5c1d69e074c5/wsutil/utf8_test.go (about)

     1  package wsutil
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"testing"
    10  	"unicode/utf8"
    11  )
    12  
    13  func TestUTF8ReaderReadFull(t *testing.T) {
    14  	for _, test := range []struct {
    15  		hex   string
    16  		err   bool
    17  		valid bool
    18  		n     int
    19  	}{
    20  		{
    21  			hex:   "cebae1bdb9cf83cebcceb5eda080656469746564",
    22  			err:   true,
    23  			valid: false,
    24  			n:     11,
    25  		},
    26  		{
    27  			hex:   "cebae1bdb9cf83cebcceb5eda080656469746564",
    28  			valid: false,
    29  			err:   true,
    30  			n:     11,
    31  		},
    32  		{
    33  			hex:   "7f7f7fdf",
    34  			valid: false,
    35  			err:   false,
    36  			n:     4,
    37  		},
    38  		{
    39  			hex:   "dfbf",
    40  			n:     2,
    41  			valid: true,
    42  			err:   false,
    43  		},
    44  	} {
    45  		t.Run("", func(t *testing.T) {
    46  			bts, err := hex.DecodeString(test.hex)
    47  			if err != nil {
    48  				t.Fatal(err)
    49  			}
    50  
    51  			src := bytes.NewReader(bts)
    52  			r := NewUTF8Reader(src)
    53  
    54  			p := make([]byte, src.Len())
    55  			n, err := io.ReadFull(r, p)
    56  
    57  			if err != nil && !utf8.Valid(bts[:n]) {
    58  				// Should return only number of valid bytes read.
    59  				t.Errorf("read n bytes is actually invalid utf8 sequence")
    60  			}
    61  			if n := r.Accepted(); err == nil && !utf8.Valid(bts[:n]) {
    62  				// Should return only number of valid bytes read.
    63  				t.Errorf("read n bytes is actually invalid utf8 sequence")
    64  			}
    65  			if test.err && err == nil {
    66  				t.Errorf("expected read error; got nil")
    67  			}
    68  			if !test.err && err != nil {
    69  				t.Errorf("unexpected read error: %s", err)
    70  			}
    71  			if n != test.n {
    72  				t.Errorf("ReadFull() read %d; want %d", n, test.n)
    73  			}
    74  			if act, exp := r.Valid(), test.valid; act != exp {
    75  				t.Errorf("Valid() = %v; want %v", act, exp)
    76  			}
    77  		})
    78  	}
    79  }
    80  
    81  func TestUTF8Reader(t *testing.T) {
    82  	for i, test := range []struct {
    83  		label string
    84  
    85  		data []byte
    86  		// or
    87  		hex string
    88  
    89  		chop int
    90  
    91  		err   bool
    92  		valid bool
    93  		at    int
    94  	}{
    95  		{
    96  			data:  []byte("hello, world!"),
    97  			valid: true,
    98  			chop:  2,
    99  		},
   100  		{
   101  			data:  []byte{0x7f, 0xf0, 0x00},
   102  			valid: false,
   103  			err:   true,
   104  			at:    2,
   105  			chop:  1,
   106  		},
   107  		{
   108  			hex:   "48656c6c6f2dc2b540c39fc3b6c3a4c3bcc3a0c3a12d5554462d382121",
   109  			valid: true,
   110  			chop:  1,
   111  		},
   112  		{
   113  			hex:   "cebae1bdb9cf83cebcceb5eda080656469746564",
   114  			valid: false,
   115  			err:   true,
   116  			at:    12,
   117  			chop:  1,
   118  		},
   119  	} {
   120  		t.Run(fmt.Sprintf("%s#%d", test.label, i), func(t *testing.T) {
   121  			data := test.data
   122  			if h := test.hex; h != "" {
   123  				var err error
   124  				if data, err = hex.DecodeString(h); err != nil {
   125  					t.Fatal(err)
   126  				}
   127  			}
   128  
   129  			cr := &chopReader{
   130  				src: bytes.NewReader(data),
   131  				sz:  test.chop,
   132  			}
   133  
   134  			r := NewUTF8Reader(cr)
   135  
   136  			bts := make([]byte, 2*len(data))
   137  
   138  			var (
   139  				i, n int
   140  				err  error
   141  			)
   142  			for {
   143  				n, err = r.Read(bts[i:])
   144  				i += n
   145  				if err != nil {
   146  					if err == io.EOF {
   147  						err = nil
   148  					}
   149  					bts = bts[:i]
   150  					break
   151  				}
   152  			}
   153  			if test.err && err == nil {
   154  				t.Errorf("want error; got nil")
   155  				return
   156  			}
   157  			if !test.err && err != nil {
   158  				t.Errorf("unexpected error: %s", err)
   159  				return
   160  			}
   161  			if test.err && err == ErrInvalidUTF8 && i != test.at {
   162  				t.Errorf("received error at %d; want at %d", i, test.at)
   163  				return
   164  			}
   165  			if act, exp := r.Valid(), test.valid; act != exp {
   166  				t.Errorf("Valid() = %v; want %v", act, exp)
   167  				return
   168  			}
   169  			if !test.err && !bytes.Equal(bts, data) {
   170  				t.Errorf("bytes are not equal")
   171  			}
   172  		})
   173  	}
   174  }
   175  
   176  func BenchmarkUTF8Reader(b *testing.B) {
   177  	for i, bench := range []struct {
   178  		label string
   179  		data  []byte
   180  		chop  int
   181  		err   bool
   182  	}{
   183  		{
   184  			data: bytes.Repeat([]byte("x"), 1024),
   185  			chop: 128,
   186  		},
   187  		{
   188  			data: append(
   189  				bytes.Repeat([]byte("x"), 1024),
   190  				append(
   191  					[]byte{0x7f, 0xf0},
   192  					bytes.Repeat([]byte("x"), 128)...,
   193  				)...,
   194  			),
   195  			err:  true,
   196  			chop: 7,
   197  		},
   198  	} {
   199  		b.Run(fmt.Sprintf("%s#%d", bench.label, i), func(b *testing.B) {
   200  			for i := 0; i < b.N; i++ {
   201  				cr := &chopReader{
   202  					src: bytes.NewReader(bench.data),
   203  					sz:  bench.chop,
   204  				}
   205  				r := NewUTF8Reader(cr)
   206  				_, err := ioutil.ReadAll(r)
   207  				if !bench.err && err != nil {
   208  					b.Fatal(err)
   209  				}
   210  			}
   211  		})
   212  	}
   213  }