github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/bufio/scan_test.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bufio_test
     6  
     7  import (
     8  	. "bufio"
     9  	"bytes"
    10  	"errors"
    11  	"io"
    12  	"strings"
    13  	"testing"
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  // Test white space table matches the Unicode definition.
    19  func TestSpace(t *testing.T) {
    20  	for r := rune(0); r <= utf8.MaxRune; r++ {
    21  		if IsSpace(r) != unicode.IsSpace(r) {
    22  			t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
    23  		}
    24  	}
    25  }
    26  
// scanTests holds the inputs shared by TestScanByte and TestScanRune. It mixes
// empty, ASCII and multi-byte UTF-8 strings with deliberately invalid bytes.
var scanTests = []string{
	"",
	"a",
	"¼",
	"☹",
	"\x81",   // UTF-8 error
	"\uFFFD", // correctly encoded RuneError
	"abcdefgh",
	"abc def\n\t\tgh    ",
	// Valid runes interleaved with stray continuation bytes (\x81, \x82).
	"abc¼☹\x81\uFFFD日本語\x82abc",
}
    38  
    39  func TestScanByte(t *testing.T) {
    40  	for n, test := range scanTests {
    41  		buf := bytes.NewBufferString(test)
    42  		s := NewScanner(buf)
    43  		s.Split(ScanBytes)
    44  		var i int
    45  		for i = 0; s.Scan(); i++ {
    46  			if b := s.Bytes(); len(b) != 1 || b[0] != test[i] {
    47  				t.Errorf("#%d: %d: expected %q got %q", n, i, test, b)
    48  			}
    49  		}
    50  		if i != len(test) {
    51  			t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i)
    52  		}
    53  		err := s.Err()
    54  		if err != nil {
    55  			t.Errorf("#%d: %v", n, err)
    56  		}
    57  	}
    58  }
    59  
    60  // Test that the rune splitter returns same sequence of runes (not bytes) as for range string.
    61  func TestScanRune(t *testing.T) {
    62  	for n, test := range scanTests {
    63  		buf := bytes.NewBufferString(test)
    64  		s := NewScanner(buf)
    65  		s.Split(ScanRunes)
    66  		var i, runeCount int
    67  		var expect rune
    68  		// Use a string range loop to validate the sequence of runes.
    69  		for i, expect = range string(test) {
    70  			if !s.Scan() {
    71  				break
    72  			}
    73  			runeCount++
    74  			got, _ := utf8.DecodeRune(s.Bytes())
    75  			if got != expect {
    76  				t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got)
    77  			}
    78  		}
    79  		if s.Scan() {
    80  			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
    81  		}
    82  		testRuneCount := utf8.RuneCountInString(test)
    83  		if runeCount != testRuneCount {
    84  			t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount)
    85  		}
    86  		err := s.Err()
    87  		if err != nil {
    88  			t.Errorf("#%d: %v", n, err)
    89  		}
    90  	}
    91  }
    92  
// wordScanTests holds the inputs for TestScanWords: empty and space-only
// strings, single words, and words separated by a variety of white space
// characters.
var wordScanTests = []string{
	"",
	" ",
	"\n",
	"a",
	" a ",
	"abc def",
	" abc def ",
	// Separators here include non-ASCII escapes (\u0085, \u00a0) alongside the ASCII ones.
	" abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
}
   103  
   104  // Test that the word splitter returns the same data as strings.Fields.
   105  func TestScanWords(t *testing.T) {
   106  	for n, test := range wordScanTests {
   107  		buf := bytes.NewBufferString(test)
   108  		s := NewScanner(buf)
   109  		s.Split(ScanWords)
   110  		words := strings.Fields(test)
   111  		var wordCount int
   112  		for wordCount = 0; wordCount < len(words); wordCount++ {
   113  			if !s.Scan() {
   114  				break
   115  			}
   116  			got := s.Text()
   117  			if got != words[wordCount] {
   118  				t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got)
   119  			}
   120  		}
   121  		if s.Scan() {
   122  			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
   123  		}
   124  		if wordCount != len(words) {
   125  			t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount)
   126  		}
   127  		err := s.Err()
   128  		if err != nil {
   129  			t.Errorf("#%d: %v", n, err)
   130  		}
   131  	}
   132  }
   133  
   134  // slowReader is a reader that returns only a few bytes at a time, to test the incremental
   135  // reads in Scanner.Scan.
   136  type slowReader struct {
   137  	max int
   138  	buf *bytes.Buffer
   139  }
   140  
   141  func (sr *slowReader) Read(p []byte) (n int, err error) {
   142  	if len(p) > sr.max {
   143  		p = p[0:sr.max]
   144  	}
   145  	return sr.buf.Read(p)
   146  }
   147  
   148  // genLine writes to buf a predictable but non-trivial line of text of length
   149  // n, including the terminal newline and an occasional carriage return.
   150  // If addNewline is false, the \r and \n are not emitted.
   151  func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
   152  	buf.Reset()
   153  	doCR := lineNum%5 == 0
   154  	if doCR {
   155  		n--
   156  	}
   157  	for i := 0; i < n-1; i++ { // Stop early for \n.
   158  		c := 'a' + byte(lineNum+i)
   159  		if c == '\n' || c == '\r' { // Don't confuse us.
   160  			c = 'N'
   161  		}
   162  		buf.WriteByte(c)
   163  	}
   164  	if addNewline {
   165  		if doCR {
   166  			buf.WriteByte('\r')
   167  		}
   168  		buf.WriteByte('\n')
   169  	}
   170  	return
   171  }
   172  
   173  // Test the line splitter, including some carriage returns but no long lines.
   174  func TestScanLongLines(t *testing.T) {
   175  	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
   176  	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
   177  	tmp := new(bytes.Buffer)
   178  	buf := new(bytes.Buffer)
   179  	lineNum := 0
   180  	j := 0
   181  	for i := 0; i < 2*smallMaxTokenSize; i++ {
   182  		genLine(tmp, lineNum, j, true)
   183  		if j < smallMaxTokenSize {
   184  			j++
   185  		} else {
   186  			j--
   187  		}
   188  		buf.Write(tmp.Bytes())
   189  		lineNum++
   190  	}
   191  	s := NewScanner(&slowReader{1, buf})
   192  	s.Split(ScanLines)
   193  	s.MaxTokenSize(smallMaxTokenSize)
   194  	j = 0
   195  	for lineNum := 0; s.Scan(); lineNum++ {
   196  		genLine(tmp, lineNum, j, false)
   197  		if j < smallMaxTokenSize {
   198  			j++
   199  		} else {
   200  			j--
   201  		}
   202  		line := tmp.String() // We use the string-valued token here, for variety.
   203  		if s.Text() != line {
   204  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line)
   205  		}
   206  	}
   207  	err := s.Err()
   208  	if err != nil {
   209  		t.Fatal(err)
   210  	}
   211  }
   212  
   213  // Test that the line splitter errors out on a long line.
   214  func TestScanLineTooLong(t *testing.T) {
   215  	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
   216  	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
   217  	tmp := new(bytes.Buffer)
   218  	buf := new(bytes.Buffer)
   219  	lineNum := 0
   220  	j := 0
   221  	for i := 0; i < 2*smallMaxTokenSize; i++ {
   222  		genLine(tmp, lineNum, j, true)
   223  		j++
   224  		buf.Write(tmp.Bytes())
   225  		lineNum++
   226  	}
   227  	s := NewScanner(&slowReader{3, buf})
   228  	s.Split(ScanLines)
   229  	s.MaxTokenSize(smallMaxTokenSize)
   230  	j = 0
   231  	for lineNum := 0; s.Scan(); lineNum++ {
   232  		genLine(tmp, lineNum, j, false)
   233  		if j < smallMaxTokenSize {
   234  			j++
   235  		} else {
   236  			j--
   237  		}
   238  		line := tmp.Bytes()
   239  		if !bytes.Equal(s.Bytes(), line) {
   240  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
   241  		}
   242  	}
   243  	err := s.Err()
   244  	if err != ErrTooLong {
   245  		t.Fatalf("expected ErrTooLong; got %s", err)
   246  	}
   247  }
   248  
   249  // Test that the line splitter handles a final line without a newline.
   250  func testNoNewline(text string, lines []string, t *testing.T) {
   251  	buf := bytes.NewBufferString(text)
   252  	s := NewScanner(&slowReader{7, buf})
   253  	s.Split(ScanLines)
   254  	for lineNum := 0; s.Scan(); lineNum++ {
   255  		line := lines[lineNum]
   256  		if s.Text() != line {
   257  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
   258  		}
   259  	}
   260  	err := s.Err()
   261  	if err != nil {
   262  		t.Fatal(err)
   263  	}
   264  }
   265  
// noNewlineLines holds a single two-line sample whose final line has no
// trailing newline.
// NOTE(review): nothing in the visible part of this file references this
// variable; confirm whether another file in the package uses it.
var noNewlineLines = []string{
	"abcdefghijklmn\nopqrstuvwxyz",
}
   269  
   270  // Test that the line splitter handles a final line without a newline.
   271  func TestScanLineNoNewline(t *testing.T) {
   272  	const text = "abcdefghijklmn\nopqrstuvwxyz"
   273  	lines := []string{
   274  		"abcdefghijklmn",
   275  		"opqrstuvwxyz",
   276  	}
   277  	testNoNewline(text, lines, t)
   278  }
   279  
   280  // Test that the line splitter handles a final line with a carriage return but nonewline.
   281  func TestScanLineReturnButNoNewline(t *testing.T) {
   282  	const text = "abcdefghijklmn\nopqrstuvwxyz\r"
   283  	lines := []string{
   284  		"abcdefghijklmn",
   285  		"opqrstuvwxyz",
   286  	}
   287  	testNoNewline(text, lines, t)
   288  }
   289  
   290  // Test that the line splitter handles a final empty line.
   291  func TestScanLineEmptyFinalLine(t *testing.T) {
   292  	const text = "abcdefghijklmn\nopqrstuvwxyz\n\n"
   293  	lines := []string{
   294  		"abcdefghijklmn",
   295  		"opqrstuvwxyz",
   296  		"",
   297  	}
   298  	testNoNewline(text, lines, t)
   299  }
   300  
   301  // Test that the line splitter handles a final empty line with a carriage return but no newline.
   302  func TestScanLineEmptyFinalLineWithCR(t *testing.T) {
   303  	const text = "abcdefghijklmn\nopqrstuvwxyz\n\r"
   304  	lines := []string{
   305  		"abcdefghijklmn",
   306  		"opqrstuvwxyz",
   307  		"",
   308  	}
   309  	testNoNewline(text, lines, t)
   310  }
   311  
// testError is the sentinel error that the custom split functions in
// TestSplitError and TestErrAtEOF return, so the tests can recognize it when
// it comes back from Scanner.Err.
var testError = errors.New("testError")
   313  
   314  // Test the correct error is returned when the split function errors out.
   315  func TestSplitError(t *testing.T) {
   316  	// Create a split function that delivers a little data, then a predictable error.
   317  	numSplits := 0
   318  	const okCount = 7
   319  	errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   320  		if atEOF {
   321  			panic("didn't get enough data")
   322  		}
   323  		if numSplits >= okCount {
   324  			return 0, nil, testError
   325  		}
   326  		numSplits++
   327  		return 1, data[0:1], nil
   328  	}
   329  	// Read the data.
   330  	const text = "abcdefghijklmnopqrstuvwxyz"
   331  	buf := bytes.NewBufferString(text)
   332  	s := NewScanner(&slowReader{1, buf})
   333  	s.Split(errorSplit)
   334  	var i int
   335  	for i = 0; s.Scan(); i++ {
   336  		if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
   337  			t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
   338  		}
   339  	}
   340  	// Check correct termination location and error.
   341  	if i != okCount {
   342  		t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
   343  	}
   344  	err := s.Err()
   345  	if err != testError {
   346  		t.Fatalf("expected %q got %v", testError, err)
   347  	}
   348  }
   349  
   350  // Test that an EOF is overridden by a user-generated scan error.
   351  func TestErrAtEOF(t *testing.T) {
   352  	s := NewScanner(strings.NewReader("1 2 33"))
   353  	// This spitter will fail on last entry, after s.err==EOF.
   354  	split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   355  		advance, token, err = ScanWords(data, atEOF)
   356  		if len(token) > 1 {
   357  			if s.ErrOrEOF() != io.EOF {
   358  				t.Fatal("not testing EOF")
   359  			}
   360  			err = testError
   361  		}
   362  		return
   363  	}
   364  	s.Split(split)
   365  	for s.Scan() {
   366  	}
   367  	if s.Err() != testError {
   368  		t.Fatal("wrong error:", s.Err())
   369  	}
   370  }
   371  
   372  // Test for issue 5268.
   373  type alwaysError struct{}
   374  
   375  func (alwaysError) Read(p []byte) (int, error) {
   376  	return 0, io.ErrUnexpectedEOF
   377  }
   378  
   379  func TestNonEOFWithEmptyRead(t *testing.T) {
   380  	scanner := NewScanner(alwaysError{})
   381  	for scanner.Scan() {
   382  		t.Fatal("read should fail")
   383  	}
   384  	err := scanner.Err()
   385  	if err != io.ErrUnexpectedEOF {
   386  		t.Errorf("unexpected error: %v", err)
   387  	}
   388  }
   389  
   390  // Test that Scan finishes if we have endless empty reads.
   391  type endlessZeros struct{}
   392  
   393  func (endlessZeros) Read(p []byte) (int, error) {
   394  	return 0, nil
   395  }
   396  
   397  func TestBadReader(t *testing.T) {
   398  	scanner := NewScanner(endlessZeros{})
   399  	for scanner.Scan() {
   400  		t.Fatal("read should fail")
   401  	}
   402  	err := scanner.Err()
   403  	if err != io.ErrNoProgress {
   404  		t.Errorf("unexpected error: %v", err)
   405  	}
   406  }