kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/riegeli/riegeli_test.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package riegeli
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"io"
    24  	"testing"
    25  
    26  	"kythe.io/kythe/go/util/compare"
    27  	"kythe.io/kythe/go/util/log"
    28  
    29  	"google.golang.org/protobuf/proto"
    30  
    31  	rtpb "kythe.io/kythe/go/util/riegeli/riegeli_test_go_proto"
    32  	rmpb "kythe.io/third_party/riegeli/records_metadata_go_proto"
    33  )
    34  
    35  func TestParseOptions(t *testing.T) {
    36  	tests := []string{
    37  		"",
    38  		"default",
    39  		"brotli",
    40  		"brotli:5",
    41  		"transpose",
    42  		"uncompressed",
    43  		"zstd",
    44  		"zstd:5",
    45  		"snappy",
    46  		"brotli,transpose",
    47  		"transpose,uncompressed",
    48  		"brotli:5,transpose",
    49  		"chunk_size:524288",
    50  	}
    51  
    52  	for _, test := range tests {
    53  		opts, err := ParseOptions(test)
    54  		if err != nil {
    55  			t.Errorf("ParseOptions error: %v", err)
    56  			continue
    57  		}
    58  
    59  		if found := opts.String(); found != test {
    60  			t.Errorf("Expected: %q; found: %q", test, found)
    61  		}
    62  	}
    63  }
    64  
    65  func TestWriteEmpty(t *testing.T) {
    66  	var buf bytes.Buffer
    67  	if err := NewWriter(&buf, nil).Close(); err != nil {
    68  		t.Fatal(err)
    69  	}
    70  
    71  	// The standard Riegeli file header
    72  	expected := []byte{
    73  		0x83, 0xaf, 0x70, 0xd1, 0x0d, 0x88, 0x4a, 0x3f,
    74  		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    75  		0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    76  		0x91, 0xba, 0xc2, 0x3c, 0x92, 0x87, 0xe1, 0xa9,
    77  		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    78  		0xe1, 0x9f, 0x13, 0xc0, 0xe9, 0xb1, 0xc3, 0x72,
    79  		0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    80  		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    81  	}
    82  	if found := buf.Bytes(); !bytes.Equal(found, expected) {
    83  		t.Errorf("Found: %s; expected: %s", hex.EncodeToString(found), hex.EncodeToString(expected))
    84  	}
    85  }
    86  
// testedOptions lists the option strings that TestReadWriteNonProto and
// TestReadWriteProto each parse with ParseOptions and run as a named subtest.
var testedOptions = []string{
	"default",
	"uncompressed",
	"brotli",
	"zstd",
	"snappy",

	"transpose",
	"uncompressed,transpose",
	"brotli,transpose",
}
    98  
    99  func TestReadWriteNonProto(t *testing.T) {
   100  	for _, test := range testedOptions {
   101  		opts, err := ParseOptions(test)
   102  		if err != nil {
   103  			t.Fatal(err)
   104  		}
   105  		t.Run(test, func(t *testing.T) {
   106  			t.Parallel()
   107  			testReadWriteStrings(t, opts)
   108  		})
   109  	}
   110  }
   111  
   112  func TestReadWriteProto(t *testing.T) {
   113  	for _, test := range testedOptions {
   114  		opts, err := ParseOptions(test)
   115  		if err != nil {
   116  			t.Fatal(err)
   117  		}
   118  		t.Run(test, func(t *testing.T) {
   119  			t.Parallel()
   120  			testReadWriteProtos(t, opts)
   121  		})
   122  	}
   123  }
   124  
   125  func writeStrings(t *testing.T, opts *WriterOptions, n int) *bytes.Buffer {
   126  	var buf bytes.Buffer
   127  	wr := NewWriter(&buf, opts)
   128  
   129  	for i := 0; i < n; i++ {
   130  		if err := wr.Put([]byte(fmt.Sprintf("%d", i))); err != nil {
   131  			t.Fatalf("Error Put(%d): %v", i, err)
   132  		}
   133  	}
   134  	if err := wr.Close(); err != nil {
   135  		t.Fatalf("Close error: %v", err)
   136  	}
   137  	return &buf
   138  }
   139  
   140  func testReadWriteStrings(t *testing.T, opts *WriterOptions) {
   141  	const N = 1e5
   142  	buf := writeStrings(t, opts, N)
   143  	rd := NewReader(bytes.NewReader(buf.Bytes()))
   144  	for i := 0; i < N; i++ {
   145  		rec, err := rd.Next()
   146  		if err != nil {
   147  			t.Fatalf("Read error: %v", err)
   148  		} else if string(rec) != fmt.Sprintf("%d", i) {
   149  			t.Errorf("Found: %s; expected: %d;", hex.EncodeToString(rec), i)
   150  		}
   151  	}
   152  	rec, err := rd.Next()
   153  	if err != io.EOF {
   154  		t.Errorf("Unexpected final Read: %q %v", hex.EncodeToString(rec), err)
   155  	}
   156  }
   157  
   158  func writeProtos(t *testing.T, opts *WriterOptions, n int) *bytes.Buffer {
   159  	t.Helper()
   160  	var buf bytes.Buffer
   161  	wr := NewWriter(&buf, opts)
   162  	for i := 0; i < n; i++ {
   163  		if err := wr.PutProto(numToProto(i)); err != nil {
   164  			t.Fatalf("Error PutProto(%d): %v", i, err)
   165  		}
   166  	}
   167  	if err := wr.Close(); err != nil {
   168  		t.Fatalf("Close error: %v", err)
   169  	}
   170  	return &buf
   171  }
   172  
   173  // numToProto constructs a *rtpb.Complex using a given integer as its field
   174  // values and as counter for repeated field sizes.
   175  func numToProto(i int) *rtpb.Complex {
   176  	msg := &rtpb.Complex{
   177  		Str:  proto.String(fmt.Sprintf("s%d", i)),
   178  		I32:  proto.Int32(int32(i)),
   179  		I64:  proto.Int64(int64(i)),
   180  		Bits: []byte(fmt.Sprintf("b%d", i)),
   181  		SimpleNested: &rtpb.Simple{
   182  			Name: proto.String(fmt.Sprintf("name%d", i)),
   183  		},
   184  	}
   185  	for j := 0; j < i%8; j++ {
   186  		msg.Rep = append(msg.Rep, fmt.Sprintf("rep%d_%d", i, j))
   187  		msg.Group = append(msg.Group, &rtpb.Complex_Group{
   188  			GrpStr: proto.String(fmt.Sprintf("gs%d_%d", i, j)),
   189  		})
   190  	}
   191  	for j, complexNested := 0, msg; j < i%100; j++ {
   192  		nextLevel := &rtpb.Complex{Str: proto.String(fmt.Sprintf("cn%d_%d", i, j))}
   193  		complexNested.ComplexNested, complexNested = nextLevel, nextLevel
   194  	}
   195  	return msg
   196  }
   197  
   198  func testReadWriteProtos(t *testing.T, opts *WriterOptions) {
   199  	const N = 1e3
   200  	buf := writeProtos(t, opts, N)
   201  	log.Infof("Compressed size of %q: %d bytes", t.Name(), buf.Len())
   202  	rd := NewReader(bytes.NewReader(buf.Bytes()))
   203  	for i := 0; i < N; i++ {
   204  		expected := numToProto(i)
   205  		var found rtpb.Complex
   206  		if err := rd.NextProto(&found); err != nil {
   207  			t.Fatalf("Read error: %v", err)
   208  		} else if diff := compare.ProtoDiff(&found, expected); diff != "" {
   209  			t.Errorf("Unexpected record:  (-: found; +: expected)\n%s", diff)
   210  		}
   211  	}
   212  }
   213  
   214  func TestEmptyRecord(t *testing.T) {
   215  	var buf bytes.Buffer
   216  	wr := NewWriter(&buf, nil)
   217  
   218  	if err := wr.Put([]byte{}); err != nil {
   219  		t.Fatalf("Error writing empty record: %v", err)
   220  	} else if err := wr.Close(); err != nil {
   221  		t.Fatalf("Close error: %v", err)
   222  	}
   223  
   224  	rd := NewReader(bytes.NewReader(buf.Bytes()))
   225  	if rec, err := rd.Next(); err != nil {
   226  		t.Fatalf("Error reading empty record: %v", err)
   227  	} else if len(rec) != 0 {
   228  		t.Fatalf("Found non-empty record: %v", rec)
   229  	}
   230  
   231  	if rec, err := rd.Next(); err != io.EOF {
   232  		t.Fatalf("Unexpected Next record/error: %v %v", rec, err)
   233  	}
   234  }
   235  
   236  func TestWriterSeek(t *testing.T) {
   237  	const N = 1e3
   238  
   239  	buf := bytes.NewBuffer(nil)
   240  	wr := NewWriter(buf, nil)
   241  
   242  	positions := make([]RecordPosition, N)
   243  	for i := 0; i < N; i++ {
   244  		positions[i] = wr.Position()
   245  		if err := wr.PutProto(numToProto(i)); err != nil {
   246  			t.Fatalf("Error PutProto(%d): %v", i, err)
   247  		}
   248  	}
   249  	if err := wr.Close(); err != nil {
   250  		t.Fatalf("Error Close: %v", err)
   251  	}
   252  
   253  	rd := NewReadSeeker(bytes.NewReader(buf.Bytes()))
   254  	for i, p := range positions {
   255  		if err := rd.SeekToRecord(p); err != nil {
   256  			t.Fatalf("Error seeking to record %d at %v: %v", i, p, err)
   257  		}
   258  
   259  		expected := numToProto(i)
   260  		var found rtpb.Complex
   261  		if err := rd.NextProto(&found); err != nil {
   262  			t.Fatalf("Read error: %v", err)
   263  		} else if diff := compare.ProtoDiff(&found, expected); diff != "" {
   264  			t.Errorf("Unexpected record:  (-: found; +: expected)\n%s", diff)
   265  		}
   266  	}
   267  }
   268  
   269  func TestReaderSeekRecords(t *testing.T) {
   270  	const N = 1e4
   271  	buf := writeStrings(t, &WriterOptions{}, N)
   272  
   273  	rd := NewReadSeeker(bytes.NewReader(buf.Bytes()))
   274  	lastIndex := int64(-1)
   275  	var positions []RecordPosition
   276  	for i := 0; i < N; i++ {
   277  		pos, err := rd.Position()
   278  		if err != nil {
   279  			t.Fatalf("Error getting position: %v", err)
   280  		} else if _, err := rd.Next(); err != nil {
   281  			t.Fatalf("Error reading sequentially: %v", err)
   282  		}
   283  		positions = append(positions, pos)
   284  		idx := pos.index()
   285  		if lastIndex >= idx {
   286  			t.Errorf("Position not monotonically increasing: %d >= %d", lastIndex, idx)
   287  		}
   288  		lastIndex = idx
   289  	}
   290  	if rec, err := rd.Next(); err != io.EOF {
   291  		t.Fatalf("Unexpected Next record/error: %v %v", rec, err)
   292  	}
   293  
   294  	// Read all records by seeking to each position in reverse order
   295  	for i := int(N - 1); i >= 0; i-- {
   296  		p := positions[i]
   297  		if err := rd.SeekToRecord(p); err != nil {
   298  			t.Fatalf("Error seeking to record %d at %v: %v", i, p, err)
   299  		}
   300  		rec, err := rd.Next()
   301  		if err != nil {
   302  			t.Fatalf("Read error at %v: %v", p, err)
   303  		} else if string(rec) != fmt.Sprintf("%d", i) {
   304  			t.Errorf("At %v found: %s; expected: %d;", p, hex.EncodeToString(rec), i)
   305  		}
   306  	}
   307  }
   308  
   309  func TestReaderSeekKnownPositions(t *testing.T) {
   310  	const N = 1e4
   311  	buf := writeStrings(t, &WriterOptions{}, N)
   312  
   313  	rd := NewReadSeeker(bytes.NewReader(buf.Bytes()))
   314  	lastIndex := int64(-1)
   315  	var positions []RecordPosition
   316  	for i := 0; i < N; i++ {
   317  		pos, err := rd.Position()
   318  		if err != nil {
   319  			t.Fatalf("Error getting position: %v", err)
   320  		} else if _, err := rd.Next(); err != nil {
   321  			t.Fatalf("Error reading sequentially: %v", err)
   322  		}
   323  		positions = append(positions, pos)
   324  		idx := pos.index()
   325  		if lastIndex >= idx {
   326  			t.Errorf("Position not monotonically increasing: %d >= %d", lastIndex, idx)
   327  		}
   328  		lastIndex = idx
   329  	}
   330  	if rec, err := rd.Next(); err != io.EOF {
   331  		t.Fatalf("Unexpected Next record/error: %v %v", rec, err)
   332  	}
   333  
   334  	// Read all records by seeking to each position in reverse order
   335  	for i := int(N - 1); i >= 0; i-- {
   336  		p := positions[i]
   337  		if err := rd.Seek(p.index()); err != nil {
   338  			t.Fatalf("Error seeking to record %d at %v (%d): %v", i, p, p.index(), err)
   339  		}
   340  		rec, err := rd.Next()
   341  		if err != nil {
   342  			t.Fatalf("Read error at %v: %v", p, err)
   343  		} else if string(rec) != fmt.Sprintf("%d", i) {
   344  			t.Errorf("At %v found: %s; expected: %d;", p, hex.EncodeToString(rec), i)
   345  		}
   346  	}
   347  }
   348  
   349  func TestReaderSeekAllPositions(t *testing.T) {
   350  	const N = 1e4
   351  	buf := writeStrings(t, &WriterOptions{}, N).Bytes()
   352  	rd := NewReadSeeker(bytes.NewReader(buf))
   353  
   354  	// Ensure every byte position is seekable
   355  	var expected int
   356  	for i := 0; i < len(buf); i++ {
   357  		if err := rd.Seek(int64(i)); err != nil {
   358  			t.Fatalf("Error seeking to %d/%d for %d: %v", i, len(buf), expected, err)
   359  		}
   360  		pos, err := rd.Position()
   361  		if err != nil {
   362  			t.Fatalf("Position error: %v", err)
   363  		}
   364  		rec, err := rd.Next()
   365  		if expected == N-1 {
   366  			if err != io.EOF {
   367  				t.Fatalf("Read past end of file at %d (%v): %v %v", i, pos, rec, err)
   368  			}
   369  		} else if err != nil {
   370  			t.Fatalf("Read error at %d/%d: %v; expected: %d", i, len(buf), err, expected)
   371  		}
   372  
   373  		if expected != N-1 && string(rec) != fmt.Sprintf("%d", expected) {
   374  			expected++
   375  			if string(rec) != fmt.Sprintf("%d", expected) {
   376  				t.Fatalf("At %d/%d found: %s; expected: %d;", i, len(buf), string(rec), expected)
   377  			}
   378  		}
   379  	}
   380  
   381  	if expected != N-1 {
   382  		t.Fatalf("Failed to read all known records: %d != %d", expected, int(N)-1)
   383  	}
   384  }
   385  
   386  func TestRecordsMetadata(t *testing.T) {
   387  	opts := &WriterOptions{
   388  		Transpose:   true,
   389  		Compression: BrotliCompression(4),
   390  	}
   391  	expected := &rmpb.RecordsMetadata{}
   392  	expected.RecordWriterOptions = proto.String(opts.String())
   393  
   394  	buf := writeStrings(t, opts, 128)
   395  	rd := NewReader(bytes.NewReader(buf.Bytes()))
   396  
   397  	found, err := rd.RecordsMetadata()
   398  	if err != nil {
   399  		log.Fatal(err)
   400  	} else if diff := compare.ProtoDiff(found, expected); diff != "" {
   401  		t.Errorf("Unexpected RecordsMetadata:  (-: found; +: expected)\n%s", diff)
   402  	}
   403  }
   404  
   405  // TODO(schroederc): test transposed chunks
   406  // TODO(schroederc): test padding