github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/cdctest/testfeed.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package cdctest
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"context"
    15  	gosql "database/sql"
    16  	gojson "encoding/json"
    17  	"fmt"
    18  	"io/ioutil"
    19  	"net/url"
    20  	"os"
    21  	"path/filepath"
    22  	"regexp"
    23  	"strconv"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/cockroachdb/cockroach/pkg/jobs"
    28  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    31  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    32  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    33  	"github.com/cockroachdb/cockroach/pkg/util/json"
    34  	"github.com/cockroachdb/cockroach/pkg/util/log"
    35  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    36  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    37  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    38  	"github.com/cockroachdb/errors"
    39  	"github.com/jackc/pgx"
    40  )
    41  
// TestFeedFactory is an interface to create changefeeds.
type TestFeedFactory interface {
	// Feed creates a new TestFeed from the given changefeed creation statement
	// and the arguments to run it with.
	Feed(create string, args ...interface{}) (TestFeed, error)
	// Server returns the raw underlying TestServer, if applicable.
	Server() serverutils.TestServerInterface
}
    49  
    50  // TestFeedMessage represents one row update or resolved timestamp message from
    51  // a changefeed.
    52  type TestFeedMessage struct {
    53  	Topic, Partition string
    54  	Key, Value       []byte
    55  	Resolved         []byte
    56  }
    57  
    58  func (m TestFeedMessage) String() string {
    59  	if m.Resolved != nil {
    60  		return string(m.Resolved)
    61  	}
    62  	return fmt.Sprintf(`%s: %s->%s`, m.Topic, m.Key, m.Value)
    63  }
    64  
// TestFeed abstracts over reading from the various types of changefeed sinks.
type TestFeed interface {
	// Partitions returns the domain of values that may be returned as a partition
	// by Next.
	Partitions() []string
	// Next returns the next message. Within a given topic+partition, the order is
	// preserved, but not otherwise. A returned message is either a row update
	// (Key and/or Value is non-empty and Resolved is nil) or a resolved
	// timestamp (Resolved is set and Key and Value are nil).
	Next() (*TestFeedMessage, error)
	// Pause stops the feed from running. Next will continue to return any results
	// that were queued before the pause, eventually blocking or erroring once
	// they've all been drained.
	Pause() error
	// Resume restarts the feed from the last changefeed-wide resolved timestamp.
	Resume() error
	// Close shuts down the changefeed and releases resources.
	Close() error
}
    84  
// sinklessFeedFactory is the TestFeedFactory for sinkless feeds. It holds the
// test server returned by Server and the URL that each feed's SQL connection
// is derived from.
type sinklessFeedFactory struct {
	s    serverutils.TestServerInterface
	sink url.URL
}
    89  
// MakeSinklessFeedFactory returns a TestFeedFactory implementation whose feeds
// are sinkless: each feed reads its results directly from a SQL connection
// opened against (a copy of) the given URL instead of from an external sink.
func MakeSinklessFeedFactory(s serverutils.TestServerInterface, sink url.URL) TestFeedFactory {
	return &sinklessFeedFactory{s: s, sink: sink}
}
    95  
    96  // Feed implements the TestFeedFactory interface
    97  func (f *sinklessFeedFactory) Feed(create string, args ...interface{}) (TestFeed, error) {
    98  	sink := f.sink
    99  	sink.RawQuery = sink.Query().Encode()
   100  	sink.Path = `d`
   101  	// Use pgx directly instead of database/sql so we can close the conn
   102  	// (instead of returning it to the pool).
   103  	pgxConfig, err := pgx.ParseConnectionString(sink.String())
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  	s := &sinklessFeed{
   108  		create:  create,
   109  		args:    args,
   110  		connCfg: pgxConfig,
   111  		seen:    make(map[string]struct{}),
   112  	}
   113  	// Resuming a sinkless feed is the same as killing it and creating a brand new
   114  	// one with the 'cursor' option set to the last resolved timestamp returned,
   115  	// so reuse the same code for both.
   116  	return s, s.Resume()
   117  }
   118  
// Server implements the TestFeedFactory interface. It returns the server the
// factory was constructed with.
func (f *sinklessFeedFactory) Server() serverutils.TestServerInterface {
	return f.s
}
   123  
// sinklessFeed is an implementation of the `TestFeed` interface for a
// "sinkless" (results returned over pgwire) feed.
type sinklessFeed struct {
	// create and args are the statement and arguments the feed was created
	// with, and connCfg is the connection configuration. All three are reused
	// each time Resume (re)starts the feed.
	create  string
	args    []interface{}
	connCfg pgx.ConnConfig

	// conn and rows are the connection and result stream set up by Resume;
	// Close sets rows to nil. seen records the row messages already returned by
	// Next so duplicates can be skipped. latestResolved is the highest resolved
	// timestamp Next has parsed, used by Resume as the cursor.
	conn           *pgx.Conn
	rows           *pgx.Rows
	seen           map[string]struct{}
	latestResolved hlc.Timestamp
}
   136  
// Partitions implements the TestFeed interface. Every message returned by
// sinklessFeed.Next carries the single partition `sinkless`.
func (c *sinklessFeed) Partitions() []string { return []string{`sinkless`} }
   139  
   140  // Next implements the TestFeed interface.
   141  func (c *sinklessFeed) Next() (*TestFeedMessage, error) {
   142  	m := &TestFeedMessage{Partition: `sinkless`}
   143  	for {
   144  		if !c.rows.Next() {
   145  			return nil, c.rows.Err()
   146  		}
   147  		var maybeTopic gosql.NullString
   148  		if err := c.rows.Scan(&maybeTopic, &m.Key, &m.Value); err != nil {
   149  			return nil, err
   150  		}
   151  		if len(maybeTopic.String) > 0 {
   152  			m.Topic = maybeTopic.String
   153  			// TODO(dan): This skips duplicates, since they're allowed by the
   154  			// semantics of our changefeeds. Now that we're switching to RangeFeed,
   155  			// this can actually happen (usually because of splits) and cause flakes.
   156  			// However, we really should be de-duping key+ts, this is too coarse.
   157  			// Fixme.
   158  			seenKey := m.Topic + m.Partition + string(m.Key) + string(m.Value)
   159  			if _, ok := c.seen[seenKey]; ok {
   160  				continue
   161  			}
   162  			c.seen[seenKey] = struct{}{}
   163  			return m, nil
   164  		}
   165  		m.Resolved = m.Value
   166  		m.Key, m.Value = nil, nil
   167  
   168  		// Keep track of the latest resolved timestamp so Resume can use it.
   169  		// TODO(dan): Also do this for non-json feeds.
   170  		if _, resolved, err := ParseJSONValueTimestamps(m.Resolved); err == nil {
   171  			c.latestResolved.Forward(resolved)
   172  		}
   173  
   174  		return m, nil
   175  	}
   176  }
   177  
// Pause implements the TestFeed interface. For a sinkless feed, pausing is
// implemented by closing it; Resume later opens a fresh connection.
func (c *sinklessFeed) Pause() error {
	return c.Close()
}
   182  
   183  // Resume implements the TestFeed interface.
   184  func (c *sinklessFeed) Resume() error {
   185  	var err error
   186  	c.conn, err = pgx.Connect(c.connCfg)
   187  	if err != nil {
   188  		return err
   189  	}
   190  
   191  	// The syntax for a sinkless changefeed is `EXPERIMENTAL CHANGEFEED FOR ...`
   192  	// but it's convenient to accept the `CREATE CHANGEFEED` syntax from the
   193  	// test, so we can keep the current abstraction of running each test over
   194  	// both types. This bit turns what we received into the real sinkless
   195  	// syntax.
   196  	create := strings.Replace(c.create, `CREATE CHANGEFEED`, `EXPERIMENTAL CHANGEFEED`, 1)
   197  	if !c.latestResolved.IsEmpty() {
   198  		// NB: The TODO in Next means c.latestResolved is currently never set for
   199  		// non-json feeds.
   200  		if strings.Contains(create, `WITH`) {
   201  			create += fmt.Sprintf(`, cursor='%s'`, c.latestResolved.AsOfSystemTime())
   202  		} else {
   203  			create += fmt.Sprintf(` WITH cursor='%s'`, c.latestResolved.AsOfSystemTime())
   204  		}
   205  	}
   206  	c.rows, err = c.conn.Query(create, c.args...)
   207  	return err
   208  }
   209  
   210  // Close implements the TestFeed interface.
   211  func (c *sinklessFeed) Close() error {
   212  	c.rows = nil
   213  	return c.conn.Close()
   214  }
   215  
// jobFeed holds the state and helpers shared by the TestFeed implementations
// that are backed by a changefeed job (it is embedded by TableFeed and
// cloudFeed).
type jobFeed struct {
	// db is used to control the job and to poll its state. flushCh is waited
	// on (with a timeout) by fetchJobError between polls.
	db      *gosql.DB
	flushCh chan struct{}

	// JobID is the ID of the changefeed job, scanned from the result of the
	// changefeed creation statement. jobErr is the job's error once
	// fetchJobError has observed one; Resume clears it.
	JobID  int64
	jobErr error
}
   223  
   224  func (f *jobFeed) fetchJobError() error {
   225  	// To avoid busy waiting, we wait for the AfterFlushHook (which is called
   226  	// after results are flushed to a sink) in between polls. It is required
   227  	// that this is hooked up to `flushCh`, which is usually handled by the
   228  	// `enterpriseTest` helper.
   229  	//
   230  	// The trickiest bit is handling errors in the changefeed. The tests want to
   231  	// eventually notice them, but want to return all generated results before
   232  	// giving up and returning the error. This is accomplished by checking the
   233  	// job error immediately before every poll. If it's set, the error is
   234  	// stashed and one more poll's result set is paged through, before finally
   235  	// returning the error. If we're careful to run the last poll after getting
   236  	// the error, then it's guaranteed to contain everything flushed by the
   237  	// changefeed before it shut down.
   238  	if f.jobErr != nil {
   239  		return f.jobErr
   240  	}
   241  
   242  	// We're not guaranteed to get a flush notification if the feed exits,
   243  	// so bound how long we wait.
   244  	select {
   245  	case <-f.flushCh:
   246  	case <-time.After(30 * time.Millisecond):
   247  	}
   248  
   249  	// If the error was set, save it, but do one more poll as described
   250  	// above.
   251  	var errorStr gosql.NullString
   252  	if err := f.db.QueryRow(
   253  		`SELECT error FROM [SHOW JOBS] WHERE job_id=$1`, f.JobID,
   254  	).Scan(&errorStr); err != nil {
   255  		return err
   256  	}
   257  	if len(errorStr.String) > 0 {
   258  		f.jobErr = errors.Newf("%s", errorStr.String)
   259  	}
   260  	return nil
   261  }
   262  
   263  func (f *jobFeed) Pause() error {
   264  	_, err := f.db.Exec(`PAUSE JOB $1`, f.JobID)
   265  	if err != nil {
   266  		return err
   267  	}
   268  	// PAUSE JOB does not actually pause the job but only sends a request for
   269  	// it. Actually block until the job state changes.
   270  	opts := retry.Options{
   271  		InitialBackoff: 1 * time.Millisecond,
   272  		MaxBackoff:     time.Second,
   273  		Multiplier:     2,
   274  	}
   275  	ctx := context.Background()
   276  	return retry.WithMaxAttempts(ctx, opts, 10, func() error {
   277  		var status string
   278  		if err := f.db.QueryRowContext(ctx, `SELECT status FROM system.jobs WHERE id = $1`, f.JobID).Scan(&status); err != nil {
   279  			return err
   280  		}
   281  		if jobs.Status(status) != jobs.StatusPaused {
   282  			return errors.New("could not pause job")
   283  		}
   284  		return nil
   285  	})
   286  }
   287  
// Resume issues RESUME JOB for the feed's job and clears any stashed job error
// so that fetchJobError polls the job state afresh. The stashed error is
// cleared whether or not the statement succeeds.
func (f *jobFeed) Resume() error {
	_, err := f.db.Exec(`RESUME JOB $1`, f.JobID)
	f.jobErr = nil
	return err
}
   293  
   294  func (f *jobFeed) Details() (*jobspb.ChangefeedDetails, error) {
   295  	var payloadBytes []byte
   296  	if err := f.db.QueryRow(
   297  		`SELECT payload FROM system.jobs WHERE id=$1`, f.JobID,
   298  	).Scan(&payloadBytes); err != nil {
   299  		return nil, err
   300  	}
   301  	var payload jobspb.Payload
   302  	if err := protoutil.Unmarshal(payloadBytes, &payload); err != nil {
   303  		return nil, err
   304  	}
   305  	return payload.GetChangefeed(), nil
   306  }
   307  
// tableFeedFactory is the TestFeedFactory for feeds that use the
// `experimental-sql` sink. db is used to create the changefeed jobs, flushCh
// is handed to every feed it creates, and sink is the base URL from which each
// feed's own database connection and sink URI are derived.
type tableFeedFactory struct {
	s       serverutils.TestServerInterface
	db      *gosql.DB
	flushCh chan struct{}
	sink    url.URL
}
   314  
// MakeTableFeedFactory returns a TestFeedFactory implementation using the
// `experimental-sql` sink. The arguments are stored as-is in the factory: db
// is the connection used to create changefeed jobs, flushCh is passed on to
// each created feed, and sink is the base URL for each feed's sink.
func MakeTableFeedFactory(
	s serverutils.TestServerInterface, db *gosql.DB, flushCh chan struct{}, sink url.URL,
) TestFeedFactory {
	return &tableFeedFactory{s: s, db: db, flushCh: flushCh, sink: sink}
}
   322  
   323  // Feed implements the TestFeedFactory interface
   324  func (f *tableFeedFactory) Feed(create string, args ...interface{}) (_ TestFeed, err error) {
   325  	sink := f.sink
   326  	sink.Path = fmt.Sprintf(`table_%d`, timeutil.Now().UnixNano())
   327  
   328  	db, err := gosql.Open("postgres", sink.String())
   329  	if err != nil {
   330  		return nil, err
   331  	}
   332  	defer func() {
   333  		if err != nil {
   334  			_ = db.Close()
   335  		}
   336  	}()
   337  
   338  	sink.Scheme = `experimental-sql`
   339  	c := &TableFeed{
   340  		jobFeed: jobFeed{
   341  			db:      db,
   342  			flushCh: f.flushCh,
   343  		},
   344  		sinkURI: sink.String(),
   345  		seen:    make(map[string]struct{}),
   346  	}
   347  	if _, err := c.db.Exec(`CREATE DATABASE ` + sink.Path); err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	parsed, err := parser.ParseOne(create)
   352  	if err != nil {
   353  		return nil, err
   354  	}
   355  	createStmt := parsed.AST.(*tree.CreateChangefeed)
   356  	if createStmt.SinkURI != nil {
   357  		return nil, errors.Errorf(
   358  			`unexpected sink provided: "INTO %s"`, tree.AsString(createStmt.SinkURI))
   359  	}
   360  	createStmt.SinkURI = tree.NewStrVal(c.sinkURI)
   361  
   362  	if err := f.db.QueryRow(createStmt.String(), args...).Scan(&c.JobID); err != nil {
   363  		return nil, err
   364  	}
   365  	return c, nil
   366  }
   367  
// Server implements the TestFeedFactory interface. It returns the server the
// factory was constructed with.
func (f *tableFeedFactory) Server() serverutils.TestServerInterface {
	return f.s
}
   372  
// TableFeed is a TestFeed implementation using the `experimental-sql` sink.
type TableFeed struct {
	jobFeed
	// sinkURI is the `experimental-sql` URI the changefeed job was created
	// with.
	sinkURI string

	// rows is the batch of sink rows Next is currently paging through, or nil
	// when a new batch has to be fetched. seen records the row messages
	// already returned by Next so duplicates can be skipped.
	rows *gosql.Rows
	seen map[string]struct{}
}
   381  
// ResetSeen empties the set of already-returned row messages, so that Next
// stops filtering out rows it has returned before. This is useful when
// manually pausing and resuming a TableFeed: in some cases we want to be able
// to assert that rows are not re-emitted.
func (c *TableFeed) ResetSeen() {
	for k := range c.seen {
		delete(c.seen, k)
	}
}
   389  
// Partitions implements the TestFeed interface. It returns a fixed list of
// three partition names.
func (c *TableFeed) Partitions() []string {
	// The sqlSink hardcodes these.
	return []string{`0`, `1`, `2`}
}
   395  
   396  // Next implements the TestFeed interface.
   397  func (c *TableFeed) Next() (*TestFeedMessage, error) {
   398  	// sinkSink writes all changes to a table with primary key of topic,
   399  	// partition, message_id. To simulate the semantics of kafka, message_ids
   400  	// are only comparable within a given (topic, partition). Internally the
   401  	// message ids are generated as a 64 bit int with a timestamp in bits 1-49
   402  	// and a hash of the partition in 50-64. This TableFeed.Next function works
   403  	// by repeatedly fetching and deleting all rows in the table. Then it pages
   404  	// through the results until they are empty and repeats.
   405  	for {
   406  		if c.rows != nil && c.rows.Next() {
   407  			m := &TestFeedMessage{}
   408  			var msgID int64
   409  			if err := c.rows.Scan(
   410  				&m.Topic, &m.Partition, &msgID, &m.Key, &m.Value, &m.Resolved,
   411  			); err != nil {
   412  				return nil, err
   413  			}
   414  
   415  			// Scan turns NULL bytes columns into a 0-length, non-nil byte
   416  			// array, which is pretty unexpected. Nil them out before returning.
   417  			// Either key+value or payload will be set, but not both.
   418  			if len(m.Key) > 0 || len(m.Value) > 0 {
   419  				// TODO(dan): This skips duplicates, since they're allowed by the
   420  				// semantics of our changefeeds. Now that we're switching to RangeFeed,
   421  				// this can actually happen (usually because of splits) and cause
   422  				// flakes. However, we really should be de-duping key+ts, this is too
   423  				// coarse. Fixme.
   424  				seenKey := m.Topic + m.Partition + string(m.Key) + string(m.Value)
   425  				if _, ok := c.seen[seenKey]; ok {
   426  					continue
   427  				}
   428  				c.seen[seenKey] = struct{}{}
   429  
   430  				m.Resolved = nil
   431  				return m, nil
   432  			}
   433  			m.Key, m.Value = nil, nil
   434  			return m, nil
   435  		}
   436  		if c.rows != nil {
   437  			if err := c.rows.Close(); err != nil {
   438  				return nil, err
   439  			}
   440  			c.rows = nil
   441  		}
   442  
   443  		if err := c.fetchJobError(); err != nil {
   444  			return nil, c.jobErr
   445  		}
   446  
   447  		// TODO(dan): It's a bummer that this mutates the sqlsink table. I
   448  		// originally tried paging through message_id by repeatedly generating a
   449  		// new high-water with GenerateUniqueInt, but this was racy with rows
   450  		// being flushed out by the sink. An alternative is to steal the nanos
   451  		// part from `high_water_timestamp` in `crdb_internal.jobs` and run it
   452  		// through `builtins.GenerateUniqueID`, but that would mean we're only
   453  		// ever running tests on rows that have gotten a resolved timestamp,
   454  		// which seems limiting.
   455  		var err error
   456  		c.rows, err = c.db.Query(
   457  			`SELECT * FROM [DELETE FROM sqlsink RETURNING *] ORDER BY topic, partition, message_id`)
   458  		if err != nil {
   459  			return nil, err
   460  		}
   461  	}
   462  }
   463  
   464  // Close implements the TestFeed interface.
   465  func (c *TableFeed) Close() error {
   466  	if c.rows != nil {
   467  		if err := c.rows.Close(); err != nil {
   468  			return errors.Errorf(`could not close rows: %v`, err)
   469  		}
   470  	}
   471  	if _, err := c.db.Exec(`CANCEL JOB $1`, c.JobID); err != nil {
   472  		log.Infof(context.Background(), `could not cancel feed %d: %v`, c.JobID, err)
   473  	}
   474  	return c.db.Close()
   475  }
   476  
// cloudFeedFileRE matches the base name of a data file found by
// cloudFeed.walkDir. The name starts with 33 digits followed by
// dash-separated components; the fifth capture group is used as the topic.
// NOTE(review): the meaning of the other components is defined by the code
// that writes these files, which is not visible here.
var cloudFeedFileRE = regexp.MustCompile(`^\d{33}-(.+?)-(\d+)-(\d+)-([0-9a-fA-F]{8})-(.+?)-`)
   478  
// cloudFeedFactory is the TestFeedFactory for feeds that use the cloud storage
// sink. db is used to create the changefeed jobs and is shared with every feed
// the factory creates, dir is the directory under which each feed gets its own
// subdirectory, and flushCh is handed to every created feed.
type cloudFeedFactory struct {
	s       serverutils.TestServerInterface
	db      *gosql.DB
	dir     string
	flushCh chan struct{}

	// feedIdx is incremented for every feed and names the feed's subdirectory.
	feedIdx int
}
   487  
// MakeCloudFeedFactory returns a TestFeedFactory implementation using the cloud
// storage sink. dir is the directory in which the per-feed output directories
// are created; Feed expects it to be the server's `ExternalIODir`.
func MakeCloudFeedFactory(
	s serverutils.TestServerInterface, db *gosql.DB, dir string, flushCh chan struct{},
) TestFeedFactory {
	return &cloudFeedFactory{s: s, db: db, dir: dir, flushCh: flushCh}
}
   495  
   496  // Feed implements the TestFeedFactory interface
   497  func (f *cloudFeedFactory) Feed(create string, args ...interface{}) (TestFeed, error) {
   498  	parsed, err := parser.ParseOne(create)
   499  	if err != nil {
   500  		return nil, err
   501  	}
   502  	createStmt := parsed.AST.(*tree.CreateChangefeed)
   503  	if createStmt.SinkURI != nil {
   504  		return nil, errors.Errorf(`unexpected sink provided: "INTO %s"`, tree.AsString(createStmt.SinkURI))
   505  	}
   506  	feedDir := strconv.Itoa(f.feedIdx)
   507  	f.feedIdx++
   508  	sinkURI := `experimental-nodelocal://0/` + feedDir
   509  	// TODO(dan): This is a pretty unsatisfying way to test that the sink passes
   510  	// through params it doesn't understand to ExternalStorage.
   511  	sinkURI += `?should_be=ignored`
   512  	createStmt.SinkURI = tree.NewStrVal(sinkURI)
   513  
   514  	// Nodelocal puts its dir under `ExternalIODir`, which is passed into
   515  	// cloudFeedFactory.
   516  	feedDir = filepath.Join(f.dir, feedDir)
   517  	if err := os.Mkdir(feedDir, 0755); err != nil {
   518  		return nil, err
   519  	}
   520  
   521  	c := &cloudFeed{
   522  		jobFeed: jobFeed{
   523  			db:      f.db,
   524  			flushCh: f.flushCh,
   525  		},
   526  		dir:  feedDir,
   527  		seen: make(map[string]struct{}),
   528  	}
   529  	if err := f.db.QueryRow(createStmt.String(), args...).Scan(&c.JobID); err != nil {
   530  		return nil, err
   531  	}
   532  	return c, nil
   533  }
   534  
// Server implements the TestFeedFactory interface. It returns the server the
// factory was constructed with.
func (f *cloudFeedFactory) Server() serverutils.TestServerInterface {
	return f.s
}
   539  
// cloudFeedEntry is one message read from the feed's directory and queued for
// cloudFeed.Next. A row entry has topic and value set (value is one line of a
// data file); a resolved timestamp entry has only payload set (the contents of
// a RESOLVED file).
type cloudFeedEntry struct {
	topic          string
	value, payload []byte
}
   544  
// cloudFeed is a TestFeed implementation that reads the files a changefeed job
// writes into a local directory.
type cloudFeed struct {
	jobFeed
	// dir is the directory that Next walks looking for the feed's files.
	dir string

	// resolved is the path of the most recent RESOLVED file consumed; walkDir
	// skips paths that do not sort after it. rows holds the entries read from
	// disk that Next has not returned yet.
	resolved string
	rows     []cloudFeedEntry

	// seen records the row messages already returned by Next so duplicates can
	// be skipped.
	seen map[string]struct{}
}
   554  
// cloudFeedPartition is the single (empty) partition name reported by
// cloudFeed.Partitions.
const cloudFeedPartition = ``

// Partitions implements the TestFeed interface. It reports one partition,
// cloudFeedPartition.
func (c *cloudFeed) Partitions() []string {
	// TODO(dan): Try to plumb these through somehow?
	return []string{cloudFeedPartition}
}
   562  
   563  // ReformatJSON marshals a golang stdlib based JSON into a byte slice preserving
   564  // whitespace in accordance with the crdb json library.
   565  func ReformatJSON(j interface{}) ([]byte, error) {
   566  	printed, err := gojson.Marshal(j)
   567  	if err != nil {
   568  		return nil, err
   569  	}
   570  	// The golang stdlib json library prints whitespace differently than our
   571  	// internal one. Roundtrip through the crdb json library to get the
   572  	// whitespace back to where it started.
   573  	parsed, err := json.ParseJSON(string(printed))
   574  	if err != nil {
   575  		return nil, err
   576  	}
   577  	var buf bytes.Buffer
   578  	parsed.Format(&buf)
   579  	return buf.Bytes(), nil
   580  }
   581  
   582  // extractKeyFromJSONValue extracts the `WITH key_in_value` key from a `WITH
   583  // format=json, envelope=wrapped` value.
   584  func extractKeyFromJSONValue(wrapped []byte) (key []byte, value []byte, _ error) {
   585  	parsed := make(map[string]interface{})
   586  	if err := gojson.Unmarshal(wrapped, &parsed); err != nil {
   587  		return nil, nil, err
   588  	}
   589  	keyParsed := parsed[`key`]
   590  	delete(parsed, `key`)
   591  
   592  	var err error
   593  	if key, err = ReformatJSON(keyParsed); err != nil {
   594  		return nil, nil, err
   595  	}
   596  	if value, err = ReformatJSON(parsed); err != nil {
   597  		return nil, nil, err
   598  	}
   599  	return key, value, nil
   600  }
   601  
   602  // Next implements the TestFeed interface.
   603  func (c *cloudFeed) Next() (*TestFeedMessage, error) {
   604  	for {
   605  		if len(c.rows) > 0 {
   606  			e := c.rows[0]
   607  			c.rows = c.rows[1:]
   608  			m := &TestFeedMessage{
   609  				Topic:    e.topic,
   610  				Value:    e.value,
   611  				Resolved: e.payload,
   612  			}
   613  
   614  			// The other TestFeed impls check both key and value here, but cloudFeeds
   615  			// don't have keys.
   616  			if len(m.Value) > 0 {
   617  				// Cloud storage sinks default the `WITH key_in_value` option so that
   618  				// the key is recoverable. Extract it out of the value (also removing it
   619  				// so the output matches the other sinks). Note that this assumes the
   620  				// format is json, this will have to be fixed once we add format=avro
   621  				// support to cloud storage.
   622  				//
   623  				// TODO(dan): Leave the key in the value if the TestFeed user
   624  				// specifically requested it.
   625  				var err error
   626  				if m.Key, m.Value, err = extractKeyFromJSONValue(m.Value); err != nil {
   627  					return nil, err
   628  				}
   629  
   630  				seenKey := m.Topic + m.Partition + string(m.Key) + string(m.Value)
   631  				if _, ok := c.seen[seenKey]; ok {
   632  					continue
   633  				}
   634  				c.seen[seenKey] = struct{}{}
   635  				m.Resolved = nil
   636  				return m, nil
   637  			}
   638  			m.Key, m.Value = nil, nil
   639  			return m, nil
   640  		}
   641  
   642  		if err := c.fetchJobError(); err != nil {
   643  			return nil, err
   644  		}
   645  		if err := filepath.Walk(c.dir, c.walkDir); err != nil {
   646  			return nil, err
   647  		}
   648  	}
   649  }
   650  
   651  func (c *cloudFeed) walkDir(path string, info os.FileInfo, err error) error {
   652  	if strings.HasSuffix(path, `.tmp`) {
   653  		// File in the process of being written by ExternalStorage. Ignore.
   654  		return nil
   655  	}
   656  
   657  	if err != nil {
   658  		// From filepath.WalkFunc:
   659  		//  If there was a problem walking to the file or directory named by
   660  		//  path, the incoming error will describe the problem and the function
   661  		//  can decide how to handle that error (and Walk will not descend into
   662  		//  that directory). In the case of an error, the info argument will be
   663  		//  nil. If an error is returned, processing stops.
   664  		return err
   665  	}
   666  
   667  	if info.IsDir() {
   668  		// Nothing to do for directories.
   669  		return nil
   670  	}
   671  
   672  	if strings.Compare(c.resolved, path) >= 0 {
   673  		// Already output this in a previous walkDir.
   674  		return nil
   675  	}
   676  	if strings.HasSuffix(path, `RESOLVED`) {
   677  		resolvedPayload, err := ioutil.ReadFile(path)
   678  		if err != nil {
   679  			return err
   680  		}
   681  		resolvedEntry := cloudFeedEntry{payload: resolvedPayload}
   682  		c.rows = append(c.rows, resolvedEntry)
   683  		c.resolved = path
   684  		return nil
   685  	}
   686  
   687  	var topic string
   688  	subs := cloudFeedFileRE.FindStringSubmatch(filepath.Base(path))
   689  	if subs == nil {
   690  		return errors.Errorf(`unexpected file: %s`, path)
   691  	}
   692  	topic = subs[5]
   693  
   694  	f, err := os.Open(path)
   695  	if err != nil {
   696  		return err
   697  	}
   698  	defer f.Close()
   699  	// NB: This is the logic for JSON. Avro will involve parsing an
   700  	// "Object Container File".
   701  	s := bufio.NewScanner(f)
   702  	for s.Scan() {
   703  		c.rows = append(c.rows, cloudFeedEntry{
   704  			topic: topic,
   705  			value: append([]byte(nil), s.Bytes()...),
   706  		})
   707  	}
   708  	return nil
   709  }
   710  
// Close implements the TestFeed interface. It asks for the job to be canceled
// (a failure to do so is only logged) and then closes the feed's database
// handle.
//
// NOTE(review): cloudFeedFactory.Feed gives every feed the factory's own db
// handle, so closing it here closes it for the factory and any other feeds it
// created as well. Confirm this is intended.
func (c *cloudFeed) Close() error {
	if _, err := c.db.Exec(`CANCEL JOB $1`, c.JobID); err != nil {
		log.Infof(context.Background(), `could not cancel feed %d: %v`, c.JobID, err)
	}
	return c.db.Close()
}