github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/table_data_loc.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvdata
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"sync/atomic"
    21  
    22  	"github.com/dolthub/dolt/go/store/datas"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/table/typed/noms"
    31  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    32  	"github.com/dolthub/dolt/go/store/types"
    33  )
    34  
    35  const (
    36  	// tableWriterStatUpdateRate is the number of writes that will process before the updated stats are displayed.
    37  	tableWriterStatUpdateRate = 64 * 1024
    38  
    39  	// tableWriterGCRate is the number of rows inserted between GCs.  Should be > the frequency at which the table editor
    40  	// flushes to disk or there is a lot of wasted work
    41  	tableWriterGCRate = 2 * 1024 * 1024
    42  )
    43  
    44  // ErrNoPK is an error returned if a schema is missing a required primary key
    45  var ErrNoPK = errors.New("schema does not contain a primary key")
    46  
// TableDataLocation is a dolt table that can be imported from or exported to.
type TableDataLocation struct {
	// Name is the name of the table.
	Name string
}
    52  
    53  // String returns a string representation of the data location.
    54  func (dl TableDataLocation) String() string {
    55  	return DoltDB.ReadableStr() + ":" + dl.Name
    56  }
    57  
    58  // Exists returns true if the DataLocation already exists
    59  func (dl TableDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, _ filesys.ReadableFS) (bool, error) {
    60  	return root.HasTable(ctx, dl.Name)
    61  }
    62  
    63  // NewReader creates a TableReadCloser for the DataLocation
    64  func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, _ filesys.ReadableFS, _ interface{}) (rdCl table.TableReadCloser, sorted bool, err error) {
    65  	tbl, ok, err := root.GetTable(ctx, dl.Name)
    66  	if err != nil {
    67  		return nil, false, err
    68  	}
    69  	if !ok {
    70  		return nil, false, doltdb.ErrTableNotFound
    71  	}
    72  
    73  	rd, err := table.NewDoltTableReader(ctx, tbl)
    74  	if err != nil {
    75  		return nil, false, err
    76  	}
    77  
    78  	return rd, true, nil
    79  }
    80  
    81  // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite
    82  // an existing table.
    83  func (dl TableDataLocation) NewCreatingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, outSch schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) {
    84  	updatedRoot, err := root.CreateEmptyTable(ctx, dl.Name, outSch)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	sess := editor.CreateTableEditSession(updatedRoot, editor.TableEditSessionProps{})
    90  	tableEditor, err := sess.GetTableEditor(ctx, dl.Name, outSch)
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	return &tableEditorWriteCloser{
    96  		dEnv:        dEnv,
    97  		insertOnly:  true,
    98  		initialData: types.EmptyMap,
    99  		statsCB:     statsCB,
   100  		tableEditor: tableEditor,
   101  		sess:        sess,
   102  		tableSch:    outSch,
   103  		useGC:       useGC,
   104  	}, nil
   105  }
   106  
   107  // NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on
   108  // their primary key.
   109  func (dl TableDataLocation) NewUpdatingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, _ schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) {
   110  	tbl, ok, err := root.GetTable(ctx, dl.Name)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  	if !ok {
   115  		return nil, errors.New("Could not find table " + dl.Name)
   116  	}
   117  
   118  	m, err := tbl.GetRowData(ctx)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  	tblSch, err := tbl.GetSchema(ctx)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  
   127  	sess := editor.CreateTableEditSession(root, editor.TableEditSessionProps{})
   128  	tableEditor, err := sess.GetTableEditor(ctx, dl.Name, tblSch)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  
   133  	// keyless tables are updated as append only
   134  	insertOnly := schema.IsKeyless(tblSch)
   135  
   136  	return &tableEditorWriteCloser{
   137  		dEnv:        dEnv,
   138  		insertOnly:  insertOnly,
   139  		initialData: m,
   140  		statsCB:     statsCB,
   141  		tableEditor: tableEditor,
   142  		sess:        sess,
   143  		tableSch:    tblSch,
   144  		useGC:       useGC,
   145  	}, nil
   146  }
   147  
   148  // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while
   149  // preserving schema
   150  func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, _ schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) {
   151  	tbl, ok, err := root.GetTable(ctx, dl.Name)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  	if !ok {
   156  		return nil, errors.New("Could not find table " + dl.Name)
   157  	}
   158  
   159  	tblSch, err := tbl.GetSchema(ctx)
   160  	if err != nil {
   161  		return nil, err
   162  	}
   163  
   164  	// overwrites existing table
   165  	updatedRoot, err := root.CreateEmptyTable(ctx, dl.Name, tblSch)
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  
   170  	sess := editor.CreateTableEditSession(updatedRoot, editor.TableEditSessionProps{})
   171  	tableEditor, err := sess.GetTableEditor(ctx, dl.Name, tblSch)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	return &tableEditorWriteCloser{
   177  		dEnv:        dEnv,
   178  		insertOnly:  true,
   179  		initialData: types.EmptyMap,
   180  		statsCB:     statsCB,
   181  		tableEditor: tableEditor,
   182  		sess:        sess,
   183  		tableSch:    tblSch,
   184  		useGC:       useGC,
   185  	}, nil
   186  }
   187  
// tableEditorWriteCloser writes rows to a dolt table through a table editor, tracking edit statistics and
// periodically triggering GC while rows are written.
type tableEditorWriteCloser struct {
	// dEnv is the environment whose DoltDB and repo state GC operates on.
	dEnv        *env.DoltEnv
	// tableEditor receives the InsertRow and UpdateRow calls made by WriteRow.
	tableEditor editor.TableEditor
	// sess is the edit session that Flush and GC flush to obtain a root value.
	sess        *editor.TableEditSession
	// initialData is the row data WriteRow looks keys up in to choose between insert and update.
	initialData types.Map
	// tableSch is the schema returned by GetSchema and used to build and compare rows.
	tableSch    schema.Schema
	// insertOnly makes WriteRow insert every row without consulting initialData.
	insertOnly  bool
	// useGC selects the full GC path in GC; when false GC only prunes table files.
	useGC       bool

	// statsCB, when non-nil, is called with stats periodically from WriteRow and once from Close.
	statsCB noms.StatsCB
	// stats accumulates the additions, modifications, and unchanged rows seen by WriteRow.
	stats   types.AppliedEditStats
	// statOps counts inserts and updates since stats were last reported; accessed via sync/atomic.
	statOps int64
	// gcOps counts WriteRow calls since GC was last triggered; accessed via sync/atomic.
	gcOps   int64
}
   202  
// Compile-time check that *tableEditorWriteCloser implements DataMoverCloser.
var _ DataMoverCloser = (*tableEditorWriteCloser)(nil)
   204  
   205  func (te *tableEditorWriteCloser) Flush(ctx context.Context) (*doltdb.RootValue, error) {
   206  	return te.sess.Flush(ctx)
   207  }
   208  
   209  // GetSchema implements TableWriteCloser
   210  func (te *tableEditorWriteCloser) GetSchema() schema.Schema {
   211  	return te.tableSch
   212  }
   213  
   214  // WriteRow implements TableWriteCloser
   215  func (te *tableEditorWriteCloser) WriteRow(ctx context.Context, r row.Row) error {
   216  	if te.statsCB != nil && atomic.LoadInt64(&te.statOps) >= tableWriterStatUpdateRate {
   217  		atomic.StoreInt64(&te.statOps, 0)
   218  		te.statsCB(te.stats)
   219  	}
   220  
   221  	if atomic.LoadInt64(&te.gcOps) >= tableWriterGCRate {
   222  		atomic.StoreInt64(&te.gcOps, 0)
   223  		if err := te.GC(ctx); err != nil {
   224  			return err
   225  		}
   226  	}
   227  	_ = atomic.AddInt64(&te.gcOps, 1)
   228  
   229  	if te.insertOnly {
   230  		err := te.tableEditor.InsertRow(ctx, r, nil)
   231  
   232  		if err != nil {
   233  			return err
   234  		}
   235  
   236  		_ = atomic.AddInt64(&te.statOps, 1)
   237  		te.stats.Additions++
   238  		return nil
   239  
   240  	} else {
   241  		pkTuple, err := r.NomsMapKey(te.tableSch).Value(ctx)
   242  		if err != nil {
   243  			return err
   244  		}
   245  		val, ok, err := te.initialData.MaybeGet(ctx, pkTuple)
   246  		if err != nil {
   247  			return err
   248  		}
   249  		if !ok {
   250  			err := te.tableEditor.InsertRow(ctx, r, nil)
   251  
   252  			if err != nil {
   253  				return err
   254  			}
   255  
   256  			_ = atomic.AddInt64(&te.statOps, 1)
   257  			te.stats.Additions++
   258  			return nil
   259  		}
   260  		oldRow, err := row.FromNoms(te.tableSch, pkTuple.(types.Tuple), val.(types.Tuple))
   261  		if err != nil {
   262  			return err
   263  		}
   264  		if row.AreEqual(r, oldRow, te.tableSch) {
   265  			te.stats.SameVal++
   266  			return nil
   267  		}
   268  		err = te.tableEditor.UpdateRow(ctx, oldRow, r, nil)
   269  
   270  		if err != nil {
   271  			return err
   272  		}
   273  
   274  		_ = atomic.AddInt64(&te.statOps, 1)
   275  		te.stats.Modifications++
   276  		return nil
   277  	}
   278  }
   279  
   280  func (te *tableEditorWriteCloser) GC(ctx context.Context) error {
   281  	if !te.useGC {
   282  		if te.dEnv != nil && te.dEnv.DoltDB != nil {
   283  			db, ok := te.dEnv.DoltDB.ValueReadWriter().(datas.Database)
   284  			if !ok {
   285  				return nil
   286  			}
   287  			return datas.PruneTableFiles(ctx, db)
   288  		}
   289  		return nil
   290  	}
   291  
   292  	inProgressRoot, err := te.sess.Flush(ctx)
   293  	if err != nil {
   294  		return err
   295  	}
   296  
   297  	inProgressHash, err := te.dEnv.DoltDB.WriteRootValue(ctx, inProgressRoot)
   298  	if err != nil {
   299  		return err
   300  	}
   301  
   302  	keepers, err := env.GetGCKeepers(ctx, te.dEnv.RepoStateReader(), te.dEnv.DoltDB)
   303  	if err != nil {
   304  		return err
   305  	}
   306  
   307  	keepers = append(keepers, inProgressHash)
   308  
   309  	return te.dEnv.DoltDB.GC(ctx, keepers...)
   310  }
   311  
   312  // Close implements TableWriteCloser
   313  func (te *tableEditorWriteCloser) Close(ctx context.Context) error {
   314  	if te.statsCB != nil {
   315  		te.statsCB(te.stats)
   316  	}
   317  	return nil
   318  }