github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/table_data_loc.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvdata 16 17 import ( 18 "context" 19 "errors" 20 "sync/atomic" 21 22 "github.com/dolthub/dolt/go/store/datas" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 25 "github.com/dolthub/dolt/go/libraries/doltcore/env" 26 "github.com/dolthub/dolt/go/libraries/doltcore/row" 27 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 28 "github.com/dolthub/dolt/go/libraries/doltcore/table" 29 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 30 "github.com/dolthub/dolt/go/libraries/doltcore/table/typed/noms" 31 "github.com/dolthub/dolt/go/libraries/utils/filesys" 32 "github.com/dolthub/dolt/go/store/types" 33 ) 34 35 const ( 36 // tableWriterStatUpdateRate is the number of writes that will process before the updated stats are displayed. 37 tableWriterStatUpdateRate = 64 * 1024 38 39 // tableWriterGCRate is the number of rows inserted between GCs. Should be > the frequency at which the table editor 40 // flushes to disk or there is a lot of wasted work 41 tableWriterGCRate = 2 * 1024 * 1024 42 ) 43 44 // ErrNoPK is an error returned if a schema is missing a required primary key 45 var ErrNoPK = errors.New("schema does not contain a primary key") 46 47 // TableDataLocation is a dolt table that that can be imported from or exported to. 48 type TableDataLocation struct { 49 // Name the name of a table 50 Name string 51 } 52 53 // String returns a string representation of the data location. 54 func (dl TableDataLocation) String() string { 55 return DoltDB.ReadableStr() + ":" + dl.Name 56 } 57 58 // Exists returns true if the DataLocation already exists 59 func (dl TableDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, _ filesys.ReadableFS) (bool, error) { 60 return root.HasTable(ctx, dl.Name) 61 } 62 63 // NewReader creates a TableReadCloser for the DataLocation 64 func (dl TableDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, _ filesys.ReadableFS, _ interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { 65 tbl, ok, err := root.GetTable(ctx, dl.Name) 66 if err != nil { 67 return nil, false, err 68 } 69 if !ok { 70 return nil, false, doltdb.ErrTableNotFound 71 } 72 73 rd, err := table.NewDoltTableReader(ctx, tbl) 74 if err != nil { 75 return nil, false, err 76 } 77 78 return rd, true, nil 79 } 80 81 // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite 82 // an existing table. 83 func (dl TableDataLocation) NewCreatingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, outSch schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) { 84 updatedRoot, err := root.CreateEmptyTable(ctx, dl.Name, outSch) 85 if err != nil { 86 return nil, err 87 } 88 89 sess := editor.CreateTableEditSession(updatedRoot, editor.TableEditSessionProps{}) 90 tableEditor, err := sess.GetTableEditor(ctx, dl.Name, outSch) 91 if err != nil { 92 return nil, err 93 } 94 95 return &tableEditorWriteCloser{ 96 dEnv: dEnv, 97 insertOnly: true, 98 initialData: types.EmptyMap, 99 statsCB: statsCB, 100 tableEditor: tableEditor, 101 sess: sess, 102 tableSch: outSch, 103 useGC: useGC, 104 }, nil 105 } 106 107 // NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on 108 // their primary key. 109 func (dl TableDataLocation) NewUpdatingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, _ schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) { 110 tbl, ok, err := root.GetTable(ctx, dl.Name) 111 if err != nil { 112 return nil, err 113 } 114 if !ok { 115 return nil, errors.New("Could not find table " + dl.Name) 116 } 117 118 m, err := tbl.GetRowData(ctx) 119 if err != nil { 120 return nil, err 121 } 122 tblSch, err := tbl.GetSchema(ctx) 123 if err != nil { 124 return nil, err 125 } 126 127 sess := editor.CreateTableEditSession(root, editor.TableEditSessionProps{}) 128 tableEditor, err := sess.GetTableEditor(ctx, dl.Name, tblSch) 129 if err != nil { 130 return nil, err 131 } 132 133 // keyless tables are updated as append only 134 insertOnly := schema.IsKeyless(tblSch) 135 136 return &tableEditorWriteCloser{ 137 dEnv: dEnv, 138 insertOnly: insertOnly, 139 initialData: m, 140 statsCB: statsCB, 141 tableEditor: tableEditor, 142 sess: sess, 143 tableSch: tblSch, 144 useGC: useGC, 145 }, nil 146 } 147 148 // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while 149 // preserving schema 150 func (dl TableDataLocation) NewReplacingWriter(ctx context.Context, _ DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, _ schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error) { 151 tbl, ok, err := root.GetTable(ctx, dl.Name) 152 if err != nil { 153 return nil, err 154 } 155 if !ok { 156 return nil, errors.New("Could not find table " + dl.Name) 157 } 158 159 tblSch, err := tbl.GetSchema(ctx) 160 if err != nil { 161 return nil, err 162 } 163 164 // overwrites existing table 165 updatedRoot, err := root.CreateEmptyTable(ctx, dl.Name, tblSch) 166 if err != nil { 167 return nil, err 168 } 169 170 sess := editor.CreateTableEditSession(updatedRoot, editor.TableEditSessionProps{}) 171 tableEditor, err := sess.GetTableEditor(ctx, dl.Name, tblSch) 172 if err != nil { 173 return nil, err 174 } 175 176 return &tableEditorWriteCloser{ 177 dEnv: dEnv, 178 insertOnly: true, 179 initialData: types.EmptyMap, 180 statsCB: statsCB, 181 tableEditor: tableEditor, 182 sess: sess, 183 tableSch: tblSch, 184 useGC: useGC, 185 }, nil 186 } 187 188 type tableEditorWriteCloser struct { 189 dEnv *env.DoltEnv 190 tableEditor editor.TableEditor 191 sess *editor.TableEditSession 192 initialData types.Map 193 tableSch schema.Schema 194 insertOnly bool 195 useGC bool 196 197 statsCB noms.StatsCB 198 stats types.AppliedEditStats 199 statOps int64 200 gcOps int64 201 } 202 203 var _ DataMoverCloser = (*tableEditorWriteCloser)(nil) 204 205 func (te *tableEditorWriteCloser) Flush(ctx context.Context) (*doltdb.RootValue, error) { 206 return te.sess.Flush(ctx) 207 } 208 209 // GetSchema implements TableWriteCloser 210 func (te *tableEditorWriteCloser) GetSchema() schema.Schema { 211 return te.tableSch 212 } 213 214 // WriteRow implements TableWriteCloser 215 func (te *tableEditorWriteCloser) WriteRow(ctx context.Context, r row.Row) error { 216 if te.statsCB != nil && atomic.LoadInt64(&te.statOps) >= tableWriterStatUpdateRate { 217 atomic.StoreInt64(&te.statOps, 0) 218 te.statsCB(te.stats) 219 } 220 221 if atomic.LoadInt64(&te.gcOps) >= tableWriterGCRate { 222 atomic.StoreInt64(&te.gcOps, 0) 223 if err := te.GC(ctx); err != nil { 224 return err 225 } 226 } 227 _ = atomic.AddInt64(&te.gcOps, 1) 228 229 if te.insertOnly { 230 err := te.tableEditor.InsertRow(ctx, r, nil) 231 232 if err != nil { 233 return err 234 } 235 236 _ = atomic.AddInt64(&te.statOps, 1) 237 te.stats.Additions++ 238 return nil 239 240 } else { 241 pkTuple, err := r.NomsMapKey(te.tableSch).Value(ctx) 242 if err != nil { 243 return err 244 } 245 val, ok, err := te.initialData.MaybeGet(ctx, pkTuple) 246 if err != nil { 247 return err 248 } 249 if !ok { 250 err := te.tableEditor.InsertRow(ctx, r, nil) 251 252 if err != nil { 253 return err 254 } 255 256 _ = atomic.AddInt64(&te.statOps, 1) 257 te.stats.Additions++ 258 return nil 259 } 260 oldRow, err := row.FromNoms(te.tableSch, pkTuple.(types.Tuple), val.(types.Tuple)) 261 if err != nil { 262 return err 263 } 264 if row.AreEqual(r, oldRow, te.tableSch) { 265 te.stats.SameVal++ 266 return nil 267 } 268 err = te.tableEditor.UpdateRow(ctx, oldRow, r, nil) 269 270 if err != nil { 271 return err 272 } 273 274 _ = atomic.AddInt64(&te.statOps, 1) 275 te.stats.Modifications++ 276 return nil 277 } 278 } 279 280 func (te *tableEditorWriteCloser) GC(ctx context.Context) error { 281 if !te.useGC { 282 if te.dEnv != nil && te.dEnv.DoltDB != nil { 283 db, ok := te.dEnv.DoltDB.ValueReadWriter().(datas.Database) 284 if !ok { 285 return nil 286 } 287 return datas.PruneTableFiles(ctx, db) 288 } 289 return nil 290 } 291 292 inProgressRoot, err := te.sess.Flush(ctx) 293 if err != nil { 294 return err 295 } 296 297 inProgressHash, err := te.dEnv.DoltDB.WriteRootValue(ctx, inProgressRoot) 298 if err != nil { 299 return err 300 } 301 302 keepers, err := env.GetGCKeepers(ctx, te.dEnv.RepoStateReader(), te.dEnv.DoltDB) 303 if err != nil { 304 return err 305 } 306 307 keepers = append(keepers, inProgressHash) 308 309 return te.dEnv.DoltDB.GC(ctx, keepers...) 310 } 311 312 // Close implements TableWriteCloser 313 func (te *tableEditorWriteCloser) Close(ctx context.Context) error { 314 if te.statsCB != nil { 315 te.statsCB(te.stats) 316 } 317 return nil 318 }