github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/commit_hooks.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package doltdb
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"sync"
    22  	"time"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
    27  	"github.com/dolthub/dolt/go/store/datas"
    28  	"github.com/dolthub/dolt/go/store/hash"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  )
    31  
// PushOnWriteHook is a CommitHook that synchronously pushes an updated
// dataset head to a destination database when Execute is called.
type PushOnWriteHook struct {
	destDB datas.Database       // database that receives the pushed heads
	tmpDir string               // temp directory forwarded to pushDataset
	out    io.Writer            // sink used by HandleError; nil until SetLogger is called
	fmt    *types.NomsBinFormat // value of destDB.Format() captured by NewPushOnWriteHook
}

// Compile-time check that *PushOnWriteHook satisfies CommitHook.
var _ CommitHook = (*PushOnWriteHook)(nil)
    40  
    41  // NewPushOnWriteHook creates a ReplicateHook, parameterizaed by the backup database
    42  // and a local tempfile for pushing
    43  func NewPushOnWriteHook(destDB *DoltDB, tmpDir string) *PushOnWriteHook {
    44  	return &PushOnWriteHook{
    45  		destDB: destDB.db,
    46  		tmpDir: tmpDir,
    47  		fmt:    destDB.Format(),
    48  	}
    49  }
    50  
    51  // Execute implements CommitHook, replicates head updates to the destDb field
    52  func (ph *PushOnWriteHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
    53  	return nil, pushDataset(ctx, ph.destDB, db, ds, ph.tmpDir)
    54  }
    55  
    56  func pushDataset(ctx context.Context, destDB, srcDB datas.Database, ds datas.Dataset, tmpDir string) error {
    57  	addr, ok := ds.MaybeHeadAddr()
    58  	if !ok {
    59  		_, err := destDB.Delete(ctx, ds, "")
    60  		return err
    61  	}
    62  
    63  	err := pullHash(ctx, destDB, srcDB, []hash.Hash{addr}, tmpDir, nil, nil)
    64  	if err != nil {
    65  		return err
    66  	}
    67  
    68  	rf, err := ref.Parse(ds.ID())
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	ds, err = destDB.GetDataset(ctx, rf.String())
    74  	if err != nil {
    75  		return err
    76  	}
    77  
    78  	_, err = destDB.SetHead(ctx, ds, addr, "")
    79  	return err
    80  }
    81  
    82  // HandleError implements CommitHook
    83  func (ph *PushOnWriteHook) HandleError(ctx context.Context, err error) error {
    84  	if ph.out != nil {
    85  		_, err := ph.out.Write([]byte(fmt.Sprintf("error pushing: %+v", err)))
    86  		if err != nil {
    87  			return err
    88  		}
    89  	}
    90  	return nil
    91  }
    92  
    93  func (*PushOnWriteHook) ExecuteForWorkingSets() bool {
    94  	return false
    95  }
    96  
// SetLogger implements CommitHook. It stores wr as the writer that
// HandleError reports to; passing nil disables that reporting. It always
// returns nil.
func (ph *PushOnWriteHook) SetLogger(ctx context.Context, wr io.Writer) error {
	ph.out = wr
	return nil
}
   102  
// PushArg is one unit of replication work sent over the async push channel.
type PushArg struct {
	ds   datas.Dataset  // dataset whose head should be replicated
	db   datas.Database // database used as the push source for ds
	hash hash.Hash      // head address of ds when enqueued; presumably the zero hash if ds had no head
}
   108  
// AsyncPushOnWriteHook is a CommitHook that hands head updates to
// background replication threads over a channel instead of pushing inline.
type AsyncPushOnWriteHook struct {
	out io.Writer    // sink used by HandleError; nil until SetLogger is called
	ch  chan PushArg // queue that Execute sends head updates on
}
   113  
// Settings for asynchronous replication:
//   - asyncPushBufferSize is the capacity of the PushArg channel and the
//     initial size hint for the head-tracking maps.
//   - asyncPushInterval is the period of the ticker that triggers a flush
//     of pending heads.
//   - asyncPushProcessCommit and asyncPushSyncReplica are the names under
//     which the two background threads are registered.
const (
	asyncPushBufferSize    = 2048
	asyncPushInterval      = 500 * time.Millisecond
	asyncPushProcessCommit = "async_push_process_commit"
	asyncPushSyncReplica   = "async_push_sync_replica"
)

// Compile-time check that *AsyncPushOnWriteHook satisfies CommitHook.
var _ CommitHook = (*AsyncPushOnWriteHook)(nil)
   122  
   123  // NewAsyncPushOnWriteHook creates a AsyncReplicateHook
   124  func NewAsyncPushOnWriteHook(bThreads *sql.BackgroundThreads, destDB *DoltDB, tmpDir string, logger io.Writer) (*AsyncPushOnWriteHook, error) {
   125  	ch := make(chan PushArg, asyncPushBufferSize)
   126  	err := RunAsyncReplicationThreads(bThreads, ch, destDB, tmpDir, logger)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  	return &AsyncPushOnWriteHook{ch: ch}, nil
   131  }
   132  
   133  func (*AsyncPushOnWriteHook) ExecuteForWorkingSets() bool {
   134  	return false
   135  }
   136  
   137  // Execute implements CommitHook, replicates head updates to the destDb field
   138  func (ah *AsyncPushOnWriteHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
   139  	addr, _ := ds.MaybeHeadAddr()
   140  
   141  	select {
   142  	case ah.ch <- PushArg{ds: ds, db: db, hash: addr}:
   143  	case <-ctx.Done():
   144  		ah.ch <- PushArg{ds: ds, db: db, hash: addr}
   145  		return nil, ctx.Err()
   146  	}
   147  	return nil, nil
   148  }
   149  
   150  // HandleError implements CommitHook
   151  func (ah *AsyncPushOnWriteHook) HandleError(ctx context.Context, err error) error {
   152  	if ah.out != nil {
   153  		ah.out.Write([]byte(err.Error()))
   154  	}
   155  	return nil
   156  }
   157  
// SetLogger implements CommitHook. It stores wr as the writer that
// HandleError reports to; passing nil disables that reporting. It always
// returns nil.
func (ah *AsyncPushOnWriteHook) SetLogger(ctx context.Context, wr io.Writer) error {
	ah.out = wr
	return nil
}
   163  
// LogHook is a CommitHook that writes a fixed message to its logger each
// time it is executed.
type LogHook struct {
	msg []byte    // bytes written to out on every Execute
	out io.Writer // log destination; nil until SetLogger is called
}

// Compile-time check that *LogHook satisfies CommitHook.
var _ CommitHook = (*LogHook)(nil)
   170  
   171  // NewLogHook is a noop that logs to a writer when invoked
   172  func NewLogHook(msg []byte) *LogHook {
   173  	return &LogHook{msg: msg}
   174  }
   175  
   176  // Execute implements CommitHook, writes message to log channel
   177  func (lh *LogHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
   178  	if lh.out != nil {
   179  		_, err := lh.out.Write(lh.msg)
   180  		return nil, err
   181  	}
   182  	return nil, nil
   183  }
   184  
   185  // HandleError implements CommitHook
   186  func (lh *LogHook) HandleError(ctx context.Context, err error) error {
   187  	if lh.out != nil {
   188  		lh.out.Write([]byte(err.Error()))
   189  	}
   190  	return nil
   191  }
   192  
// SetLogger implements CommitHook. It stores wr as the writer that Execute
// and HandleError write to; passing nil silences both. It always returns
// nil.
func (lh *LogHook) SetLogger(ctx context.Context, wr io.Writer) error {
	lh.out = wr
	return nil
}
   198  
   199  func (*LogHook) ExecuteForWorkingSets() bool {
   200  	return false
   201  }
   202  
   203  func RunAsyncReplicationThreads(bThreads *sql.BackgroundThreads, ch chan PushArg, destDB *DoltDB, tmpDir string, logger io.Writer) error {
   204  	mu := &sync.Mutex{}
   205  	var newHeads = make(map[string]PushArg, asyncPushBufferSize)
   206  
   207  	updateHead := func(p PushArg) {
   208  		mu.Lock()
   209  		newHeads[p.ds.ID()] = p
   210  		mu.Unlock()
   211  	}
   212  
   213  	// newCtx lets first goroutine drain before the second goroutine finalizes
   214  	newCtx, stop := context.WithCancel(context.Background())
   215  
   216  	// The first goroutine amortizes commits into a map keyed by dataset id.
   217  	// When the parent context cancels, this goroutine drains and kills its
   218  	// dependent goroutine.
   219  	//
   220  	// We do not track sequential commits because push follows historical
   221  	// dependencies. This does not account for reset --force, which
   222  	// breaks historical dependence.
   223  	err := bThreads.Add(asyncPushProcessCommit, func(ctx context.Context) {
   224  		for {
   225  			select {
   226  			case p, ok := <-ch:
   227  				if !ok {
   228  					return
   229  				}
   230  				updateHead(p)
   231  			case <-ctx.Done():
   232  				stop()
   233  				return
   234  			}
   235  		}
   236  	})
   237  	if err != nil {
   238  		return err
   239  	}
   240  
   241  	getHeadsCopy := func() map[string]PushArg {
   242  		mu.Lock()
   243  		defer mu.Unlock()
   244  		if len(newHeads) == 0 {
   245  			return nil
   246  		}
   247  
   248  		toRet := newHeads
   249  		newHeads = make(map[string]PushArg, asyncPushBufferSize)
   250  
   251  		return toRet
   252  	}
   253  
   254  	flush := func(newHeads map[string]PushArg, latestHeads map[string]hash.Hash) {
   255  		newHeadsCopy := getHeadsCopy()
   256  		if len(newHeadsCopy) == 0 {
   257  			return
   258  		}
   259  		for id, newCm := range newHeadsCopy {
   260  			if latest, ok := latestHeads[id]; !ok || latest != newCm.hash {
   261  				// use background context to drain after sql context is canceled
   262  				err := pushDataset(context.Background(), destDB.db, newCm.db, newCm.ds, tmpDir)
   263  				if err != nil {
   264  					logger.Write([]byte("replication failed: " + err.Error()))
   265  				}
   266  				if newCm.hash.IsEmpty() {
   267  					delete(latestHeads, id)
   268  				} else {
   269  					latestHeads[id] = newCm.hash
   270  				}
   271  			}
   272  		}
   273  	}
   274  
   275  	// The second goroutine pushes updates to a remote chunkstore.
   276  	// This goroutine waits for first goroutine to drain before closing
   277  	// the channel and exiting.
   278  	err = bThreads.Add(asyncPushSyncReplica, func(ctx context.Context) {
   279  		defer close(ch)
   280  		var latestHeads = make(map[string]hash.Hash, asyncPushBufferSize)
   281  		ticker := time.NewTicker(asyncPushInterval)
   282  		for {
   283  			select {
   284  			case <-newCtx.Done():
   285  				flush(newHeads, latestHeads)
   286  				return
   287  			case <-ticker.C:
   288  				flush(newHeads, latestHeads)
   289  			}
   290  		}
   291  	})
   292  	if err != nil {
   293  		return err
   294  	}
   295  
   296  	return nil
   297  }