github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/commit_hooks.go

// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package doltdb

import (
	"context"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
	"github.com/dolthub/dolt/go/store/datas"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/types"
)

type PushOnWriteHook struct {
	destDB datas.Database
	tmpDir string
	out    io.Writer
	fmt    *types.NomsBinFormat
}

var _ CommitHook = (*PushOnWriteHook)(nil)

// NewPushOnWriteHook creates a PushOnWriteHook, parameterized by the backup database
// and a local temp directory used for pushing.
func NewPushOnWriteHook(destDB *DoltDB, tmpDir string) *PushOnWriteHook {
	return &PushOnWriteHook{
		destDB: destDB.db,
		tmpDir: tmpDir,
		fmt:    destDB.Format(),
	}
}

// Execute implements CommitHook, replicates head updates to the destDB field.
func (ph *PushOnWriteHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
	return nil, pushDataset(ctx, ph.destDB, db, ds, ph.tmpDir)
}

func pushDataset(ctx context.Context, destDB, srcDB datas.Database, ds datas.Dataset, tmpDir string) error {
	addr, ok := ds.MaybeHeadAddr()
	if !ok {
		// the source dataset has no head, so delete it on the destination as well
		_, err := destDB.Delete(ctx, ds, "")
		return err
	}

	err := pullHash(ctx, destDB, srcDB, []hash.Hash{addr}, tmpDir, nil, nil)
	if err != nil {
		return err
	}

	rf, err := ref.Parse(ds.ID())
	if err != nil {
		return err
	}

	ds, err = destDB.GetDataset(ctx, rf.String())
	if err != nil {
		return err
	}

	_, err = destDB.SetHead(ctx, ds, addr, "")
	return err
}

// HandleError implements CommitHook
func (ph *PushOnWriteHook) HandleError(ctx context.Context, err error) error {
	if ph.out != nil {
		_, err := ph.out.Write([]byte(fmt.Sprintf("error pushing: %+v", err)))
		if err != nil {
			return err
		}
	}
	return nil
}

func (*PushOnWriteHook) ExecuteForWorkingSets() bool {
	return false
}

// SetLogger implements CommitHook
func (ph *PushOnWriteHook) SetLogger(ctx context.Context, wr io.Writer) error {
	ph.out = wr
	return nil
}
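
// A minimal usage sketch for the synchronous path, assuming destDB, tmpDir and
// ctx come from the caller's replication configuration: the push happens inside
// Execute, so any push error is returned directly to the caller.
//
//	hook := NewPushOnWriteHook(destDB, tmpDir)
//	_ = hook.SetLogger(ctx, os.Stderr)
//	_, err := hook.Execute(ctx, ds, srcDB) // ds is the updated dataset, srcDB its database
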
type PushArg struct {
	ds   datas.Dataset
	db   datas.Database
	hash hash.Hash
}

type AsyncPushOnWriteHook struct {
	out io.Writer
	ch  chan PushArg
}

const (
	asyncPushBufferSize    = 2048
	asyncPushInterval      = 500 * time.Millisecond
	asyncPushProcessCommit = "async_push_process_commit"
	asyncPushSyncReplica   = "async_push_sync_replica"
)

var _ CommitHook = (*AsyncPushOnWriteHook)(nil)

// NewAsyncPushOnWriteHook creates an AsyncPushOnWriteHook.
func NewAsyncPushOnWriteHook(bThreads *sql.BackgroundThreads, destDB *DoltDB, tmpDir string, logger io.Writer) (*AsyncPushOnWriteHook, error) {
	ch := make(chan PushArg, asyncPushBufferSize)
	err := RunAsyncReplicationThreads(bThreads, ch, destDB, tmpDir, logger)
	if err != nil {
		return nil, err
	}
	return &AsyncPushOnWriteHook{ch: ch}, nil
}

func (*AsyncPushOnWriteHook) ExecuteForWorkingSets() bool {
	return false
}

// Execute implements CommitHook, enqueues head updates for the replication background threads.
func (ah *AsyncPushOnWriteHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
	addr, _ := ds.MaybeHeadAddr()

	select {
	case ah.ch <- PushArg{ds: ds, db: db, hash: addr}:
	case <-ctx.Done():
		// even if ctx is done, enqueue the head so it still gets replicated
		ah.ch <- PushArg{ds: ds, db: db, hash: addr}
		return nil, ctx.Err()
	}
	return nil, nil
}

// HandleError implements CommitHook
func (ah *AsyncPushOnWriteHook) HandleError(ctx context.Context, err error) error {
	if ah.out != nil {
		ah.out.Write([]byte(err.Error()))
	}
	return nil
}

// SetLogger implements CommitHook
func (ah *AsyncPushOnWriteHook) SetLogger(ctx context.Context, wr io.Writer) error {
	ah.out = wr
	return nil
}

type LogHook struct {
	msg []byte
	out io.Writer
}

var _ CommitHook = (*LogHook)(nil)

// NewLogHook creates a hook that writes its message to the configured logger when invoked.
func NewLogHook(msg []byte) *LogHook {
	return &LogHook{msg: msg}
}

// Execute implements CommitHook, writes the message to the log writer.
func (lh *LogHook) Execute(ctx context.Context, ds datas.Dataset, db datas.Database) (func(context.Context) error, error) {
	if lh.out != nil {
		_, err := lh.out.Write(lh.msg)
		return nil, err
	}
	return nil, nil
}

// HandleError implements CommitHook
func (lh *LogHook) HandleError(ctx context.Context, err error) error {
	if lh.out != nil {
		lh.out.Write([]byte(err.Error()))
	}
	return nil
}

// SetLogger implements CommitHook
func (lh *LogHook) SetLogger(ctx context.Context, wr io.Writer) error {
	lh.out = wr
	return nil
}

func (*LogHook) ExecuteForWorkingSets() bool {
	return false
}
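
// For illustration, a LogHook simply writes its fixed message to the configured
// logger on each Execute; the bytes.Buffer below stands in for a real log writer
// (ctx, ds and db are assumed to be supplied by the caller):
//
//	hook := NewLogHook([]byte("head updated\n"))
//	var buf bytes.Buffer
//	_ = hook.SetLogger(ctx, &buf)
//	_, _ = hook.Execute(ctx, ds, db) // buf now contains "head updated\n"
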
func RunAsyncReplicationThreads(bThreads *sql.BackgroundThreads, ch chan PushArg, destDB *DoltDB, tmpDir string, logger io.Writer) error {
	mu := &sync.Mutex{}
	var newHeads = make(map[string]PushArg, asyncPushBufferSize)

	updateHead := func(p PushArg) {
		mu.Lock()
		newHeads[p.ds.ID()] = p
		mu.Unlock()
	}

	// newCtx lets the first goroutine drain before the second goroutine finalizes
	newCtx, stop := context.WithCancel(context.Background())

	// The first goroutine amortizes commits into a map keyed by dataset id.
	// When the parent context cancels, this goroutine drains and kills its
	// dependent goroutine.
	//
	// We do not track sequential commits because push follows historical
	// dependencies. This does not account for reset --force, which
	// breaks historical dependence.
	err := bThreads.Add(asyncPushProcessCommit, func(ctx context.Context) {
		for {
			select {
			case p, ok := <-ch:
				if !ok {
					return
				}
				updateHead(p)
			case <-ctx.Done():
				stop()
				return
			}
		}
	})
	if err != nil {
		return err
	}

	getHeadsCopy := func() map[string]PushArg {
		mu.Lock()
		defer mu.Unlock()
		if len(newHeads) == 0 {
			return nil
		}

		toRet := newHeads
		newHeads = make(map[string]PushArg, asyncPushBufferSize)

		return toRet
	}

	flush := func(newHeads map[string]PushArg, latestHeads map[string]hash.Hash) {
		newHeadsCopy := getHeadsCopy()
		if len(newHeadsCopy) == 0 {
			return
		}
		for id, newCm := range newHeadsCopy {
			if latest, ok := latestHeads[id]; !ok || latest != newCm.hash {
				// use background context to drain after sql context is canceled
				err := pushDataset(context.Background(), destDB.db, newCm.db, newCm.ds, tmpDir)
				if err != nil {
					logger.Write([]byte("replication failed: " + err.Error()))
				}
				if newCm.hash.IsEmpty() {
					delete(latestHeads, id)
				} else {
					latestHeads[id] = newCm.hash
				}
			}
		}
	}

	// The second goroutine pushes updates to a remote chunkstore.
	// This goroutine waits for the first goroutine to drain before closing
	// the channel and exiting.
	err = bThreads.Add(asyncPushSyncReplica, func(ctx context.Context) {
		defer close(ch)
		var latestHeads = make(map[string]hash.Hash, asyncPushBufferSize)
		ticker := time.NewTicker(asyncPushInterval)
		for {
			select {
			case <-newCtx.Done():
				flush(newHeads, latestHeads)
				return
			case <-ticker.C:
				flush(newHeads, latestHeads)
			}
		}
	})
	if err != nil {
		return err
	}

	return nil
}
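
// Putting the async pieces together, as a rough sketch: bThreads, destDB, tmpDir
// and logger are assumed to come from the server's replication configuration.
// Execute enqueues each new head onto the hook's channel; the background threads
// coalesce queued heads per dataset and push only the latest head for each
// dataset to destDB, roughly once per asyncPushInterval.
//
//	hook, err := NewAsyncPushOnWriteHook(bThreads, destDB, tmpDir, logger)
//	if err != nil {
//		return err
//	}
//	// every subsequent head update then calls hook.Execute(ctx, ds, db)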