github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/pingcap/tidb/ddl/ddl_worker.go (about) 1 // Copyright 2015 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package ddl 15 16 import ( 17 "time" 18 19 "github.com/insionng/yougam/libraries/juju/errors" 20 "github.com/insionng/yougam/libraries/ngaut/log" 21 "github.com/insionng/yougam/libraries/pingcap/tidb/context" 22 "github.com/insionng/yougam/libraries/pingcap/tidb/kv" 23 "github.com/insionng/yougam/libraries/pingcap/tidb/meta" 24 "github.com/insionng/yougam/libraries/pingcap/tidb/model" 25 "github.com/insionng/yougam/libraries/pingcap/tidb/terror" 26 ) 27 28 func (d *ddl) doDDLJob(ctx context.Context, job *model.Job) error { 29 // for every DDL, we must commit current transaction. 30 if err := ctx.FinishTxn(false); err != nil { 31 return errors.Trace(err) 32 } 33 34 // Create a new job and queue it. 35 err := kv.RunInNewTxn(d.store, true, func(txn kv.Transaction) error { 36 t := meta.NewMeta(txn) 37 var err error 38 job.ID, err = t.GenGlobalID() 39 if err != nil { 40 return errors.Trace(err) 41 } 42 43 err = t.EnQueueDDLJob(job) 44 return errors.Trace(err) 45 }) 46 47 if err != nil { 48 return errors.Trace(err) 49 } 50 51 // notice worker that we push a new job and wait the job done. 52 asyncNotify(d.ddlJobCh) 53 54 log.Warnf("[ddl] start DDL job %v", job) 55 56 jobID := job.ID 57 58 var historyJob *model.Job 59 60 // for a job from start to end, the state of it will be none -> delete only -> write only -> reorganization -> public 61 // for every state changes, we will wait as lease 2 * lease time, so here the ticker check is 10 * lease. 62 ticker := time.NewTicker(chooseLeaseTime(10*d.lease, 10*time.Second)) 63 defer ticker.Stop() 64 for { 65 select { 66 case <-d.ddlJobDoneCh: 67 case <-ticker.C: 68 } 69 70 historyJob, err = d.getHistoryDDLJob(jobID) 71 if err != nil { 72 log.Errorf("[ddl] get history DDL job err %v, check again", err) 73 continue 74 } else if historyJob == nil { 75 log.Warnf("[ddl] DDL job %d is not in history, maybe not run", jobID) 76 continue 77 } 78 79 // if a job is a history table, the state must be JobDone or JobCancel. 80 if historyJob.State == model.JobDone { 81 return nil 82 } 83 84 return errors.Errorf(historyJob.Error) 85 } 86 } 87 88 func (d *ddl) getHistoryDDLJob(id int64) (*model.Job, error) { 89 var job *model.Job 90 91 err := kv.RunInNewTxn(d.store, false, func(txn kv.Transaction) error { 92 t := meta.NewMeta(txn) 93 var err1 error 94 job, err1 = t.GetHistoryDDLJob(id) 95 return errors.Trace(err1) 96 }) 97 98 return job, errors.Trace(err) 99 } 100 101 func asyncNotify(ch chan struct{}) { 102 select { 103 case ch <- struct{}{}: 104 default: 105 } 106 } 107 108 func (d *ddl) checkOwner(t *meta.Meta, flag JobType) (*model.Owner, error) { 109 var owner *model.Owner 110 var err error 111 112 switch flag { 113 case ddlJobFlag: 114 owner, err = t.GetDDLJobOwner() 115 case bgJobFlag: 116 owner, err = t.GetBgJobOwner() 117 default: 118 err = errInvalidJobFlag 119 } 120 if err != nil { 121 return nil, errors.Trace(err) 122 } 123 124 if owner == nil { 125 owner = &model.Owner{} 126 // try to set onwer 127 owner.OwnerID = d.uuid 128 } 129 130 now := time.Now().UnixNano() 131 // we must wait 2 * lease time to guarantee other servers update the schema, 132 // the owner will update its owner status every 2 * lease time, so here we use 133 // 4 * lease to check its timeout. 134 maxTimeout := int64(4 * d.lease) 135 if owner.OwnerID == d.uuid || now-owner.LastUpdateTS > maxTimeout { 136 owner.OwnerID = d.uuid 137 owner.LastUpdateTS = now 138 // update status. 139 switch flag { 140 case ddlJobFlag: 141 err = t.SetDDLJobOwner(owner) 142 case bgJobFlag: 143 err = t.SetBgJobOwner(owner) 144 } 145 if err != nil { 146 return nil, errors.Trace(err) 147 } 148 log.Debugf("[ddl] become %s job owner %s", flag, owner.OwnerID) 149 } 150 151 if owner.OwnerID != d.uuid { 152 log.Debugf("[ddl] not %s job owner, owner is %s", flag, owner.OwnerID) 153 return nil, errors.Trace(errNotOwner) 154 } 155 156 return owner, nil 157 } 158 159 func (d *ddl) getFirstDDLJob(t *meta.Meta) (*model.Job, error) { 160 job, err := t.GetDDLJob(0) 161 return job, errors.Trace(err) 162 } 163 164 // every time we enter another state except final state, we must call this function. 165 func (d *ddl) updateDDLJob(t *meta.Meta, job *model.Job) error { 166 err := t.UpdateDDLJob(0, job) 167 return errors.Trace(err) 168 } 169 170 func (d *ddl) finishDDLJob(t *meta.Meta, job *model.Job) error { 171 log.Warnf("[ddl] finish DDL job %v", job) 172 // done, notice and run next job. 173 _, err := t.DeQueueDDLJob() 174 if err != nil { 175 return errors.Trace(err) 176 } 177 switch job.Type { 178 case model.ActionDropSchema, model.ActionDropTable: 179 if err = d.prepareBgJob(job); err != nil { 180 return errors.Trace(err) 181 } 182 } 183 184 err = t.AddHistoryDDLJob(job) 185 return errors.Trace(err) 186 } 187 188 // JobType is job type, including ddl/background. 189 type JobType int 190 191 const ( 192 ddlJobFlag = iota + 1 193 bgJobFlag 194 ) 195 196 func (j JobType) String() string { 197 switch j { 198 case ddlJobFlag: 199 return "ddl" 200 case bgJobFlag: 201 return "background" 202 } 203 204 return "unknown" 205 } 206 207 func (d *ddl) handleDDLJobQueue() error { 208 for { 209 if d.isClosed() { 210 return nil 211 } 212 213 waitTime := 2 * d.lease 214 215 var job *model.Job 216 err := kv.RunInNewTxn(d.store, false, func(txn kv.Transaction) error { 217 t := meta.NewMeta(txn) 218 owner, err := d.checkOwner(t, ddlJobFlag) 219 if terror.ErrorEqual(err, errNotOwner) { 220 // we are not owner, return and retry checking later. 221 return nil 222 } else if err != nil { 223 return errors.Trace(err) 224 } 225 226 // become the owner 227 // get the first job and run 228 job, err = d.getFirstDDLJob(t) 229 if job == nil || err != nil { 230 return errors.Trace(err) 231 } 232 233 if job.IsRunning() { 234 // if we enter a new state, crash when waiting 2 * lease time, and restart quickly, 235 // we may run the job immediately again, but we don't wait enough 2 * lease time to 236 // let other servers update the schema. 237 // so here we must check the elapsed time from last update, if < 2 * lease, we must 238 // wait again. 239 elapsed := time.Duration(time.Now().UnixNano() - job.LastUpdateTS) 240 if elapsed > 0 && elapsed < waitTime { 241 log.Warnf("[ddl] the elapsed time from last update is %s < %s, wait again", elapsed, waitTime) 242 waitTime -= elapsed 243 return nil 244 } 245 } 246 247 log.Warnf("[ddl] run DDL job %v", job) 248 249 d.hook.OnJobRunBefore(job) 250 251 // if run job meets error, we will save this error in job Error 252 // and retry later if the job is not cancelled. 253 d.runDDLJob(t, job) 254 255 if job.IsFinished() { 256 err = d.finishDDLJob(t, job) 257 } else { 258 err = d.updateDDLJob(t, job) 259 } 260 if err != nil { 261 return errors.Trace(err) 262 } 263 264 // running job may cost some time, so here we must update owner status to 265 // prevent other become the owner. 266 owner.LastUpdateTS = time.Now().UnixNano() 267 err = t.SetDDLJobOwner(owner) 268 269 return errors.Trace(err) 270 }) 271 if err != nil { 272 return errors.Trace(err) 273 } else if job == nil { 274 // no job now, return and retry get later. 275 return nil 276 } 277 278 d.hook.OnJobUpdated(job) 279 280 // here means the job enters another state (delete only, write only, public, etc...) or is cancelled. 281 // if the job is done or still running, we will wait 2 * lease time to guarantee other servers to update 282 // the newest schema. 283 if job.State == model.JobRunning || job.State == model.JobDone { 284 d.waitSchemaChanged(waitTime) 285 } 286 287 if job.IsFinished() { 288 d.startBgJob(job.Type) 289 asyncNotify(d.ddlJobDoneCh) 290 } 291 } 292 } 293 294 func chooseLeaseTime(n1 time.Duration, n2 time.Duration) time.Duration { 295 if n1 > 0 { 296 return n1 297 } 298 299 return n2 300 } 301 302 // onDDLWorker is for async online schema change, it will try to become the owner first, 303 // then wait or pull the job queue to handle a schema change job. 304 func (d *ddl) onDDLWorker() { 305 defer d.wait.Done() 306 307 // we use 4 * lease time to check owner's timeout, so here, we will update owner's status 308 // every 2 * lease time, if lease is 0, we will use default 10s. 309 checkTime := chooseLeaseTime(2*d.lease, 10*time.Second) 310 311 ticker := time.NewTicker(checkTime) 312 defer ticker.Stop() 313 314 for { 315 select { 316 case <-ticker.C: 317 log.Debugf("[ddl] wait %s to check DDL status again", checkTime) 318 case <-d.ddlJobCh: 319 case <-d.quitCh: 320 return 321 } 322 323 err := d.handleDDLJobQueue() 324 if err != nil { 325 log.Errorf("[ddl] handle ddl job err %v", errors.ErrorStack(err)) 326 } 327 } 328 } 329 330 func (d *ddl) runDDLJob(t *meta.Meta, job *model.Job) { 331 if job.IsFinished() { 332 return 333 } 334 335 job.State = model.JobRunning 336 337 var err error 338 switch job.Type { 339 case model.ActionCreateSchema: 340 err = d.onCreateSchema(t, job) 341 case model.ActionDropSchema: 342 err = d.onDropSchema(t, job) 343 case model.ActionCreateTable: 344 err = d.onCreateTable(t, job) 345 case model.ActionDropTable: 346 err = d.onDropTable(t, job) 347 case model.ActionAddColumn: 348 err = d.onAddColumn(t, job) 349 case model.ActionDropColumn: 350 err = d.onDropColumn(t, job) 351 case model.ActionAddIndex: 352 err = d.onCreateIndex(t, job) 353 case model.ActionDropIndex: 354 err = d.onDropIndex(t, job) 355 case model.ActionAddForeignKey: 356 err = d.onCreateForeignKey(t, job) 357 case model.ActionDropForeignKey: 358 err = d.onDropForeignKey(t, job) 359 default: 360 // invalid job, cancel it. 361 job.State = model.JobCancelled 362 err = errInvalidDDLJob.Gen("invalid ddl job %v", job) 363 } 364 365 // saves error in job, so that others can know error happens. 366 if err != nil { 367 // if job is not cancelled, we should log this error. 368 if job.State != model.JobCancelled { 369 log.Errorf("run ddl job err %v", errors.ErrorStack(err)) 370 } 371 372 job.Error = err.Error() 373 job.ErrorCount++ 374 } 375 } 376 377 // for every lease seconds, we will re-update the whole schema, so we will wait 2 * lease time 378 // to guarantee that all servers have already updated schema. 379 func (d *ddl) waitSchemaChanged(waitTime time.Duration) { 380 if waitTime == 0 { 381 return 382 } 383 384 select { 385 case <-time.After(waitTime): 386 case <-d.quitCh: 387 } 388 }