github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/mod.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merge
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"strconv"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    28  	"github.com/matrixorigin/matrixone/pkg/objectio"
    29  	"github.com/matrixorigin/matrixone/pkg/pb/api"
    30  	taskpb "github.com/matrixorigin/matrixone/pkg/pb/task"
    31  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    33  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    34  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
    35  )
    36  
// StopMerge is a package-level atomic boolean flag. Nothing in the visible part
// of this file reads or writes it.
// NOTE(review): presumably callers consult it to pause merge scheduling —
// confirm against the code that uses it.
var StopMerge atomic.Bool
    38  
// CNMergeScheduler hands a merge task over to something that will run it.
// The implementation in this file registers the task with the task service.
type CNMergeScheduler interface {
	// SendMergeTask submits task and returns an error if it was not accepted.
	SendMergeTask(ctx context.Context, task *api.MergeTaskEntry) error
}
    42  
    43  func NewTaskServiceGetter(getter taskservice.Getter) CNMergeScheduler {
    44  	return &taskServiceGetter{
    45  		Getter: getter,
    46  	}
    47  }
    48  
// taskServiceGetter implements CNMergeScheduler on top of a taskservice.Getter.
// The getter is embedded and invoked on every SendMergeTask call, so the task
// service is looked up each time a task is sent.
type taskServiceGetter struct {
	taskservice.Getter
}
    52  
    53  func (tsg *taskServiceGetter) SendMergeTask(ctx context.Context, task *api.MergeTaskEntry) error {
    54  	ts, ok := tsg.Getter()
    55  	if !ok {
    56  		return taskservice.ErrNotReady
    57  	}
    58  	taskIDPrefix := "Merge:" + task.TableName
    59  	asyncTask, err := ts.QueryAsyncTask(ctx,
    60  		taskservice.WithTaskMetadataId(taskservice.LIKE, taskIDPrefix+"%"),
    61  		taskservice.WithTaskStatusCond(taskpb.TaskStatus_Created, taskpb.TaskStatus_Running))
    62  	if err != nil {
    63  		return err
    64  	}
    65  	if len(asyncTask) != 0 {
    66  		return moerr.NewInternalError(ctx, fmt.Sprintf("table %q is merging", task.TableName))
    67  	}
    68  	b, err := task.Marshal()
    69  	if err != nil {
    70  		return err
    71  	}
    72  	return ts.CreateAsyncTask(ctx,
    73  		taskpb.TaskMetadata{
    74  			ID:       taskIDPrefix + ":" + strconv.FormatInt(time.Now().Unix(), 10),
    75  			Executor: taskpb.TaskCode_MergeObject,
    76  			Context:  b,
    77  			Options:  taskpb.TaskOptions{Resource: &taskpb.Resource{Memory: task.EstimatedMemUsage}},
    78  		})
    79  }
    80  
// TaskHostKind enumerates the kinds of host a merge task can be assigned to.
// Policy.Revise returns one alongside the objects it selected.
type TaskHostKind int

const (
	// TaskHostCN is the zero value. NOTE(review): by name, the task is meant
	// for a CN node — confirm against the scheduler.
	TaskHostCN TaskHostKind = iota
	// TaskHostDN — NOTE(review): by name, the task is meant for a DN node.
	TaskHostDN
)
    87  
// activeEntry is the value stored per object in ActiveCNObjMap.
type activeEntry struct {
	tid      uint64    // ID of the table the object belongs to
	insertAt time.Time // wall-clock time at which the object was recorded
}
    92  
// ActiveCNObj is the package-wide ActiveCNObjMap instance, created with an
// empty, ready-to-use map.
var ActiveCNObj ActiveCNObjMap = ActiveCNObjMap{
	o: make(map[objectio.ObjectId]activeEntry),
}
    96  
// ActiveCNObjMap is a mutex-guarded set of object IDs, each tagged with its
// table ID and insertion time. All of its methods take the embedded mutex
// before touching the map. Because it embeds sync.Mutex it must not be copied
// after first use.
type ActiveCNObjMap struct {
	sync.Mutex
	o map[objectio.ObjectId]activeEntry // object ID -> table ID and insertion time
}
   101  
   102  func (e *ActiveCNObjMap) Prune(id uint64, ago time.Duration) {
   103  	e.Lock()
   104  	defer e.Unlock()
   105  	now := time.Now()
   106  	if ago == 0 {
   107  		for k, v := range e.o {
   108  			if v.tid == id {
   109  				delete(e.o, k)
   110  			}
   111  		}
   112  		return
   113  	}
   114  
   115  	if id == 0 && ago > 1*time.Second {
   116  		for k, v := range e.o {
   117  			if now.Sub(v.insertAt) > ago {
   118  				delete(e.o, k)
   119  			}
   120  		}
   121  		return
   122  	}
   123  	for k, v := range e.o {
   124  		if v.tid == id && now.Sub(v.insertAt) > ago {
   125  			delete(e.o, k)
   126  		}
   127  	}
   128  }
   129  
   130  func (e *ActiveCNObjMap) String() string {
   131  	e.Lock()
   132  	defer e.Unlock()
   133  
   134  	b := &bytes.Buffer{}
   135  	now := time.Now()
   136  	for k, v := range e.o {
   137  		b.WriteString(fmt.Sprintf(" id: %v, table: %v, insertAt: %s ago\n",
   138  			k.String(), v.tid, now.Sub(v.insertAt).String()))
   139  	}
   140  	return b.String()
   141  }
   142  
   143  func (e *ActiveCNObjMap) AddActiveCNObj(entries []*catalog.ObjectEntry) {
   144  	e.Lock()
   145  	for _, entry := range entries {
   146  		e.o[entry.ID] = activeEntry{
   147  			entry.GetTable().ID,
   148  			time.Now(),
   149  		}
   150  	}
   151  	e.Unlock()
   152  }
   153  
   154  func (e *ActiveCNObjMap) RemoveActiveCNObj(ids []objectio.ObjectId) {
   155  	e.Lock()
   156  	defer e.Unlock()
   157  	for _, id := range ids {
   158  		delete(e.o, id)
   159  	}
   160  }
   161  
   162  func (e *ActiveCNObjMap) CheckOverlapOnCNActive(entries []*catalog.ObjectEntry) bool {
   163  	e.Lock()
   164  	defer e.Unlock()
   165  	for _, entry := range entries {
   166  		if _, ok := e.o[entry.ID]; ok {
   167  			return true
   168  		}
   169  	}
   170  	return false
   171  }
   172  
   173  func CleanUpUselessFiles(entry *api.MergeCommitEntry, fs fileservice.FileService) {
   174  	if entry == nil {
   175  		return
   176  	}
   177  	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
   178  	defer cancel()
   179  	if len(entry.BookingLoc) != 0 {
   180  		loc := objectio.Location(entry.BookingLoc)
   181  		_ = fs.Delete(ctx, loc.Name().String())
   182  	}
   183  	if len(entry.CreatedObjs) != 0 {
   184  		for _, obj := range entry.CreatedObjs {
   185  			if len(obj) == 0 {
   186  				continue
   187  			}
   188  			s := objectio.ObjectStats(obj)
   189  			_ = fs.Delete(ctx, s.ObjectName().String())
   190  		}
   191  	}
   192  }
   193  
// Package-level merge tuning constants.
const (
	constMergeMinBlks       = 5
	constMergeExpansionRate = 6
	constMaxMemCap          = 4 * constMergeExpansionRate * common.Const1GBytes // max original memory for an object
	constSmallMergeGap      = 3 * time.Minute
)
   200  
// Policy is the interface a merge policy implements. Only the method set is
// visible in this file; the per-method notes are read off the signatures and
// should be confirmed against the implementations.
type Policy interface {
	// OnObject presents one object entry to the policy.
	OnObject(obj *catalog.ObjectEntry)
	// Revise returns a list of object entries together with the kind of host
	// the resulting task is meant for.
	// NOTE(review): cpu and mem look like a resource budget; their units are
	// not shown here — confirm.
	Revise(cpu, mem int64) ([]*catalog.ObjectEntry, TaskHostKind)
	// ResetForTable takes the table entry the policy will work on next.
	// NOTE(review): presumably clears per-table state — confirm.
	ResetForTable(*catalog.TableEntry)
	// SetConfig takes a table entry, a function that yields an async
	// transaction, and an untyped configuration value.
	SetConfig(*catalog.TableEntry, func() txnif.AsyncTxn, any)
	// GetConfig returns the untyped configuration value for the table entry.
	GetConfig(*catalog.TableEntry) any
}
   208  
   209  func NewUpdatePolicyReq(c *BasicPolicyConfig) *api.AlterTableReq {
   210  	return &api.AlterTableReq{
   211  		Kind: api.AlterKind_UpdatePolicy,
   212  		Operation: &api.AlterTableReq_UpdatePolicy{
   213  			UpdatePolicy: &api.AlterTablePolicy{
   214  				MinOsizeQuailifed: uint32(c.ObjectMinOsize),
   215  				MaxObjOnerun:      uint32(c.MergeMaxOneRun),
   216  				MaxOsizeMergedObj: uint32(c.MaxOsizeMergedObj),
   217  				MinCnMergeSize:    uint64(c.MinCNMergeSize),
   218  				Hints:             c.MergeHints,
   219  			},
   220  		},
   221  	}
   222  }