github.com/square/finch@v0.0.0-20240412205204-6530c03e2b96/limit/data.go (about)

     1  package limit
     2  
import (
	"context"
	"database/sql"
	"log"
	"math"
	"strings"
	"sync"
	"time"

	"github.com/dustin/go-humanize"

	"github.com/square/finch"
)
    15  
    16  type Data interface {
    17  	Affected(n int64)
    18  	More(*sql.Conn) bool
    19  }
    20  
    21  // --------------------------------------------------------------------------
    22  
    23  type or struct {
    24  	c chan bool
    25  	n uint64
    26  	a Data
    27  	b Data
    28  }
    29  
    30  var _ Data = or{}
    31  
    32  // Or makes a Data limiter that allows more data until a or b reaches its limit.
    33  // This is used to combine row and size limits, like "insert 1M rows or 2G of data,
    34  // whichever occurs first."
    35  func Or(a, b Data) Data {
    36  	if a == nil && b == nil {
    37  		return nil
    38  	}
    39  	if a == nil && b != nil {
    40  		return b
    41  	}
    42  	if a != nil && b == nil {
    43  		return a
    44  	}
    45  	lm := or{
    46  		a: a,
    47  		b: b,
    48  		c: make(chan bool, 1),
    49  	}
    50  	return lm
    51  }
    52  
    53  func (lm or) Affected(n int64) {
    54  	lm.a.Affected(n)
    55  	lm.b.Affected(n)
    56  }
    57  
    58  func (lm or) More(conn *sql.Conn) bool {
    59  	return lm.a.More(conn) && lm.b.More(conn)
    60  }
    61  
    62  // --------------------------------------------------------------------------
    63  
// Rows limits data loading to a maximum number of rows.
type Rows struct {
	max int64     // stop once n reaches this many rows
	n   int64     // rows loaded so far (starts at offset)
	p   float64   // progress percent at last report: n / max * 100
	r   uint      // report p every r%
	t   time.Time // start of current progress interval; set on first More call
	pn  int64     // n at last progress report, for rows/s rate calculation
	*sync.Mutex   // guards all fields; Rows must not be copied
}

var _ Data = &Rows{} // compile-time interface check
    75  
    76  func NewRows(max, offset int64) *Rows {
    77  	if max == 0 {
    78  		return nil
    79  	}
    80  	lm := &Rows{
    81  		max:   max,
    82  		Mutex: &sync.Mutex{},
    83  		r:     5,
    84  		n:     offset,
    85  		pn:    offset,
    86  	}
    87  	return lm
    88  }
    89  
    90  func (lm *Rows) Affected(n int64) {
    91  	lm.Lock()
    92  	lm.n += n
    93  	// Report progress every r%
    94  	p := float64(lm.n) / float64(lm.max) * 100
    95  	if p-lm.p > float64(lm.r) {
    96  		d := time.Now().Sub(lm.t)
    97  		rate := float64(lm.n-lm.pn) / d.Seconds()
    98  		eta := time.Duration(float64(lm.max-lm.n)/rate) * time.Second
    99  		log.Printf("%s / %s = %.1f%% in %s: %s rows/s (ETA %s)\n",
   100  			humanize.Comma(lm.n), humanize.Comma(lm.max), p, d.Round(time.Second), humanize.Comma(int64(rate)), eta)
   101  		lm.p = p
   102  		lm.t = time.Now()
   103  		lm.pn = lm.n
   104  	}
   105  	lm.Unlock()
   106  }
   107  
   108  func (lm *Rows) More(_ *sql.Conn) bool {
   109  	lm.Lock()
   110  	if lm.t.IsZero() {
   111  		lm.t = time.Now()
   112  	}
   113  	more := lm.n < lm.max
   114  	lm.Unlock()
   115  	return more
   116  }
   117  
   118  // --------------------------------------------------------------------------
   119  
   120  type SizeFunc func(*sql.Conn) (uint64, error)
   121  
// Size limits data loading to a maximum database or table size in bytes.
// The size is measured by periodically running ANALYZE TABLE and reading
// information_schema.TABLES; see More.
type Size struct {
	max     uint64 // 200000000, converted from maxStr
	maxStr  string // 200MB, exactly as specified by user
	db      string // database-size: DB maxStr
	tbl     string // table-size: TABLE maxStr
	query   string // information_schema size query, built on first call to More
	analyze string // ANALYZE TABLE statement, built on first call to More
	n       uint    // calls to More
	m       uint    // how often to check stats: n % m
	p       float64 // = size / max * 100
	r       uint    // report p every r%
	t       time.Time // start of current progress interval
	bytes   uint64    // size at last progress report, for bytes/s rate calculation
	*sync.Mutex       // guards all fields; Size must not be copied
}

var _ Data = &Size{} // compile-time interface check
   139  
   140  func NewSize(max uint64, maxStr string, db, tbl string) *Size {
   141  	if db == "" && tbl == "" {
   142  		panic("limit.NewSize called without a db or tbl name")
   143  	}
   144  
   145  	// ANALYZE TABLE every n % m == 0. Default m=5 so we don't check too often.
   146  	// But if max size is small, <=1G, that will probably be written very quickly,
   147  	// so check every 3rd call to avoid surpassing the max by too much.
   148  	var m uint = 5
   149  	var r uint = 5
   150  	if max <= 1073741824 { // 1G
   151  		m = 3
   152  		r = 10
   153  	}
   154  	if max >= 107374182400 { // 100 GB
   155  		m = 1000
   156  		r = 2
   157  	}
   158  
   159  	finch.Debug("limit size db %s tbl %s = %d bytes (m=%d r=%d)", db, tbl, max, m, r)
   160  	lm := &Size{
   161  		db:     db,
   162  		tbl:    tbl,
   163  		max:    max,
   164  		maxStr: maxStr,
   165  		Mutex:  &sync.Mutex{},
   166  		m:      m,
   167  		r:      r,
   168  	}
   169  	return lm
   170  }
   171  
// Affected is a no-op: Size measures progress by querying table stats in
// More, not by counting affected rows.
func (lm *Size) Affected(n int64) {
}
   174  
   175  func (lm *Size) More(conn *sql.Conn) bool {
   176  	lm.Lock()
   177  	defer lm.Unlock()
   178  
   179  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   180  	defer cancel()
   181  
   182  	// Set queries on first call
   183  	if lm.query == "" {
   184  		lm.query = "SELECT COALESCE(data_length + index_length, 0) AS bytes FROM information_schema.TABLES WHERE "
   185  		if lm.db != "" {
   186  			log.Printf("Database size limit: %s %s (progress report every %d%%)", lm.db, lm.maxStr, lm.r)
   187  			lm.query += "table_schema='" + lm.db + "'"
   188  
   189  			var tbls []string
   190  			rows, err := conn.QueryContext(ctx, "SHOW FULL TABLES")
   191  			if err != nil {
   192  				log.Printf("Error running SHOW FULL TABLES: %s", err)
   193  				return false
   194  			}
   195  			rows.Close()
   196  			for rows.Next() {
   197  				var name, base string
   198  				err = rows.Scan(&name, &base)
   199  				if err != nil {
   200  					break
   201  				}
   202  				if base != "BASE TABLE" {
   203  					continue
   204  				}
   205  				tbls = append(tbls, name)
   206  			}
   207  			lm.analyze = "ANALYZE TABLE " + strings.Join(tbls, ", ")
   208  		} else {
   209  			log.Printf("Table size limit: %s %s (progress report every %d%%)", lm.tbl, lm.maxStr, lm.r)
   210  			err := conn.QueryRowContext(ctx, "SELECT DATABASE()").Scan(&lm.db)
   211  			if err != nil {
   212  				log.Printf("Error getting current database: %s", err)
   213  				return false
   214  			}
   215  			lm.query += "table_schema='" + lm.db + "' AND table_name='" + lm.tbl + "'"
   216  			lm.analyze = "ANALYZE TABLE " + lm.tbl
   217  		}
   218  		finch.Debug(lm.query)
   219  		finch.Debug(lm.analyze)
   220  
   221  		lm.t = time.Now()
   222  	}
   223  
   224  	// Every few calls, run ANALYZE TABLE to update the stats, then fech latest size
   225  	lm.n++
   226  	if lm.n%lm.m != 0 {
   227  		return true // not time to check; presume there's more to load
   228  	}
   229  
   230  	if _, err := conn.ExecContext(ctx, lm.analyze); err != nil {
   231  		log.Printf("Error running ANALYZE TABLE: %s", err)
   232  		return false
   233  	}
   234  
   235  	// Get database/table size in bytes
   236  	var bytes uint64
   237  	err := conn.QueryRowContext(ctx, lm.query).Scan(&bytes)
   238  	if err != nil {
   239  		log.Printf("Error query data size: %s", err)
   240  		return false
   241  	}
   242  
   243  	// Report progress every r%
   244  	p := float64(bytes) / float64(lm.max) * 100
   245  	if p-lm.p > float64(lm.r) {
   246  		d := time.Now().Sub(lm.t)
   247  		rate := float64(bytes-lm.bytes) / d.Seconds()
   248  		eta := time.Duration(float64(lm.max-bytes)/rate) * time.Second
   249  		log.Printf("%s / %s = %.1f%% in %s: %s/s (ETA %s)\n",
   250  			humanize.Bytes(bytes), lm.maxStr, p, d.Round(time.Second), humanize.Bytes(uint64(rate)), eta)
   251  		lm.p = p
   252  		lm.t = time.Now()
   253  		lm.bytes = bytes
   254  	}
   255  
   256  	return bytes < lm.max
   257  }