github.com/square/finch@v0.0.0-20240412205204-6530c03e2b96/limit/data.go (about) 1 package limit 2 3 import ( 4 "context" 5 "database/sql" 6 "log" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/dustin/go-humanize" 12 13 "github.com/square/finch" 14 ) 15 16 type Data interface { 17 Affected(n int64) 18 More(*sql.Conn) bool 19 } 20 21 // -------------------------------------------------------------------------- 22 23 type or struct { 24 c chan bool 25 n uint64 26 a Data 27 b Data 28 } 29 30 var _ Data = or{} 31 32 // Or makes a Data limiter that allows more data until a or b reaches its limit. 33 // This is used to combine row and size limits, like "insert 1M rows or 2G of data, 34 // whichever occurs first." 35 func Or(a, b Data) Data { 36 if a == nil && b == nil { 37 return nil 38 } 39 if a == nil && b != nil { 40 return b 41 } 42 if a != nil && b == nil { 43 return a 44 } 45 lm := or{ 46 a: a, 47 b: b, 48 c: make(chan bool, 1), 49 } 50 return lm 51 } 52 53 func (lm or) Affected(n int64) { 54 lm.a.Affected(n) 55 lm.b.Affected(n) 56 } 57 58 func (lm or) More(conn *sql.Conn) bool { 59 return lm.a.More(conn) && lm.b.More(conn) 60 } 61 62 // -------------------------------------------------------------------------- 63 64 type Rows struct { 65 max int64 66 n int64 67 p float64 // = size / max * 100 68 r uint // report p every r% 69 t time.Time 70 pn int64 71 *sync.Mutex 72 } 73 74 var _ Data = &Rows{} 75 76 func NewRows(max, offset int64) *Rows { 77 if max == 0 { 78 return nil 79 } 80 lm := &Rows{ 81 max: max, 82 Mutex: &sync.Mutex{}, 83 r: 5, 84 n: offset, 85 pn: offset, 86 } 87 return lm 88 } 89 90 func (lm *Rows) Affected(n int64) { 91 lm.Lock() 92 lm.n += n 93 // Report progress every r% 94 p := float64(lm.n) / float64(lm.max) * 100 95 if p-lm.p > float64(lm.r) { 96 d := time.Now().Sub(lm.t) 97 rate := float64(lm.n-lm.pn) / d.Seconds() 98 eta := time.Duration(float64(lm.max-lm.n)/rate) * time.Second 99 log.Printf("%s / %s = %.1f%% in %s: %s rows/s (ETA %s)\n", 100 humanize.Comma(lm.n), humanize.Comma(lm.max), p, d.Round(time.Second), humanize.Comma(int64(rate)), eta) 101 lm.p = p 102 lm.t = time.Now() 103 lm.pn = lm.n 104 } 105 lm.Unlock() 106 } 107 108 func (lm *Rows) More(_ *sql.Conn) bool { 109 lm.Lock() 110 if lm.t.IsZero() { 111 lm.t = time.Now() 112 } 113 more := lm.n < lm.max 114 lm.Unlock() 115 return more 116 } 117 118 // -------------------------------------------------------------------------- 119 120 type SizeFunc func(*sql.Conn) (uint64, error) 121 122 type Size struct { 123 max uint64 // 200000000, converted from maxStr 124 maxStr string // 200MB, exactly as specified by user 125 db string // database-size: DB maxStr 126 tbl string // table-size: TABLE maxStr 127 query string 128 analyze string 129 n uint // calls to More 130 m uint // how often to check stats: n % m 131 p float64 // = size / max * 100 132 r uint // report p every r% 133 t time.Time 134 bytes uint64 135 *sync.Mutex 136 } 137 138 var _ Data = &Size{} 139 140 func NewSize(max uint64, maxStr string, db, tbl string) *Size { 141 if db == "" && tbl == "" { 142 panic("limit.NewSize called without a db or tbl name") 143 } 144 145 // ANALYZE TABLE every n % m == 0. Default m=5 so we don't check too often. 146 // But if max size is small, <=1G, that will probably be written very quickly, 147 // so check every 3rd call to avoid surpassing the max by too much. 148 var m uint = 5 149 var r uint = 5 150 if max <= 1073741824 { // 1G 151 m = 3 152 r = 10 153 } 154 if max >= 107374182400 { // 100 GB 155 m = 1000 156 r = 2 157 } 158 159 finch.Debug("limit size db %s tbl %s = %d bytes (m=%d r=%d)", db, tbl, max, m, r) 160 lm := &Size{ 161 db: db, 162 tbl: tbl, 163 max: max, 164 maxStr: maxStr, 165 Mutex: &sync.Mutex{}, 166 m: m, 167 r: r, 168 } 169 return lm 170 } 171 172 func (lm *Size) Affected(n int64) { 173 } 174 175 func (lm *Size) More(conn *sql.Conn) bool { 176 lm.Lock() 177 defer lm.Unlock() 178 179 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 180 defer cancel() 181 182 // Set queries on first call 183 if lm.query == "" { 184 lm.query = "SELECT COALESCE(data_length + index_length, 0) AS bytes FROM information_schema.TABLES WHERE " 185 if lm.db != "" { 186 log.Printf("Database size limit: %s %s (progress report every %d%%)", lm.db, lm.maxStr, lm.r) 187 lm.query += "table_schema='" + lm.db + "'" 188 189 var tbls []string 190 rows, err := conn.QueryContext(ctx, "SHOW FULL TABLES") 191 if err != nil { 192 log.Printf("Error running SHOW FULL TABLES: %s", err) 193 return false 194 } 195 rows.Close() 196 for rows.Next() { 197 var name, base string 198 err = rows.Scan(&name, &base) 199 if err != nil { 200 break 201 } 202 if base != "BASE TABLE" { 203 continue 204 } 205 tbls = append(tbls, name) 206 } 207 lm.analyze = "ANALYZE TABLE " + strings.Join(tbls, ", ") 208 } else { 209 log.Printf("Table size limit: %s %s (progress report every %d%%)", lm.tbl, lm.maxStr, lm.r) 210 err := conn.QueryRowContext(ctx, "SELECT DATABASE()").Scan(&lm.db) 211 if err != nil { 212 log.Printf("Error getting current database: %s", err) 213 return false 214 } 215 lm.query += "table_schema='" + lm.db + "' AND table_name='" + lm.tbl + "'" 216 lm.analyze = "ANALYZE TABLE " + lm.tbl 217 } 218 finch.Debug(lm.query) 219 finch.Debug(lm.analyze) 220 221 lm.t = time.Now() 222 } 223 224 // Every few calls, run ANALYZE TABLE to update the stats, then fech latest size 225 lm.n++ 226 if lm.n%lm.m != 0 { 227 return true // not time to check; presume there's more to load 228 } 229 230 if _, err := conn.ExecContext(ctx, lm.analyze); err != nil { 231 log.Printf("Error running ANALYZE TABLE: %s", err) 232 return false 233 } 234 235 // Get database/table size in bytes 236 var bytes uint64 237 err := conn.QueryRowContext(ctx, lm.query).Scan(&bytes) 238 if err != nil { 239 log.Printf("Error query data size: %s", err) 240 return false 241 } 242 243 // Report progress every r% 244 p := float64(bytes) / float64(lm.max) * 100 245 if p-lm.p > float64(lm.r) { 246 d := time.Now().Sub(lm.t) 247 rate := float64(bytes-lm.bytes) / d.Seconds() 248 eta := time.Duration(float64(lm.max-bytes)/rate) * time.Second 249 log.Printf("%s / %s = %.1f%% in %s: %s/s (ETA %s)\n", 250 humanize.Bytes(bytes), lm.maxStr, p, d.Round(time.Second), humanize.Bytes(uint64(rate)), eta) 251 lm.p = p 252 lm.t = time.Now() 253 lm.bytes = bytes 254 } 255 256 return bytes < lm.max 257 }