github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/statistics_table.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dtables 16 17 import ( 18 "fmt" 19 20 "github.com/dolthub/go-mysql-server/sql" 21 "github.com/dolthub/go-mysql-server/sql/stats" 22 23 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 24 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 25 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 26 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 27 ) 28 29 // StatisticsTable is a sql.Table implementation that implements a system table which shows the dolt commit log 30 type StatisticsTable struct { 31 dbName string 32 branch string 33 ddb *doltdb.DoltDB 34 } 35 36 var _ sql.Table = (*StatisticsTable)(nil) 37 var _ sql.StatisticsTable = (*StatisticsTable)(nil) 38 39 // NewStatisticsTable creates a StatisticsTable 40 func NewStatisticsTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, asOf interface{}) sql.Table { 41 ret := &StatisticsTable{dbName: dbName, ddb: ddb} 42 if branch, ok := asOf.(string); ok { 43 ret.branch = branch 44 } 45 return ret 46 } 47 48 // DataLength implements sql.StatisticsTable 49 func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) { 50 numBytesPerRow := schema.SchemaAvgLength(schema.StatsTableSqlSchema(st.dbName).Schema) 51 numRows, _, err := st.RowCount(ctx) 52 if err != nil { 53 return 0, err 54 } 55 56 // maxSize is the upper bound for how much space a table takes up on disk. It will typically 57 // greatly overestimate the actual size of the table on disk because it does not take into 58 // account that the data on disk is compressed and it assumes that every variable length 59 // field is fully used. Because of this, maxSize can easily be several orders of magnitude 60 // larger than the actual space used by the table on disk. 61 maxSize := numBytesPerRow * numRows 62 63 // To return a more realistic estimate of the size of the table on disk, we multiply maxSize by 64 // compressionFactor. This will still not give an accurate size of the table on disk, but it 65 // will generally be much closer than maxSize. This value comes from quickly testing some dbs 66 // with only columns that have a fixed length (e.g. int) and some with only columns that have 67 // a variable length (e.g. TEXT). 0.002 was between the two sets of values. Ultimately, having 68 // accurate table statistics is a better long term solution for this. 69 // https://github.com/dolthub/dolt/issues/6624 70 const compressionFactor = 0.002 71 estimatedSize := float64(maxSize) * compressionFactor 72 return uint64(estimatedSize), nil 73 } 74 75 type BranchStatsProvider interface { 76 GetTableDoltStats(ctx *sql.Context, branch, db, table string) ([]sql.Statistic, error) 77 } 78 79 // RowCount implements sql.StatisticsTable 80 func (st *StatisticsTable) RowCount(ctx *sql.Context) (uint64, bool, error) { 81 dSess := dsess.DSessFromSess(ctx.Session) 82 prov := dSess.Provider() 83 84 sqlDb, err := prov.Database(ctx, st.dbName) 85 if err != nil { 86 return 0, false, err 87 } 88 89 tables, err := sqlDb.GetTableNames(ctx) 90 if err != nil { 91 return 0, false, err 92 } 93 94 var cnt int 95 for _, table := range tables { 96 // only Dolt-specific provider has branch support 97 dbStats, err := dSess.StatsProvider().(BranchStatsProvider).GetTableDoltStats(ctx, st.branch, st.dbName, table) 98 if err != nil { 99 100 } 101 for _, dbStat := range dbStats { 102 cnt += len(dbStat.Histogram()) 103 } 104 } 105 106 return uint64(cnt), true, nil 107 } 108 109 // Name is a sql.Table interface function which returns the name of the table which is defined by the constant 110 // StatisticsTableName 111 func (st *StatisticsTable) Name() string { 112 return doltdb.StatisticsTableName 113 } 114 115 // String is a sql.Table interface function which returns the name of the table which is defined by the constant 116 // StatisticsTableName 117 func (st *StatisticsTable) String() string { 118 return doltdb.StatisticsTableName 119 } 120 121 // Schema is a sql.Table interface function that gets the sql.Schema of the log system table. 122 func (st *StatisticsTable) Schema() sql.Schema { 123 return schema.StatsTableSqlSchema(st.dbName).Schema 124 } 125 126 // Collation implements the sql.Table interface. 127 func (st *StatisticsTable) Collation() sql.CollationID { 128 return sql.Collation_Default 129 } 130 131 // Partitions is a sql.Table interface function that returns a partition of the data. Currently the data is unpartitioned. 132 func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) { 133 return index.SinglePartitionIterFromNomsMap(nil), nil 134 } 135 136 // PartitionRows is a sql.Table interface function that gets a row iterator for a partition 137 func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) { 138 dSess := dsess.DSessFromSess(ctx.Session) 139 prov := dSess.Provider() 140 141 var sqlDb sql.Database 142 var err error 143 if st.branch != "" { 144 sqlDb, err = prov.Database(ctx, fmt.Sprintf("%s/%s", st.dbName, st.branch)) 145 } else { 146 sqlDb, err = prov.Database(ctx, st.dbName) 147 } 148 if err != nil { 149 return nil, err 150 } 151 152 tables, err := sqlDb.GetTableNames(ctx) 153 if err != nil { 154 return nil, err 155 } 156 157 statsPro := dSess.StatsProvider().(BranchStatsProvider) 158 var dStats []sql.Statistic 159 for _, table := range tables { 160 dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, table) 161 if err != nil { 162 return nil, err 163 } 164 dStats = append(dStats, dbStats...) 165 } 166 return stats.NewStatsIter(ctx, dStats...) 167 } 168 169 // PreciseMatch implements sql.IndexAddressable 170 func (st *StatisticsTable) PreciseMatch() bool { 171 return true 172 }