github.com/dolthub/go-mysql-server@v0.18.0/sql/rowexec/fulltext_filter.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package rowexec 16 17 import ( 18 "fmt" 19 "io" 20 21 "github.com/dolthub/go-mysql-server/sql" 22 "github.com/dolthub/go-mysql-server/sql/expression" 23 "github.com/dolthub/go-mysql-server/sql/fulltext" 24 ) 25 26 // FulltextFilterTable handles row iteration for filters involving Full-Text indexes, as they behave differently than 27 // other indexes. This acts as a sort of wrapper, so that integrators do not need to implement special logic on their 28 // side. 29 // 30 // This takes a MatchAgainst expression, as it will have already resolved the index and necessary tables, therefore we 31 // do not need to replicate the work here. Although they may seem similar in functionality, they are performing two 32 // different functions. This filter table determines if we need to calculate the relevancy of a word, by only returning 33 // rows that exist within our index tables. If a word does not exist within the tables, then we can assume that it has a 34 // relevancy of zero (for the default search mode). Therefore, we can skip processing that row altogether. The existence 35 // of a row does not imply that the relevancy value will be non-zero though, as the relevancy calculation can return a 36 // zero due to rounding (as is the case with the MyISAM backend, which we currently do not support). 37 type FulltextFilterTable struct { 38 MatchAgainst *expression.MatchAgainst 39 Table sql.TableNode 40 } 41 42 var _ sql.IndexedTable = (*FulltextFilterTable)(nil) 43 44 // Name implements the interface sql.IndexedTable. 45 func (f *FulltextFilterTable) Name() string { 46 return f.Table.Name() 47 } 48 49 // String implements the interface sql.IndexedTable. 50 func (f *FulltextFilterTable) String() string { 51 return f.Table.String() 52 } 53 54 // Schema implements the interface sql.IndexedTable. 55 func (f *FulltextFilterTable) Schema() sql.Schema { 56 return f.Table.Schema() 57 } 58 59 // Collation implements the interface sql.IndexedTable. 60 func (f *FulltextFilterTable) Collation() sql.CollationID { 61 return f.Table.Collation() 62 } 63 64 // Partitions implements the interface sql.IndexedTable. 65 func (f *FulltextFilterTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) { 66 if f.MatchAgainst.KeyCols.Type == fulltext.KeyType_None { 67 return f.Table.Partitions(ctx) 68 } 69 return &fulltextFilterTablePartitionIter{false}, nil 70 } 71 72 // PartitionRows implements the interface sql.IndexedTable. 73 func (f *FulltextFilterTable) PartitionRows(ctx *sql.Context, partition sql.Partition) (sql.RowIter, error) { 74 // Keyless just iterates over the entire table. Not the most performant, but it works. 75 if f.MatchAgainst.KeyCols.Type == fulltext.KeyType_None { 76 return f.Table.PartitionRows(ctx, partition) 77 } 78 79 // Get the word parser for our string literal 80 words, err := f.MatchAgainst.Expr.Eval(ctx, nil) 81 if err != nil { 82 return nil, err 83 } 84 wordsStr, ok := words.(string) 85 if !ok { 86 if words != nil { 87 return nil, fmt.Errorf("expected WORD to be a string, but had type `%T`", words) 88 } 89 } 90 collation := fulltext.GetCollationFromSchema(ctx, f.MatchAgainst.DocCountTable.Schema()) 91 parser, err := fulltext.NewDefaultParser(ctx, collation, wordsStr) 92 if err != nil { 93 return nil, err 94 } 95 96 // Get the primary key index for the document count table 97 docCountIndexes, err := f.MatchAgainst.DocCountTable.GetIndexes(ctx) 98 if err != nil { 99 return nil, err 100 } 101 // There should only be a single index on this table 102 if len(docCountIndexes) == 0 { 103 return nil, fmt.Errorf("expected to find a primary key on the table `%s`", f.MatchAgainst.DocCountTable.Name()) 104 } else if len(docCountIndexes) > 1 { 105 return nil, fmt.Errorf("found too many indexes on the table `%s`", f.MatchAgainst.DocCountTable.Name()) 106 } 107 108 // Get the primary or unique key index for the parent table 109 parentIndexes, err := f.MatchAgainst.ParentTable.GetIndexes(ctx) 110 if err != nil { 111 return nil, err 112 } 113 var parentIndex sql.Index 114 for _, index := range parentIndexes { 115 switch f.MatchAgainst.KeyCols.Type { 116 case fulltext.KeyType_Primary: 117 if index.ID() == "PRIMARY" { 118 parentIndex = index 119 break 120 } 121 case fulltext.KeyType_Unique: 122 if index.ID() == f.MatchAgainst.KeyCols.Name { 123 parentIndex = index 124 break 125 } 126 } 127 } 128 if parentIndex == nil { 129 return nil, fmt.Errorf("Full-Text filter cannot find the index on the table `%s`", f.MatchAgainst.ParentTable.Name()) 130 } 131 132 return &fulltextFilterTableRowIter{ 133 matchAgainst: f.MatchAgainst, 134 parser: parser, 135 parentIndex: parentIndex, 136 docCountIndex: docCountIndexes[0], 137 parentIter: nil, 138 docCountIter: nil, 139 }, nil 140 } 141 142 // LookupPartitions implements the interface sql.IndexedTable. 143 func (f *FulltextFilterTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) { 144 return f.Partitions(ctx) 145 } 146 147 // fulltextFilterTablePartition is a partition that is used exclusively by FulltextFilterTable. 148 type fulltextFilterTablePartition struct{} 149 150 var _ sql.Partition = fulltextFilterTablePartition{} 151 152 // Key implements the interface sql.Partition. 153 func (f fulltextFilterTablePartition) Key() []byte { 154 return nil 155 } 156 157 // fulltextFilterTablePartitionIter is a partition iterator that is used exclusively by FulltextFilterTable. 158 type fulltextFilterTablePartitionIter struct { 159 once bool 160 } 161 162 var _ sql.PartitionIter = (*fulltextFilterTablePartitionIter)(nil) 163 164 // Next implements the interface sql.PartitionIter. 165 func (f *fulltextFilterTablePartitionIter) Next(ctx *sql.Context) (sql.Partition, error) { 166 if !f.once { 167 f.once = true 168 return fulltextFilterTablePartition{}, nil 169 } 170 return nil, io.EOF 171 } 172 173 // Close implements the interface sql.PartitionIter. 174 func (f *fulltextFilterTablePartitionIter) Close(*sql.Context) error { 175 return nil 176 } 177 178 // fulltextFilterTableRowIter is a row iterator that is used exclusively by FulltextFilterTable. Handles the 179 // communication between multiple tables to function similarly to a regular indexed table row iterator. 180 type fulltextFilterTableRowIter struct { 181 matchAgainst *expression.MatchAgainst 182 parser fulltext.DefaultParser 183 parentIndex sql.Index 184 docCountIndex sql.Index 185 parentIter *sql.TableRowIter 186 docCountIter *sql.TableRowIter 187 } 188 189 var _ sql.RowIter = (*fulltextFilterTableRowIter)(nil) 190 191 // Next implements the interface sql.RowIter. 192 func (f *fulltextFilterTableRowIter) Next(ctx *sql.Context) (sql.Row, error) { 193 for { 194 // If we don't have an iterator for the parent table, then we need to get one 195 if f.parentIter == nil { 196 // If we don't have an iterator for the doc counts, then we need one first before we can iterate the parent 197 if f.docCountIter == nil { 198 word, reachedTheEnd, err := f.parser.NextUnique(ctx) 199 if err != nil { 200 return nil, err 201 } 202 if reachedTheEnd { 203 return nil, io.EOF 204 } 205 lookup := sql.IndexLookup{Ranges: []sql.Range{{ 206 sql.ClosedRangeColumnExpr(word, word, f.matchAgainst.DocCountTable.Schema()[0].Type), 207 }}, Index: f.docCountIndex} 208 209 docCountData := f.matchAgainst.DocCountTable.IndexedAccess(lookup) 210 if err != nil { 211 return nil, err 212 } 213 214 partIter, err := docCountData.LookupPartitions(ctx, lookup) 215 if err != nil { 216 return nil, err 217 } 218 219 f.docCountIter = sql.NewTableRowIter(ctx, docCountData, partIter) 220 } 221 222 // We have an iterator for the document table, so grab the next row 223 docRow, err := f.docCountIter.Next(ctx) 224 if err != nil { 225 if err == io.EOF { 226 if err = f.docCountIter.Close(ctx); err != nil { 227 return nil, err 228 } 229 f.docCountIter = nil 230 continue 231 } 232 return nil, err 233 } 234 235 // Get the key so that we may get rows from the parent table 236 ranges := make(sql.Range, len(docRow)-2) 237 for i, val := range docRow[1 : len(docRow)-1] { 238 ranges[i] = sql.ClosedRangeColumnExpr(val, val, f.matchAgainst.DocCountTable.Schema()[i+1].Type) 239 } 240 lookup := sql.IndexLookup{Ranges: []sql.Range{ranges}, Index: f.parentIndex} 241 242 parentData := f.matchAgainst.ParentTable.IndexedAccess(lookup) 243 if err != nil { 244 return nil, err 245 } 246 247 partIter, err := parentData.LookupPartitions(ctx, lookup) 248 if err != nil { 249 return nil, err 250 } 251 252 f.parentIter = sql.NewTableRowIter(ctx, parentData, partIter) 253 } 254 255 // We have an iterator for the parent table, so grab the next row 256 parentRow, err := f.parentIter.Next(ctx) 257 if err != nil { 258 if err == io.EOF { 259 if err = f.parentIter.Close(ctx); err != nil { 260 return nil, err 261 } 262 f.parentIter = nil 263 continue 264 } 265 return nil, err 266 } 267 return parentRow, nil 268 } 269 } 270 271 // Close implements the interface sql.RowIter. 272 func (f *fulltextFilterTableRowIter) Close(ctx *sql.Context) error { 273 var err error 274 if f.docCountIter != nil { 275 if nErr := f.docCountIter.Close(ctx); err == nil { 276 err = nErr 277 } 278 f.docCountIter = nil 279 } 280 if f.parentIter != nil { 281 if nErr := f.parentIter.Close(ctx); err == nil { 282 err = nErr 283 } 284 f.parentIter = nil 285 } 286 return err 287 }