github.com/dolthub/go-mysql-server@v0.18.0/sql/rowexec/fulltext_filter.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rowexec
    16  
    17  import (
    18  	"fmt"
    19  	"io"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql"
    22  	"github.com/dolthub/go-mysql-server/sql/expression"
    23  	"github.com/dolthub/go-mysql-server/sql/fulltext"
    24  )
    25  
    26  // FulltextFilterTable handles row iteration for filters involving Full-Text indexes, as they behave differently than
    27  // other indexes. This acts as a sort of wrapper, so that integrators do not need to implement special logic on their
    28  // side.
    29  //
    30  // This takes a MatchAgainst expression, as it will have already resolved the index and necessary tables, therefore we
    31  // do not need to replicate the work here. Although they may seem similar in functionality, they are performing two
    32  // different functions. This filter table determines if we need to calculate the relevancy of a word, by only returning
    33  // rows that exist within our index tables. If a word does not exist within the tables, then we can assume that it has a
    34  // relevancy of zero (for the default search mode). Therefore, we can skip processing that row altogether. The existence
    35  // of a row does not imply that the relevancy value will be non-zero though, as the relevancy calculation can return a
    36  // zero due to rounding (as is the case with the MyISAM backend, which we currently do not support).
    37  type FulltextFilterTable struct {
    38  	MatchAgainst *expression.MatchAgainst
    39  	Table        sql.TableNode
    40  }
    41  
    42  var _ sql.IndexedTable = (*FulltextFilterTable)(nil)
    43  
    44  // Name implements the interface sql.IndexedTable.
    45  func (f *FulltextFilterTable) Name() string {
    46  	return f.Table.Name()
    47  }
    48  
    49  // String implements the interface sql.IndexedTable.
    50  func (f *FulltextFilterTable) String() string {
    51  	return f.Table.String()
    52  }
    53  
    54  // Schema implements the interface sql.IndexedTable.
    55  func (f *FulltextFilterTable) Schema() sql.Schema {
    56  	return f.Table.Schema()
    57  }
    58  
    59  // Collation implements the interface sql.IndexedTable.
    60  func (f *FulltextFilterTable) Collation() sql.CollationID {
    61  	return f.Table.Collation()
    62  }
    63  
    64  // Partitions implements the interface sql.IndexedTable.
    65  func (f *FulltextFilterTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) {
    66  	if f.MatchAgainst.KeyCols.Type == fulltext.KeyType_None {
    67  		return f.Table.Partitions(ctx)
    68  	}
    69  	return &fulltextFilterTablePartitionIter{false}, nil
    70  }
    71  
    72  // PartitionRows implements the interface sql.IndexedTable.
    73  func (f *FulltextFilterTable) PartitionRows(ctx *sql.Context, partition sql.Partition) (sql.RowIter, error) {
    74  	// Keyless just iterates over the entire table. Not the most performant, but it works.
    75  	if f.MatchAgainst.KeyCols.Type == fulltext.KeyType_None {
    76  		return f.Table.PartitionRows(ctx, partition)
    77  	}
    78  
    79  	// Get the word parser for our string literal
    80  	words, err := f.MatchAgainst.Expr.Eval(ctx, nil)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	wordsStr, ok := words.(string)
    85  	if !ok {
    86  		if words != nil {
    87  			return nil, fmt.Errorf("expected WORD to be a string, but had type `%T`", words)
    88  		}
    89  	}
    90  	collation := fulltext.GetCollationFromSchema(ctx, f.MatchAgainst.DocCountTable.Schema())
    91  	parser, err := fulltext.NewDefaultParser(ctx, collation, wordsStr)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	// Get the primary key index for the document count table
    97  	docCountIndexes, err := f.MatchAgainst.DocCountTable.GetIndexes(ctx)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  	// There should only be a single index on this table
   102  	if len(docCountIndexes) == 0 {
   103  		return nil, fmt.Errorf("expected to find a primary key on the table `%s`", f.MatchAgainst.DocCountTable.Name())
   104  	} else if len(docCountIndexes) > 1 {
   105  		return nil, fmt.Errorf("found too many indexes on the table `%s`", f.MatchAgainst.DocCountTable.Name())
   106  	}
   107  
   108  	// Get the primary or unique key index for the parent table
   109  	parentIndexes, err := f.MatchAgainst.ParentTable.GetIndexes(ctx)
   110  	if err != nil {
   111  		return nil, err
   112  	}
   113  	var parentIndex sql.Index
   114  	for _, index := range parentIndexes {
   115  		switch f.MatchAgainst.KeyCols.Type {
   116  		case fulltext.KeyType_Primary:
   117  			if index.ID() == "PRIMARY" {
   118  				parentIndex = index
   119  				break
   120  			}
   121  		case fulltext.KeyType_Unique:
   122  			if index.ID() == f.MatchAgainst.KeyCols.Name {
   123  				parentIndex = index
   124  				break
   125  			}
   126  		}
   127  	}
   128  	if parentIndex == nil {
   129  		return nil, fmt.Errorf("Full-Text filter cannot find the index on the table `%s`", f.MatchAgainst.ParentTable.Name())
   130  	}
   131  
   132  	return &fulltextFilterTableRowIter{
   133  		matchAgainst:  f.MatchAgainst,
   134  		parser:        parser,
   135  		parentIndex:   parentIndex,
   136  		docCountIndex: docCountIndexes[0],
   137  		parentIter:    nil,
   138  		docCountIter:  nil,
   139  	}, nil
   140  }
   141  
   142  // LookupPartitions implements the interface sql.IndexedTable.
   143  func (f *FulltextFilterTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) {
   144  	return f.Partitions(ctx)
   145  }
   146  
   147  // fulltextFilterTablePartition is a partition that is used exclusively by FulltextFilterTable.
   148  type fulltextFilterTablePartition struct{}
   149  
   150  var _ sql.Partition = fulltextFilterTablePartition{}
   151  
   152  // Key implements the interface sql.Partition.
   153  func (f fulltextFilterTablePartition) Key() []byte {
   154  	return nil
   155  }
   156  
   157  // fulltextFilterTablePartitionIter is a partition iterator that is used exclusively by FulltextFilterTable.
   158  type fulltextFilterTablePartitionIter struct {
   159  	once bool
   160  }
   161  
   162  var _ sql.PartitionIter = (*fulltextFilterTablePartitionIter)(nil)
   163  
   164  // Next implements the interface sql.PartitionIter.
   165  func (f *fulltextFilterTablePartitionIter) Next(ctx *sql.Context) (sql.Partition, error) {
   166  	if !f.once {
   167  		f.once = true
   168  		return fulltextFilterTablePartition{}, nil
   169  	}
   170  	return nil, io.EOF
   171  }
   172  
   173  // Close implements the interface sql.PartitionIter.
   174  func (f *fulltextFilterTablePartitionIter) Close(*sql.Context) error {
   175  	return nil
   176  }
   177  
   178  // fulltextFilterTableRowIter is a row iterator that is used exclusively by FulltextFilterTable. Handles the
   179  // communication between multiple tables to function similarly to a regular indexed table row iterator.
   180  type fulltextFilterTableRowIter struct {
   181  	matchAgainst  *expression.MatchAgainst
   182  	parser        fulltext.DefaultParser
   183  	parentIndex   sql.Index
   184  	docCountIndex sql.Index
   185  	parentIter    *sql.TableRowIter
   186  	docCountIter  *sql.TableRowIter
   187  }
   188  
   189  var _ sql.RowIter = (*fulltextFilterTableRowIter)(nil)
   190  
   191  // Next implements the interface sql.RowIter.
   192  func (f *fulltextFilterTableRowIter) Next(ctx *sql.Context) (sql.Row, error) {
   193  	for {
   194  		// If we don't have an iterator for the parent table, then we need to get one
   195  		if f.parentIter == nil {
   196  			// If we don't have an iterator for the doc counts, then we need one first before we can iterate the parent
   197  			if f.docCountIter == nil {
   198  				word, reachedTheEnd, err := f.parser.NextUnique(ctx)
   199  				if err != nil {
   200  					return nil, err
   201  				}
   202  				if reachedTheEnd {
   203  					return nil, io.EOF
   204  				}
   205  				lookup := sql.IndexLookup{Ranges: []sql.Range{{
   206  					sql.ClosedRangeColumnExpr(word, word, f.matchAgainst.DocCountTable.Schema()[0].Type),
   207  				}}, Index: f.docCountIndex}
   208  
   209  				docCountData := f.matchAgainst.DocCountTable.IndexedAccess(lookup)
   210  				if err != nil {
   211  					return nil, err
   212  				}
   213  
   214  				partIter, err := docCountData.LookupPartitions(ctx, lookup)
   215  				if err != nil {
   216  					return nil, err
   217  				}
   218  
   219  				f.docCountIter = sql.NewTableRowIter(ctx, docCountData, partIter)
   220  			}
   221  
   222  			// We have an iterator for the document table, so grab the next row
   223  			docRow, err := f.docCountIter.Next(ctx)
   224  			if err != nil {
   225  				if err == io.EOF {
   226  					if err = f.docCountIter.Close(ctx); err != nil {
   227  						return nil, err
   228  					}
   229  					f.docCountIter = nil
   230  					continue
   231  				}
   232  				return nil, err
   233  			}
   234  
   235  			// Get the key so that we may get rows from the parent table
   236  			ranges := make(sql.Range, len(docRow)-2)
   237  			for i, val := range docRow[1 : len(docRow)-1] {
   238  				ranges[i] = sql.ClosedRangeColumnExpr(val, val, f.matchAgainst.DocCountTable.Schema()[i+1].Type)
   239  			}
   240  			lookup := sql.IndexLookup{Ranges: []sql.Range{ranges}, Index: f.parentIndex}
   241  
   242  			parentData := f.matchAgainst.ParentTable.IndexedAccess(lookup)
   243  			if err != nil {
   244  				return nil, err
   245  			}
   246  
   247  			partIter, err := parentData.LookupPartitions(ctx, lookup)
   248  			if err != nil {
   249  				return nil, err
   250  			}
   251  
   252  			f.parentIter = sql.NewTableRowIter(ctx, parentData, partIter)
   253  		}
   254  
   255  		// We have an iterator for the parent table, so grab the next row
   256  		parentRow, err := f.parentIter.Next(ctx)
   257  		if err != nil {
   258  			if err == io.EOF {
   259  				if err = f.parentIter.Close(ctx); err != nil {
   260  					return nil, err
   261  				}
   262  				f.parentIter = nil
   263  				continue
   264  			}
   265  			return nil, err
   266  		}
   267  		return parentRow, nil
   268  	}
   269  }
   270  
   271  // Close implements the interface sql.RowIter.
   272  func (f *fulltextFilterTableRowIter) Close(ctx *sql.Context) error {
   273  	var err error
   274  	if f.docCountIter != nil {
   275  		if nErr := f.docCountIter.Close(ctx); err == nil {
   276  			err = nErr
   277  		}
   278  		f.docCountIter = nil
   279  	}
   280  	if f.parentIter != nil {
   281  		if nErr := f.parentIter.Close(ctx); err == nil {
   282  			err = nErr
   283  		}
   284  		f.parentIter = nil
   285  	}
   286  	return err
   287  }