github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/tokens.go (about)

     1  /*
     2   * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package tok
    18  
    19  import (
    20  	"github.com/pkg/errors"
    21  )
    22  
    23  // GetLangTokenizer returns the correct full-text tokenizer for the given language.
    24  func GetLangTokenizer(t Tokenizer, lang string) Tokenizer {
    25  	if lang == "" {
    26  		return t
    27  	}
    28  	switch t.(type) {
    29  	case FullTextTokenizer:
    30  		// We must return a new instance because another goroutine might be calling this
    31  		// with a different lang.
    32  		return FullTextTokenizer{lang: lang}
    33  	}
    34  	return t
    35  }
    36  
    37  // GetTokens returns the tokens for the given tokenizer ID and value.
    38  // funcArgs should only have one element which is the value that needs to be tokenized.
    39  func GetTokens(id byte, funcArgs ...string) ([]string, error) {
    40  	if l := len(funcArgs); l != 1 {
    41  		return nil, errors.Errorf("Function requires 1 arguments, but got %d", l)
    42  	}
    43  	tokenizer, ok := GetTokenizerByID(id)
    44  	if !ok {
    45  		return nil, errors.Errorf("No tokenizer was found with id %v", id)
    46  	}
    47  	return BuildTokens(funcArgs[0], tokenizer)
    48  }
    49  
    50  // GetTermTokens returns the term tokens for the given value.
    51  func GetTermTokens(funcArgs []string) ([]string, error) {
    52  	return GetTokens(IdentTerm, funcArgs...)
    53  }
    54  
    55  // GetFullTextTokens returns the full-text tokens for the given value.
    56  func GetFullTextTokens(funcArgs []string, lang string) ([]string, error) {
    57  	if l := len(funcArgs); l != 1 {
    58  		return nil, errors.Errorf("Function requires 1 arguments, but got %d", l)
    59  	}
    60  	return BuildTokens(funcArgs[0], FullTextTokenizer{lang: lang})
    61  }