github.com/weaviate/weaviate@v1.24.6/modules/text2vec-contextionary/classification/splitter.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package classification 13 14 // TODO: This code is duplicated across weaviate and contextionary which makes 15 // changes risky. Can we find a single source of truth for this logic 16 17 import ( 18 "strings" 19 "unicode" 20 ) 21 22 func newSplitter() *splitter { 23 return &splitter{} 24 } 25 26 type splitter struct{} 27 28 func (s *splitter) Split(corpus string) []string { 29 return strings.FieldsFunc(corpus, func(c rune) bool { 30 return !unicode.IsLetter(c) && !unicode.IsNumber(c) 31 }) 32 }