/*

Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
Use of this source code is governed by a MIT license that can be found in the LICENSE file.

*/

// SplitSpaceQuoted breaks text into whitespace-separated fields while keeping
// quoted runs together, so whitespace inside quotes never ends a field.
//
// A quote that begins a field is stripped together with its closing partner.
// A quote that appears after a field has already started is kept verbatim in
// the field, as is the quote that closes it. A quote that is never closed
// extends to the end of text. Fields that end up empty are not emitted, and
// the result is nil when no field is produced.
func SplitSpaceQuoted(text string) (output []string) {
	// Scanner modes.
	const (
		skipping    = iota // consuming whitespace before or between fields
		inField            // accumulating an unquoted run of a field
		quotedField        // inside a quote that opened the field
		quotedInner        // inside a quote that opened mid-field
	)

	var (
		mode    = skipping
		current []rune // runes of the field being built
		opener  rune   // quote rune that opened the active quoted run
	)

	// flush emits the field built so far, if any, and starts a new one.
	flush := func() {
		if len(current) > 0 {
			output = append(output, string(current))
			current = nil
		}
	}

	for _, ch := range text {
		switch mode {
		case skipping:
			switch {
			case unicode.IsSpace(ch):
				// still between fields; nothing to record
			case isQuote(ch):
				// the opening quote of a field is not part of the field
				opener = ch
				mode = quotedField
			default:
				current = append(current, ch)
				mode = inField
			}

		case inField:
			switch {
			case isQuote(ch):
				// a quote inside a field is preserved in the output
				opener = ch
				current = append(current, ch)
				mode = quotedInner
			case unicode.IsSpace(ch):
				flush()
				mode = skipping
			default:
				current = append(current, ch)
			}

		case quotedField:
			if matchesQuote(opener, ch) {
				// the closing quote is dropped; the field may continue unquoted
				mode = inField
				continue
			}
			current = append(current, ch)

		case quotedInner:
			// every rune is kept, including the closing quote
			current = append(current, ch)
			if matchesQuote(opener, ch) {
				mode = inField
			}
		}
	}

	flush()
	return output
}

// isQuote reports whether r is one of the recognized quote runes: straight
// double or single quote, backtick, or a left/right curly double or single
// quote.
func isQuote(r rune) bool {
	switch r {
	case '"', '\'', '`', '“', '”', '‘', '’':
		return true
	}
	return false
}

// matchesQuote reports whether b closes a quoted run opened by a. Identical
// runes always match; curly quotes additionally match their opposite-facing
// counterpart of the same kind, in either order.
func matchesQuote(a, b rune) bool {
	if a == b {
		return true
	}
	switch a {
	case '“':
		return b == '”'
	case '”':
		return b == '“'
	case '‘':
		return b == '’'
	case '’':
		return b == '‘'
	}
	return false
}