go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/sdk/stringutil/split_quoted.go (about) 1 /* 2 3 Copyright (c) 2023 - Present. Will Charczuk. All rights reserved. 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository. 5 6 */ 7 8 package stringutil 9 10 // SplitQuoted splits a corpus on a given string but treats quoted strings, 11 // specifically any text within `"` as whole values. 12 func SplitQuoted(text, sep string) (output []string) { 13 if text == "" || sep == "" { 14 return 15 } 16 17 // generally we read the text rune by rune 18 // if we see a rune that is the start of the separator 19 // we consume the separator until we either miss 20 // or we reach the end of the separator 21 // 22 // if we miss, we move the separator contents to the normal 23 // accumulation working word. 24 // 25 // if we run out of separator, we collect the previous 26 // accumulation working word as an output. 27 // 28 // we aim generally to do a single pass through the text 29 // and prioritize _not_ re-reading the same rune multiple times. 30 31 // fsm states 32 const ( 33 stateWord = iota 34 stateSep = iota 35 stateQuoted = iota 36 ) 37 38 var state int 39 var working, workingSep []rune 40 var openingQuote rune 41 sepRunes := []rune(sep) 42 var sepIndex int 43 for _, r := range text { 44 switch state { 45 case stateWord: 46 { 47 if r == sepRunes[0] { 48 state = stateSep 49 workingSep = append(workingSep, r) 50 sepIndex = 1 51 continue 52 } 53 54 working = append(working, r) 55 56 if fieldsIsQuote(r) { 57 openingQuote = r 58 state = stateQuoted 59 continue 60 } 61 62 continue 63 } 64 65 case stateSep: 66 { 67 if sepIndex == len(sepRunes) { 68 workingSep = nil 69 sepIndex = 0 70 71 if len(working) > 0 { 72 output = append(output, string(working)) 73 } 74 75 working = []rune{r} 76 77 if fieldsIsQuote(r) { 78 openingQuote = r 79 state = stateQuoted 80 continue 81 } 82 83 state = stateWord 84 continue 85 } 86 87 if r == sepRunes[sepIndex] { 88 workingSep = append(workingSep, r) 89 sepIndex++ 90 continue 91 } 92 93 // if we have a separator miss, add 94 // whatever we've collected so far to the 95 // working word 96 working = append(working, workingSep...) 97 workingSep = nil 98 sepIndex = 0 99 100 working = append(working, r) 101 102 if fieldsIsQuote(r) { 103 openingQuote = r 104 state = stateQuoted 105 continue 106 } 107 108 state = stateWord 109 continue 110 } 111 112 case stateQuoted: 113 { 114 // if we hit a quote, and it matches the "opening" quote 115 // switch back to normal word mode 116 if fieldsIsQuote(r) && fieldsMatchesQuote(openingQuote, r) { 117 state = stateWord 118 } 119 working = append(working, r) 120 continue 121 } 122 123 } 124 } 125 126 if len(workingSep) > 0 { 127 if string(workingSep) != sep { 128 working = append(working, workingSep...) 129 } 130 } 131 if len(working) > 0 { 132 output = append(output, string(working)) 133 } 134 return 135 }