github.com/cyverse/go-irodsclient@v0.13.2/irods/connection/xml.go (about) 1 package connection 2 3 import ( 4 "bytes" 5 "strconv" 6 "strings" 7 "unicode/utf8" 8 9 "github.com/cyverse/go-irodsclient/irods/message" 10 "golang.org/x/xerrors" 11 ) 12 13 var ( 14 // escapes from xml.Encode 15 escQuot = []byte(""") // shorter than """, \" 16 escApos = []byte("'") // shorter than "'", \' 17 escTab = []byte("	") 18 escNL = []byte("
") 19 escCR = []byte("
") 20 escFFFD = []byte("\uFFFD") // Unicode replacement character 21 22 // escapes for irods 23 irodsEscQuot = []byte(""") 24 irodsEscApos = []byte("'") 25 ) 26 27 // ErrInvalidUTF8 is returned if an invalid utf-8 character is found. 28 var ErrInvalidUTF8 = xerrors.Errorf("invalid utf-8 character") 29 30 func (conn *IRODSConnection) talksCorrectXML() bool { 31 if conn.serverVersion == nil { 32 // We don't know the server version yet, assume the best 33 return true 34 } 35 36 if !strings.HasPrefix(conn.serverVersion.ReleaseVersion, "rods") { 37 // Strange, but hopefully it talks correct xml 38 return true 39 } 40 41 version := strings.Split(conn.serverVersion.ReleaseVersion[4:], ".") 42 43 if len(version) != 3 { 44 // Strange, but hopefully it talks correct xml 45 return true 46 } 47 48 major, _ := strconv.Atoi(version[0]) 49 minor, _ := strconv.Atoi(version[1]) 50 release, _ := strconv.Atoi(version[2]) 51 52 return major > 4 || (major == 4 && minor > 2) || (major == 4 && minor == 2 && release > 8) 53 } 54 55 // PostprocessMessage prepares a message that is received from irods for XML parsing. 56 func (conn *IRODSConnection) PostprocessMessage(msg *message.IRODSMessage) error { 57 if msg.Body == nil || msg.Body.Message == nil { 58 return nil 59 } 60 61 var err error 62 63 msg.Body.Message, err = conn.PostprocessXML(msg.Body.Message) 64 msg.Header.MessageLen = uint32(len(msg.Body.Message)) 65 66 return err 67 } 68 69 // PostprocessXML translates IRODS XML into valid XML. 70 // We fix the invalid encoding of ` as ". 71 func (conn *IRODSConnection) PostprocessXML(in []byte) ([]byte, error) { 72 buf := in 73 out := &bytes.Buffer{} 74 75 for len(buf) > 0 { 76 switch { 77 // turn " into ` 78 case bytes.HasPrefix(buf, irodsEscQuot) && !conn.talksCorrectXML(): 79 out.WriteByte('`') 80 buf = buf[len(irodsEscQuot):] 81 // turn ' into ' 82 case buf[0] == '\'' && !conn.talksCorrectXML(): 83 out.Write(escApos) 84 buf = buf[1:] 85 // check utf8 characters for validity 86 default: 87 r, size := utf8.DecodeRune(buf) 88 if r == utf8.RuneError && size == 1 { 89 return in, ErrInvalidUTF8 90 } 91 92 if isValidChar(r) { 93 out.Write(buf[:size]) 94 } else { 95 out.Write(escFFFD) 96 } 97 98 buf = buf[size:] 99 } 100 } 101 102 return out.Bytes(), nil 103 } 104 105 // PreprocessMessage modifies a request message to use irods dialect for XML. 106 func (conn *IRODSConnection) PreprocessMessage(msg *message.IRODSMessage, forPassword bool) error { 107 if msg.Body == nil || msg.Body.Message == nil { 108 return nil 109 } 110 111 var err error 112 113 if forPassword { 114 msg.Body.Message, err = conn.PreprocessXMLForPassword(msg.Body.Message) 115 } else { 116 msg.Body.Message, err = conn.PreprocessXML(msg.Body.Message) 117 } 118 119 msg.Header.MessageLen = uint32(len(msg.Body.Message)) 120 121 return err 122 } 123 124 // PreprocessXML translates output of xml.Marshal into XML that IRODS understands. 125 func (conn *IRODSConnection) PreprocessXML(in []byte) ([]byte, error) { 126 buf := in 127 out := &bytes.Buffer{} 128 129 for len(buf) > 0 { 130 switch { 131 // turn " into " 132 case bytes.HasPrefix(buf, escQuot): 133 out.Write(irodsEscQuot) 134 buf = buf[len(escQuot):] 135 // turn ' into ' or ' 136 case bytes.HasPrefix(buf, escApos): 137 if conn.talksCorrectXML() { 138 out.Write(irodsEscApos) 139 } else { 140 out.WriteByte('\'') 141 } 142 buf = buf[len(escApos):] 143 // irods does not decode encoded tabs 144 case bytes.HasPrefix(buf, escTab): 145 out.WriteByte('\t') 146 buf = buf[len(escTab):] 147 // irods does not decode encoded carriage returns 148 case bytes.HasPrefix(buf, escCR): 149 out.WriteByte('\r') 150 buf = buf[len(escCR):] 151 // irods does not decode encoded newlines 152 case bytes.HasPrefix(buf, escNL): 153 out.WriteByte('\n') 154 buf = buf[len(escNL):] 155 // turn ` into ' 156 case buf[0] == '`' && !conn.talksCorrectXML(): 157 out.Write(irodsEscApos) 158 buf = buf[1:] 159 // pass utf8 characters 160 default: 161 r, size := utf8.DecodeRune(buf) 162 if r == utf8.RuneError && size == 1 { 163 return in, ErrInvalidUTF8 164 } 165 166 out.Write(buf[:size]) 167 buf = buf[size:] 168 } 169 } 170 171 return out.Bytes(), nil 172 } 173 174 // PreprocessXMLForPassword translates output of xml.Marshal into XML that IRODS understands. 175 func (conn *IRODSConnection) PreprocessXMLForPassword(in []byte) ([]byte, error) { 176 buf := in 177 out := &bytes.Buffer{} 178 179 for len(buf) > 0 { 180 switch { 181 // turn " into \" 182 case bytes.HasPrefix(buf, escQuot): 183 out.WriteByte('"') 184 buf = buf[len(escQuot):] 185 // turn ' into \' 186 case bytes.HasPrefix(buf, escApos): 187 out.WriteByte('\'') 188 buf = buf[len(escApos):] 189 // irods does not decode encoded tabs 190 case bytes.HasPrefix(buf, escTab): 191 out.WriteByte('\t') 192 buf = buf[len(escTab):] 193 // irods does not decode encoded carriage returns 194 case bytes.HasPrefix(buf, escCR): 195 out.WriteByte('\r') 196 buf = buf[len(escCR):] 197 // irods does not decode encoded newlines 198 case bytes.HasPrefix(buf, escNL): 199 out.WriteByte('\n') 200 buf = buf[len(escNL):] 201 // pass utf8 characters 202 default: 203 r, size := utf8.DecodeRune(buf) 204 if r == utf8.RuneError && size == 1 { 205 return in, ErrInvalidUTF8 206 } 207 208 out.Write(buf[:size]) 209 buf = buf[size:] 210 } 211 } 212 213 return out.Bytes(), nil 214 } 215 216 func isValidChar(r rune) bool { 217 return r == 0x09 || 218 r == 0x0A || 219 r == 0x0D || 220 r >= 0x20 && r <= 0xD7FF || 221 r >= 0xE000 && r <= 0xFFFD || 222 r >= 0x10000 && r <= 0x10FFFF 223 }