github.com/Jeffail/benthos/v3@v3.65.0/internal/impl/parquet/processor_test.go (about) 1 package parquet 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "testing" 8 9 "github.com/Jeffail/benthos/v3/public/service" 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 ) 13 14 func TestParquetProcessorConfigLinting(t *testing.T) { 15 configTests := []struct { 16 name string 17 config string 18 errContains string 19 }{ 20 { 21 name: "missing operator", 22 config: ` 23 parquet: 24 schema: '{}' 25 `, 26 errContains: `field operator is required`, 27 }, 28 { 29 name: "invalid operator", 30 config: ` 31 parquet: 32 operator: not_real 33 schema: no 34 `, 35 errContains: `value not_real is not a valid`, 36 }, 37 } 38 39 env := service.NewEnvironment() 40 for _, test := range configTests { 41 t.Run(test.name, func(t *testing.T) { 42 strm := env.NewStreamBuilder() 43 err := strm.AddProcessorYAML(test.config) 44 if test.errContains == "" { 45 require.NoError(t, err) 46 } else { 47 require.Error(t, err) 48 assert.Contains(t, err.Error(), test.errContains) 49 } 50 }) 51 } 52 } 53 54 func TestParquetProcessorConfigParse(t *testing.T) { 55 tmpSchemaFile, err := os.CreateTemp("", "benthos_parquet_test") 56 require.NoError(t, err) 57 58 _, err = tmpSchemaFile.WriteString(`{ 59 "Tag": "name=root, repetitiontype=REQUIRED", 60 "Fields": [ 61 {"Tag": "name=name, inname=NameIn, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED"}, 62 {"Tag": "name=age, inname=Age, type=INT32, repetitiontype=REQUIRED"}, 63 {"Tag": "name=id, inname=Id, type=INT64, repetitiontype=REQUIRED"} 64 ] 65 }`) 66 require.NoError(t, err) 67 68 configTests := []struct { 69 name string 70 config string 71 schema string 72 errContains string 73 }{ 74 { 75 name: "no schema or schema file", 76 config: ` 77 operator: to_json 78 `, 79 errContains: "either a raw `schema` or a non-empty `schema_file` must be specified", 80 }, 81 { 82 name: "raw schema", 83 config: ` 84 operator: to_json 85 schema: | 86 { 87 "Tag": "name=root, repetitiontype=REQUIRED", 88 "Fields": [ 89 {"Tag": "name=name, inname=NameIn, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED"}, 90 {"Tag": "name=age, inname=Age, type=INT32, repetitiontype=REQUIRED"}, 91 {"Tag": "name=id, inname=Id, type=INT64, repetitiontype=REQUIRED"} 92 ] 93 } 94 `, 95 schema: `{ 96 "Tag": "name=root, repetitiontype=REQUIRED", 97 "Fields": [ 98 {"Tag": "name=name, inname=NameIn, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED"}, 99 {"Tag": "name=age, inname=Age, type=INT32, repetitiontype=REQUIRED"}, 100 {"Tag": "name=id, inname=Id, type=INT64, repetitiontype=REQUIRED"} 101 ] 102 } 103 `, 104 }, 105 { 106 name: "schema file", 107 config: fmt.Sprintf(` 108 operator: to_json 109 schema_file: %v 110 `, tmpSchemaFile.Name()), 111 schema: `{ 112 "Tag": "name=root, repetitiontype=REQUIRED", 113 "Fields": [ 114 {"Tag": "name=name, inname=NameIn, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED"}, 115 {"Tag": "name=age, inname=Age, type=INT32, repetitiontype=REQUIRED"}, 116 {"Tag": "name=id, inname=Id, type=INT64, repetitiontype=REQUIRED"} 117 ] 118 }`, 119 }, 120 } 121 122 confSpec := parquetProcessorConfig() 123 env := service.NewEnvironment() 124 125 for _, test := range configTests { 126 t.Run(test.name, func(t *testing.T) { 127 pConf, err := confSpec.ParseYAML(test.config, env) 128 require.NoError(t, err) 129 130 proc, err := newParquetProcessorFromConfig(pConf, nil) 131 if test.errContains == "" { 132 require.NoError(t, err) 133 assert.Equal(t, test.schema, proc.schema) 134 } else { 135 require.Error(t, err) 136 assert.Contains(t, err.Error(), test.errContains) 137 } 138 }) 139 } 140 } 141 142 func TestParquetJSONSchemaRoundTrip(t *testing.T) { 143 schema := `{ 144 "Tag": "name=root, repetitiontype=REQUIRED", 145 "Fields": [ 146 {"Tag": "name=name, inname=NameIn, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED"}, 147 {"Tag": "name=age, inname=Age, type=INT32, repetitiontype=REQUIRED"}, 148 {"Tag": "name=id, inname=Id, type=INT64, repetitiontype=REQUIRED"}, 149 {"Tag": "name=weight, inname=Weight, type=FLOAT, repetitiontype=REQUIRED"}, 150 { 151 "Tag": "name=favPokemon, inname=FavPokemon, type=LIST, repetitiontype=OPTIONAL", 152 "Fields": [ 153 { "Tag": "name=element, repetitiontype=REQUIRED", "Fields": [ 154 { "Tag": "name=name, inname=PokeName, type=BYTE_ARRAY, convertedtype=UTF8, repetitiontype=REQUIRED" }, 155 { "Tag": "name=coolness, inname=Coolness, type=FLOAT, repetitiontype=REQUIRED" } 156 ] } 157 ] 158 } 159 ] 160 }` 161 162 inputDocs := []string{ 163 `{"NameIn":"fooer first","age":21,"id":1,"weight":60.1}`, 164 `{"NameIn":"fooer second","age":22,"id":2,"weight":60.2}`, 165 `{"NameIn":"fooer third","age":23,"id":3,"weight":60.3,"favPokemon":[{"PokeName":"bulbasaur","Coolness":99}]}`, 166 `{"NameIn":"fooer fourth","age":24,"id":4,"weight":60.4}`, 167 `{"NameIn":"fooer fifth","age":25,"id":5,"weight":60.5}`, 168 `{"NameIn":"fooer sixth","age":26,"id":6,"weight":60.6}`, 169 } 170 171 // Test every compression codec 172 for _, c := range []string{ 173 "uncompressed", "snappy", "gzip", "lz4", "zstd", 174 // "lzo", "brotli", "lz4_raw", 175 } { 176 t.Run(fmt.Sprintf("with %v codec", c), func(t *testing.T) { 177 writer, err := newParquetProcessor("from_json", c, schema, nil) 178 require.NoError(t, err) 179 180 reader, err := newParquetProcessor("to_json", "", schema, nil) 181 require.NoError(t, err) 182 183 var inputBatch service.MessageBatch 184 for _, d := range inputDocs { 185 inputBatch = append(inputBatch, service.NewMessage([]byte(d))) 186 } 187 188 writerResBatches, err := writer.ProcessBatch(context.Background(), inputBatch) 189 require.NoError(t, err) 190 require.Len(t, writerResBatches, 1) 191 require.Len(t, writerResBatches[0], 1) 192 193 readerResBatches, err := reader.ProcessBatch(context.Background(), writerResBatches[0]) 194 require.NoError(t, err) 195 require.Len(t, writerResBatches, 1) 196 197 var readerResStrs []string 198 for _, m := range readerResBatches[0] { 199 mBytes, err := m.AsBytes() 200 require.NoError(t, err) 201 readerResStrs = append(readerResStrs, string(mBytes)) 202 } 203 204 assert.Equal(t, []string{ 205 `{"NameIn":"fooer first","Age":21,"Id":1,"Weight":60.1,"FavPokemon":null}`, 206 `{"NameIn":"fooer second","Age":22,"Id":2,"Weight":60.2,"FavPokemon":null}`, 207 `{"NameIn":"fooer third","Age":23,"Id":3,"Weight":60.3,"FavPokemon":[{"PokeName":"bulbasaur","Coolness":99}]}`, 208 `{"NameIn":"fooer fourth","Age":24,"Id":4,"Weight":60.4,"FavPokemon":null}`, 209 `{"NameIn":"fooer fifth","Age":25,"Id":5,"Weight":60.5,"FavPokemon":null}`, 210 `{"NameIn":"fooer sixth","Age":26,"Id":6,"Weight":60.6,"FavPokemon":null}`, 211 }, readerResStrs) 212 }) 213 } 214 }