github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/subscriber/common/message/message_parser.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package message 16 17 import ( 18 "fmt" 19 "github.com/uber-go/tally" 20 "github.com/uber/aresdb/client" 21 memcom "github.com/uber/aresdb/memstore/common" 22 metaCom "github.com/uber/aresdb/metastore/common" 23 "github.com/uber/aresdb/subscriber/common/rules" 24 "github.com/uber/aresdb/subscriber/common/sink" 25 "github.com/uber/aresdb/subscriber/config" 26 "github.com/uber/aresdb/utils" 27 "go.uber.org/zap" 28 "runtime" 29 "sort" 30 ) 31 32 // Parser holds all resources needed to parse one message 33 // into one or multiple row objects with respect to different destinations 34 type Parser struct { 35 // ServiceConfig is ares-subscriber configure 36 ServiceConfig config.ServiceConfig 37 // JobName is job name 38 JobName string 39 // Cluster is ares cluster name 40 Cluster string 41 // destinations each message will be parsed and written into 42 Destination sink.Destination 43 // Transformations are keyed on the output column name 44 Transformations map[string]*rules.TransformationConfig 45 scope tally.Scope 46 } 47 48 // NewParser will create a Parser for given JobConfig 49 func NewParser(jobConfig *rules.JobConfig, serviceConfig config.ServiceConfig) *Parser { 50 mp := &Parser{ 51 ServiceConfig: serviceConfig, 52 JobName: jobConfig.Name, 53 Cluster: jobConfig.AresTableConfig.Cluster, 54 Transformations: jobConfig.GetTranformations(), 55 scope: serviceConfig.Scope.Tagged(map[string]string{ 56 "job": jobConfig.Name, 57 "aresCluster": jobConfig.AresTableConfig.Cluster, 58 }), 59 } 60 mp.populateDestination(jobConfig) 61 return mp 62 } 63 64 func (mp *Parser) populateDestination(jobConfig *rules.JobConfig) { 65 columnNames := []string{} 66 updateModes := []memcom.ColumnUpdateMode{} 67 primaryKeys := make(map[string]int) 68 primaryKeysInSchema := make(map[string]int) 69 destinations := jobConfig.GetDestinations() 70 71 for _, dstConfig := range destinations { 72 columnNames = append(columnNames, dstConfig.Column) 73 } 74 75 // sort column names in destination for consistent query order 76 sort.Strings(columnNames) 77 78 for id, column := range columnNames { 79 updateModes = append(updateModes, destinations[column].UpdateMode) 80 if oid, ok := jobConfig.GetPrimaryKeys()[column]; ok { 81 primaryKeysInSchema[column] = oid 82 primaryKeys[column] = id 83 } 84 } 85 86 mp.Destination = sink.Destination{ 87 Table: jobConfig.AresTableConfig.Table.Name, 88 ColumnNames: columnNames, 89 PrimaryKeys: primaryKeys, 90 PrimaryKeysInSchema: primaryKeysInSchema, 91 AresUpdateModes: updateModes, 92 NumShards: jobConfig.NumShards, 93 } 94 } 95 96 // ParseMessage will parse given message to fit the destination 97 func (mp *Parser) ParseMessage(msg map[string]interface{}, destination sink.Destination) (client.Row, error) { 98 mp.ServiceConfig.Logger.Debug("Parsing", zap.Any("msg", msg)) 99 var row client.Row 100 for _, col := range destination.ColumnNames { 101 transformation := mp.Transformations[col] 102 fromValue := mp.extractSourceFieldValue(msg, col) 103 toValue, err := transformation.Transform(fromValue) 104 if err != nil { 105 mp.ServiceConfig.Logger.Error("Tranformation error", 106 zap.String("job", mp.JobName), 107 zap.String("cluster", mp.Cluster), 108 zap.String("field", col), 109 zap.String("name", GetFuncName()), 110 zap.Error(err)) 111 } 112 row = append(row, toValue) 113 } 114 return row, nil 115 } 116 117 // IsMessageValid checks if the message is valid 118 func (mp *Parser) IsMessageValid(msg map[string]interface{}, destination sink.Destination) error { 119 if len(destination.ColumnNames) == 0 { 120 return utils.StackError(nil, "No column names specified") 121 } 122 123 if len(destination.AresUpdateModes) != len(destination.ColumnNames) { 124 return utils.StackError(nil, 125 "length of column update modes %d does not equal to number of columns %d", 126 len(destination.AresUpdateModes), len(destination.ColumnNames)) 127 } 128 129 return nil 130 } 131 132 // CheckPrimaryKeys returns error if the value of primary key column is nil 133 func (mp *Parser) CheckPrimaryKeys(destination sink.Destination, row client.Row) error { 134 for columnName, columnID := range destination.PrimaryKeys { 135 if row[columnID] == nil { 136 return utils.StackError(nil, "Primary key column %s is nil", columnName) 137 } 138 } 139 return nil 140 } 141 142 // CheckTimeColumnExistence checks if time column is missing for fact table 143 func (mp *Parser) CheckTimeColumnExistence(schema metaCom.Table, columnDict map[string]int, 144 destination sink.Destination, row client.Row) error { 145 if !schema.IsFactTable || schema.Config.AllowMissingEventTime { 146 return nil 147 } 148 149 for id, columnName := range destination.ColumnNames { 150 151 columnID := columnDict[columnName] 152 if columnID == 0 && row[id] != nil { 153 return nil 154 } 155 } 156 return utils.StackError(nil, "Missing time column") 157 } 158 159 func (mp *Parser) extractSourceFieldValue(msg map[string]interface{}, fieldName string) interface{} { 160 value, err := mp.getValue(msg, fieldName) 161 if err != nil { 162 mp.ServiceConfig.Logger.Debug("Failed to get value for", 163 zap.String("job", mp.JobName), 164 zap.String("cluster", mp.Cluster), 165 zap.String("field", fieldName), 166 zap.String("name", GetFuncName()), 167 zap.Error(err)) 168 } 169 return value 170 } 171 172 func (mp *Parser) getValue(msg map[string]interface{}, fieldName string) (interface{}, error) { 173 if value, found := msg[fieldName]; found { 174 return value, nil 175 } 176 return nil, 177 fmt.Errorf("Message does not contain key: %s, job: %s, cluster: %s", fieldName, mp.JobName, mp.Cluster) 178 } 179 180 //GetFuncName get the function name of the calling function 181 func GetFuncName() string { 182 p, _, _, _ := runtime.Caller(1) 183 fn := runtime.FuncForPC(p).Name() 184 return fn 185 }