github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/subscriber/common/message/message_parser.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package message
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/uber-go/tally"
    20  	"github.com/uber/aresdb/client"
    21  	memcom "github.com/uber/aresdb/memstore/common"
    22  	metaCom "github.com/uber/aresdb/metastore/common"
    23  	"github.com/uber/aresdb/subscriber/common/rules"
    24  	"github.com/uber/aresdb/subscriber/common/sink"
    25  	"github.com/uber/aresdb/subscriber/config"
    26  	"github.com/uber/aresdb/utils"
    27  	"go.uber.org/zap"
    28  	"runtime"
    29  	"sort"
    30  )
    31  
// Parser holds all resources needed to parse one message
// into one or multiple row objects with respect to different destinations.
type Parser struct {
	// ServiceConfig is the ares-subscriber service configuration. The parser
	// logs through its Logger and derives its metrics scope from its Scope.
	ServiceConfig config.ServiceConfig
	// JobName is the name of the job this parser was created for.
	JobName string
	// Cluster is the ares cluster name taken from the job's table config.
	Cluster string
	// Destination describes where parsed rows go: the table name, the sorted
	// column names, the primary key indexes, the per-column update modes and
	// the number of shards.
	Destination sink.Destination
	// Transformations are keyed on the output column name.
	Transformations map[string]*rules.TransformationConfig
	scope           tally.Scope // metrics scope tagged with the job and ares cluster names
}
    47  
    48  // NewParser will create a Parser for given JobConfig
    49  func NewParser(jobConfig *rules.JobConfig, serviceConfig config.ServiceConfig) *Parser {
    50  	mp := &Parser{
    51  		ServiceConfig:   serviceConfig,
    52  		JobName:         jobConfig.Name,
    53  		Cluster:         jobConfig.AresTableConfig.Cluster,
    54  		Transformations: jobConfig.GetTranformations(),
    55  		scope: serviceConfig.Scope.Tagged(map[string]string{
    56  			"job":         jobConfig.Name,
    57  			"aresCluster": jobConfig.AresTableConfig.Cluster,
    58  		}),
    59  	}
    60  	mp.populateDestination(jobConfig)
    61  	return mp
    62  }
    63  
    64  func (mp *Parser) populateDestination(jobConfig *rules.JobConfig) {
    65  	columnNames := []string{}
    66  	updateModes := []memcom.ColumnUpdateMode{}
    67  	primaryKeys := make(map[string]int)
    68  	primaryKeysInSchema := make(map[string]int)
    69  	destinations := jobConfig.GetDestinations()
    70  
    71  	for _, dstConfig := range destinations {
    72  		columnNames = append(columnNames, dstConfig.Column)
    73  	}
    74  
    75  	// sort column names in destination for consistent query order
    76  	sort.Strings(columnNames)
    77  
    78  	for id, column := range columnNames {
    79  		updateModes = append(updateModes, destinations[column].UpdateMode)
    80  		if oid, ok := jobConfig.GetPrimaryKeys()[column]; ok {
    81  			primaryKeysInSchema[column] = oid
    82  			primaryKeys[column] = id
    83  		}
    84  	}
    85  
    86  	mp.Destination = sink.Destination{
    87  		Table:               jobConfig.AresTableConfig.Table.Name,
    88  		ColumnNames:         columnNames,
    89  		PrimaryKeys:         primaryKeys,
    90  		PrimaryKeysInSchema: primaryKeysInSchema,
    91  		AresUpdateModes:     updateModes,
    92  		NumShards:           jobConfig.NumShards,
    93  	}
    94  }
    95  
    96  // ParseMessage will parse given message to fit the destination
    97  func (mp *Parser) ParseMessage(msg map[string]interface{}, destination sink.Destination) (client.Row, error) {
    98  	mp.ServiceConfig.Logger.Debug("Parsing", zap.Any("msg", msg))
    99  	var row client.Row
   100  	for _, col := range destination.ColumnNames {
   101  		transformation := mp.Transformations[col]
   102  		fromValue := mp.extractSourceFieldValue(msg, col)
   103  		toValue, err := transformation.Transform(fromValue)
   104  		if err != nil {
   105  			mp.ServiceConfig.Logger.Error("Tranformation error",
   106  				zap.String("job", mp.JobName),
   107  				zap.String("cluster", mp.Cluster),
   108  				zap.String("field", col),
   109  				zap.String("name", GetFuncName()),
   110  				zap.Error(err))
   111  		}
   112  		row = append(row, toValue)
   113  	}
   114  	return row, nil
   115  }
   116  
   117  // IsMessageValid checks if the message is valid
   118  func (mp *Parser) IsMessageValid(msg map[string]interface{}, destination sink.Destination) error {
   119  	if len(destination.ColumnNames) == 0 {
   120  		return utils.StackError(nil, "No column names specified")
   121  	}
   122  
   123  	if len(destination.AresUpdateModes) != len(destination.ColumnNames) {
   124  		return utils.StackError(nil,
   125  			"length of column update modes %d does not equal to number of columns %d",
   126  			len(destination.AresUpdateModes), len(destination.ColumnNames))
   127  	}
   128  
   129  	return nil
   130  }
   131  
   132  // CheckPrimaryKeys returns error if the value of primary key column is nil
   133  func (mp *Parser) CheckPrimaryKeys(destination sink.Destination, row client.Row) error {
   134  	for columnName, columnID := range destination.PrimaryKeys {
   135  		if row[columnID] == nil {
   136  			return utils.StackError(nil, "Primary key column %s is nil", columnName)
   137  		}
   138  	}
   139  	return nil
   140  }
   141  
   142  // CheckTimeColumnExistence checks if time column is missing for fact table
   143  func (mp *Parser) CheckTimeColumnExistence(schema metaCom.Table, columnDict map[string]int,
   144  	destination sink.Destination, row client.Row) error {
   145  	if !schema.IsFactTable || schema.Config.AllowMissingEventTime {
   146  		return nil
   147  	}
   148  
   149  	for id, columnName := range destination.ColumnNames {
   150  
   151  		columnID := columnDict[columnName]
   152  		if columnID == 0 && row[id] != nil {
   153  			return nil
   154  		}
   155  	}
   156  	return utils.StackError(nil, "Missing time column")
   157  }
   158  
   159  func (mp *Parser) extractSourceFieldValue(msg map[string]interface{}, fieldName string) interface{} {
   160  	value, err := mp.getValue(msg, fieldName)
   161  	if err != nil {
   162  		mp.ServiceConfig.Logger.Debug("Failed to get value for",
   163  			zap.String("job", mp.JobName),
   164  			zap.String("cluster", mp.Cluster),
   165  			zap.String("field", fieldName),
   166  			zap.String("name", GetFuncName()),
   167  			zap.Error(err))
   168  	}
   169  	return value
   170  }
   171  
   172  func (mp *Parser) getValue(msg map[string]interface{}, fieldName string) (interface{}, error) {
   173  	if value, found := msg[fieldName]; found {
   174  		return value, nil
   175  	}
   176  	return nil,
   177  		fmt.Errorf("Message does not contain key: %s, job: %s, cluster: %s", fieldName, mp.JobName, mp.Cluster)
   178  }
   179  
   180  //GetFuncName get the function name of the calling function
   181  func GetFuncName() string {
   182  	p, _, _, _ := runtime.Caller(1)
   183  	fn := runtime.FuncForPC(p).Name()
   184  	return fn
   185  }