github.com/aristanetworks/goarista@v0.0.0-20240514173732-cca2755bbd44/elasticsearch/schema.go (about)

     1  // Copyright (c) 2018 Arista Networks, Inc.  All rights reserved.
     2  // Arista Networks, Inc. Confidential and Proprietary.
     3  // Subject to Arista Networks, Inc.'s EULA.
     4  // FOR INTERNAL USE ONLY. NOT FOR DISTRIBUTION.
     5  
     6  package elasticsearch
     7  
// field is one entry of the nested KeyData and Value arrays of Data:
// an optional name plus the value, stored in the member matching its type.
type field struct {
	// Every member is tagged omitempty, so nil pointers and empty strings
	// are left out of the encoded document.
	Name   string   `json:"name,omitempty"`
	String *string  `json:"string,omitempty"`
	Double *float64 `json:"double,omitempty"`
	Long   *int64   `json:"long,omitempty"`
	Bool   *bool    `json:"bool,omitempty"`
	Ptr    *string  `json:"ptr,omitempty"`
	// If the string looks like an IP address,
	// it is also indexed here as an IP address.
	IP string `json:"ip,omitempty"`
	// If the string looks like a MAC address,
	// it is also indexed here as a MAC address.
	MAC string `json:"mac,omitempty"`
}
    22  
// Data represents the document format for a notification.
// No json tag here sets a name, so every field is encoded under its Go
// field name; fields tagged omitempty are dropped when nil or empty.
type Data struct {
	// The timestamp in nanosecond resolution
	Timestamp uint64
	// Organization ID
	OrgID       string
	DatasetType string
	// The dataset ID
	DatasetID string
	// The stringified path
	Path string
	// The codec encoded key (encoding/json emits a []byte as base64)
	Key []byte
	// The key data.
	// KeyData is an array in which each entry is an object with a "name"
	// field and a "<type>" field holding the value.
	// If name is not set, the data is put in one of the simple type fields
	// below instead.
	// The problem with nested types is that each entry in the array creates
	// a doc, and the number of docs explodes, which is not good.
	// So one optimization is to flatten simple values and not use the
	// nested field for them.
	KeyData   []*field `json:",omitempty"`
	KeyString *string  `json:",omitempty"`
	KeyDouble *float64 `json:",omitempty"`
	KeyLong   *int64   `json:",omitempty"`
	KeyBool   *bool    `json:",omitempty"`
	KeyPtr    *string  `json:",omitempty"`
	// If the simple string looks like an IP address,
	// it is also indexed here as an IP address.
	KeyIP string `json:",omitempty"`
	// If the simple string looks like a MAC address,
	// it is also indexed here as a MAC address.
	KeyMAC string `json:",omitempty"`
	// The value data.
	// Value is an array in which each entry is an object with a "name"
	// field and a "<type>" field holding the value.
	// If name is not set, the data was a simple value and is put in one of
	// the simple type fields below instead.
	// The problem with nested types is that each entry in the array creates
	// a doc, and the number of docs explodes, which is not good.
	// So one optimization is to flatten simple values and not use the
	// nested field for them.
	Value       []*field `json:",omitempty"`
	ValueString *string  `json:",omitempty"`
	ValueDouble *float64 `json:",omitempty"`
	ValueLong   *int64   `json:",omitempty"`
	ValueBool   *bool    `json:",omitempty"`
	ValuePtr    *string  `json:",omitempty"`
	// If the simple string looks like an IP address,
	// it is also indexed here as an IP address.
	ValueIP string `json:",omitempty"`
	// If the simple string looks like a MAC address,
	// it is also indexed here as a MAC address.
	ValueMAC string `json:",omitempty"`

	// Present when it's a delete.
	// In this case, value will not be present.
	Del *bool `json:",omitempty"`
	// Present when it's a deleteAll.
	// In this case, key and value will not be present.
	DelAll *bool `json:",omitempty"`
}
    82  
// index describes the Elasticsearch index that stores Data documents: the
// index-level settings (compression, sharding, custom analyzers) and the
// mapping of every document field.
// NOTE(review): presumably marshalled to JSON and used as the body of the
// index-creation request; the caller is not in this file, so confirm there.
var index = map[string]interface{}{
	"settings": map[string]interface{}{
		"index": map[string]interface{}{
			"codec":              "best_compression",
			"number_of_shards":   5,
			"number_of_replicas": 2,
		},
		"analysis": map[string]interface{}{
			"analyzer": map[string]interface{}{
				// Index-time analyzer of the MAC fields: mac_tokenizer output,
				// lowercased. The MAC mappings pair it with the "keyword"
				// search analyzer, presumably so that a query matches any
				// indexed prefix of a MAC address.
				"mac_analyzer": map[string]interface{}{
					"tokenizer": "mac_tokenizer",
					"filter": []string{
						"lowercase",
					},
				},
				// NOTE(review): no mapping below references path_analyzer
				// ("Path" is a plain keyword) — confirm whether it is used
				// elsewhere or is a leftover.
				"path_analyzer": map[string]interface{}{
					"tokenizer": "path_tokenizer",
				},
			},
			"tokenizer": map[string]interface{}{
				// Edge n-grams of 2 to 17 characters; 17 is presumably the
				// length of a colon-separated MAC address — TODO confirm.
				"mac_tokenizer": map[string]interface{}{
					"type":     "edgeNGram",
					"min_gram": "2",
					"max_gram": "17",
				},
				// Splits a path on "/" into its hierarchy of ancestors.
				"path_tokenizer": map[string]interface{}{
					"type":      "path_hierarchy",
					"delimiter": "/",
				},
			},
		},
	},

	// ID of the doc is:
	// {orgid}-{dataset_id}-{md5 "{tsnano}-{codec_path}-{codec_key}"}
	// Note: for DeleteAll the "-{codec_key}" part is omitted.
	// An id in elasticsearch can be 512 bytes max, so the variable-length
	// part is hashed.
	// NOTE(review): the format above names md5, while this note used to say
	// sha1 — confirm which hash the writer actually uses.
	// Collisions are theoretically possible, though unlikely. If one does
	// happen, too bad: we'll have corrupted data.
	// The dataset ID is part of the id, so in the unlikely case of a
	// collision, it cannot happen across organizations/devices.
	"mappings": map[string]interface{}{
		"_doc": map[string]interface{}{
			"properties": map[string]interface{}{
				// Timestamp in nanoseconds
				"Timestamp": map[string]interface{}{
					"type": "long",
				},
				// Organization ID
				// NOTE(review): Data.OrgID is a Go string but is mapped as
				// long here; presumably the values are always numeric —
				// confirm.
				"OrgID": map[string]interface{}{
					"type": "long",
				},
				// Dataset type
				"DatasetType": map[string]interface{}{
					"type": "text",
				},
				// Dataset ID
				// NOTE(review): Data.DatasetID is a Go string but is mapped
				// as long here; presumably the values are always numeric —
				// confirm.
				"DatasetID": map[string]interface{}{
					"type": "long",
				},
				// Disabled mapping, kept for reference: base64 encoding of
				// the codec encoded representation of the path.
				// 		"path": {
				// 			"type": "binary"
				// 		},
				// The stringified version of the path
				"Path": map[string]interface{}{
					"type": "keyword",
				},
				// base64 encoding of the codec encoded representation of the key
				"Key": map[string]interface{}{
					"type":       "binary",
					"doc_values": true,
				},
				// This array has each entry as an object with a "name" field
				// and a "<type>" field for the value.
				// If name is not set, the data was a simple value.
				"KeyData": map[string]interface{}{
					"type": "nested",
					"properties": map[string]interface{}{
						"name": map[string]interface{}{
							"type": "text",
						},
						"long": map[string]interface{}{
							"type": "long",
						},
						"string": map[string]interface{}{
							"type": "text",
						},
						"double": map[string]interface{}{
							"type": "double",
						},
						"bool": map[string]interface{}{
							"type": "boolean",
						},
						"ptr": map[string]interface{}{
							"type": "keyword",
						},
						"ip": map[string]interface{}{
							"type": "ip",
						},
						"mac": map[string]interface{}{
							"type":            "text",
							"analyzer":        "mac_analyzer",
							"search_analyzer": "keyword",
						},
					},
				},
				// Flattened counterparts of the KeyData members, used for
				// simple (unnamed) key values.
				"KeyLong": map[string]interface{}{
					"type": "long",
				},
				"KeyString": map[string]interface{}{
					"type": "text",
				},
				"KeyDouble": map[string]interface{}{
					"type": "double",
				},
				"KeyBool": map[string]interface{}{
					"type": "boolean",
				},
				"KeyPtr": map[string]interface{}{
					"type": "keyword",
				},
				"KeyIP": map[string]interface{}{
					"type": "ip",
				},
				"KeyMAC": map[string]interface{}{
					"type":            "text",
					"analyzer":        "mac_analyzer",
					"search_analyzer": "keyword",
				},
				// This array has each entry as an object with a "name" field
				// and a "<type>" field for the value.
				// If name is not set, the data was a simple value.
				"Value": map[string]interface{}{
					"type": "nested",
					"properties": map[string]interface{}{
						"name": map[string]interface{}{
							"type": "text",
						},
						"long": map[string]interface{}{
							"type": "long",
						},
						"string": map[string]interface{}{
							"type": "text",
						},
						"double": map[string]interface{}{
							"type": "double",
						},
						"bool": map[string]interface{}{
							"type": "boolean",
						},
						"ptr": map[string]interface{}{
							"type": "keyword",
						},
						"ip": map[string]interface{}{
							"type": "ip",
						},
						"mac": map[string]interface{}{
							"type":            "text",
							"analyzer":        "mac_analyzer",
							"search_analyzer": "keyword",
						},
					},
				},
				// Flattened counterparts of the Value members, used for
				// simple (unnamed) values.
				"ValueLong": map[string]interface{}{
					"type": "long",
				},
				"ValueString": map[string]interface{}{
					"type": "text",
				},
				"ValueDouble": map[string]interface{}{
					"type": "double",
				},
				"ValueBool": map[string]interface{}{
					"type": "boolean",
				},
				"ValuePtr": map[string]interface{}{
					"type": "keyword",
				},
				"ValueIP": map[string]interface{}{
					"type": "ip",
				},
				"ValueMAC": map[string]interface{}{
					"type":            "text",
					"analyzer":        "mac_analyzer",
					"search_analyzer": "keyword",
				},
				// Present when it's a delete.
				// In this case, value will not be present.
				"Del": map[string]interface{}{
					"type": "boolean",
				},
				// Present when it's a deleteAll.
				// In this case, key and value will not be present.
				"DelAll": map[string]interface{}{
					"type": "boolean",
				},
				// Percolator field. Data has no matching member, so
				// presumably the stored queries are written by other code —
				// confirm against the caller.
				"query": map[string]interface{}{
					"type": "percolator",
				},
			},
		},
	},
}
   287  
   288  // excludedFields are fields that are not affected by init options
   289  // this is mainly to make excluded numeric types queryable
   290  var excludedFields = map[string]interface{}{
   291  	"Timestamp":   struct{}{},
   292  	"OrgID":       struct{}{},
   293  	"DatasetType": struct{}{},
   294  	"DatasetID":   struct{}{},
   295  }