github.com/weaviate/weaviate@v1.24.6/modules/text2vec-contextionary/classification/schema_for_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package classification
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/go-openapi/strfmt"
    18  	"github.com/weaviate/weaviate/entities/models"
    19  	"github.com/weaviate/weaviate/entities/schema"
    20  	"github.com/weaviate/weaviate/entities/search"
    21  )
    22  
    23  func testSchema() schema.Schema {
    24  	return schema.Schema{
    25  		Objects: &models.Schema{
    26  			Classes: []*models.Class{
    27  				{
    28  					Class: "ExactCategory",
    29  				},
    30  				{
    31  					Class: "MainCategory",
    32  				},
    33  				{
    34  					Class: "Article",
    35  					Properties: []*models.Property{
    36  						{
    37  							Name:     "description",
    38  							DataType: []string{string(schema.DataTypeText)},
    39  						},
    40  						{
    41  							Name:         "name",
    42  							DataType:     schema.DataTypeText.PropString(),
    43  							Tokenization: models.PropertyTokenizationWhitespace,
    44  						},
    45  						{
    46  							Name:     "exactCategory",
    47  							DataType: []string{"ExactCategory"},
    48  						},
    49  						{
    50  							Name:     "mainCategory",
    51  							DataType: []string{"MainCategory"},
    52  						},
    53  						{
    54  							Name:     "categories",
    55  							DataType: []string{"ExactCategory"},
    56  						},
    57  						{
    58  							Name:     "anyCategory",
    59  							DataType: []string{"MainCategory", "ExactCategory"},
    60  						},
    61  					},
    62  				},
    63  			},
    64  		},
    65  	}
    66  }
    67  
    68  // vector position close to [1,0,0] means -> politics, [0,1,0] means -> society, [0, 0, 1] -> food&drink
    69  func testDataToBeClassified() search.Results {
    70  	return search.Results{
    71  		search.Result{
    72  			ID:        "75ba35af-6a08-40ae-b442-3bec69b355f9",
    73  			ClassName: "Article",
    74  			Vector:    []float32{0.78, 0, 0},
    75  			Schema: map[string]interface{}{
    76  				"description": "Barack Obama is a former US president",
    77  			},
    78  		},
    79  		search.Result{
    80  			ID:        "f850439a-d3cd-4f17-8fbf-5a64405645cd",
    81  			ClassName: "Article",
    82  			Vector:    []float32{0.90, 0, 0},
    83  			Schema: map[string]interface{}{
    84  				"description": "Michelle Obama is Barack Obamas wife",
    85  			},
    86  		},
    87  		search.Result{
    88  			ID:        "a2bbcbdc-76e1-477d-9e72-a6d2cfb50109",
    89  			ClassName: "Article",
    90  			Vector:    []float32{0, 0.78, 0},
    91  			Schema: map[string]interface{}{
    92  				"description": "Johnny Depp is an actor",
    93  			},
    94  		},
    95  		search.Result{
    96  			ID:        "069410c3-4b9e-4f68-8034-32a066cb7997",
    97  			ClassName: "Article",
    98  			Vector:    []float32{0, 0.90, 0},
    99  			Schema: map[string]interface{}{
   100  				"description": "Brad Pitt starred in a Quentin Tarantino movie",
   101  			},
   102  		},
   103  		search.Result{
   104  			ID:        "06a1e824-889c-4649-97f9-1ed3fa401d8e",
   105  			ClassName: "Article",
   106  			Vector:    []float32{0, 0, 0.78},
   107  			Schema: map[string]interface{}{
   108  				"description": "Ice Cream often contains a lot of sugar",
   109  			},
   110  		},
   111  		search.Result{
   112  			ID:        "6402e649-b1e0-40ea-b192-a64eab0d5e56",
   113  			ClassName: "Article",
   114  			Vector:    []float32{0, 0, 0.90},
   115  			Schema: map[string]interface{}{
   116  				"description": "French Fries are more common in Belgium and the US than in France",
   117  			},
   118  		},
   119  	}
   120  }
   121  
   122  func testDataVectors() map[string][]float32 {
   123  	return map[string][]float32{
   124  		"barack":   {0.7, 0, 0},
   125  		"michelle": {0.7, 0, 0},
   126  		"obama":    {1.0, 0, 0},
   127  		"us":       {0.6, 0.5, 0.4},
   128  		"depp":     {0.1, 0.8, 0.2},
   129  		"actor":    {0.1, 0.9, 0.0},
   130  		"brad":     {0.1, 0.8, 0.2},
   131  		"starred":  {0.1, 0.9, 0.0},
   132  		"ice":      {0, 0.1, 0.9},
   133  		"cream":    {0, 0.1, 0.8},
   134  		"sugar":    {0.3, 0.2, 0.9},
   135  		"french":   {0.5, 0.5, 0.4},
   136  		"fries":    {0, 0.1, 0.95},
   137  		"belgium":  {0.3, 0.3, 0.2},
   138  	}
   139  }
   140  
   141  const (
   142  	idMainCategoryPoliticsAndSociety = "39c6abe3-4bbe-4c4e-9e60-ca5e99ec6b4e"
   143  	idMainCategoryFoodAndDrink       = "5a3d909a-4f0d-4168-8f5c-cd3074d1e79a"
   144  	idCategoryPolitics               = "1b204f16-7da6-44fd-bbd2-8cc4a7414bc3"
   145  	idCategorySociety                = "ec500f39-1dc9-4580-9bd1-55a8ea8e37a2"
   146  	idCategoryFoodAndDrink           = "027b708a-31ca-43ea-9001-88bec864c79c"
   147  )
   148  
   149  // only used for contextual type classification
   150  func testDataPossibleTargets() search.Results {
   151  	return search.Results{
   152  		search.Result{
   153  			ID:        idMainCategoryPoliticsAndSociety,
   154  			ClassName: "MainCategory",
   155  			Vector:    []float32{1.01, 1.01, 0},
   156  			Schema: map[string]interface{}{
   157  				"name": "Politics and Society",
   158  			},
   159  		},
   160  		search.Result{
   161  			ID:        idMainCategoryFoodAndDrink,
   162  			ClassName: "MainCategory",
   163  			Vector:    []float32{0, 0, 0.99},
   164  			Schema: map[string]interface{}{
   165  				"name": "Food and Drinks",
   166  			},
   167  		},
   168  		search.Result{
   169  			ID:        idCategoryPolitics,
   170  			ClassName: "ExactCategory",
   171  			Vector:    []float32{0.99, 0, 0},
   172  			Schema: map[string]interface{}{
   173  				"name": "Politics",
   174  			},
   175  		},
   176  		search.Result{
   177  			ID:        idCategorySociety,
   178  			ClassName: "ExactCategory",
   179  			Vector:    []float32{0, 0.90, 0},
   180  			Schema: map[string]interface{}{
   181  				"name": "Society",
   182  			},
   183  		},
   184  		search.Result{
   185  			ID:        idCategoryFoodAndDrink,
   186  			ClassName: "ExactCategory",
   187  			Vector:    []float32{0, 0, 0.99},
   188  			Schema: map[string]interface{}{
   189  				"name": "Food and Drink",
   190  			},
   191  		},
   192  	}
   193  }
   194  
   195  func beaconRef(target string) *models.SingleRef {
   196  	beacon := fmt.Sprintf("weaviate://localhost/%s", target)
   197  	return &models.SingleRef{Beacon: strfmt.URI(beacon)}
   198  }
   199  
   200  // only used for knn-type
   201  func testDataAlreadyClassified() search.Results {
   202  	return search.Results{
   203  		search.Result{
   204  			ID:        "8aeecd06-55a0-462c-9853-81b31a284d80",
   205  			ClassName: "Article",
   206  			Vector:    []float32{1, 0, 0},
   207  			Schema: map[string]interface{}{
   208  				"description":   "This article talks about politics",
   209  				"exactCategory": models.MultipleRef{beaconRef(idCategoryPolitics)},
   210  				"mainCategory":  models.MultipleRef{beaconRef(idMainCategoryPoliticsAndSociety)},
   211  			},
   212  		},
   213  		search.Result{
   214  			ID:        "9f4c1847-2567-4de7-8861-34cf47a071ae",
   215  			ClassName: "Article",
   216  			Vector:    []float32{0, 1, 0},
   217  			Schema: map[string]interface{}{
   218  				"description":   "This articles talks about society",
   219  				"exactCategory": models.MultipleRef{beaconRef(idCategorySociety)},
   220  				"mainCategory":  models.MultipleRef{beaconRef(idMainCategoryPoliticsAndSociety)},
   221  			},
   222  		},
   223  		search.Result{
   224  			ID:        "926416ec-8fb1-4e40-ab8c-37b226b3d68e",
   225  			ClassName: "Article",
   226  			Vector:    []float32{0, 0, 1},
   227  			Schema: map[string]interface{}{
   228  				"description":   "This article talks about food",
   229  				"exactCategory": models.MultipleRef{beaconRef(idCategoryFoodAndDrink)},
   230  				"mainCategory":  models.MultipleRef{beaconRef(idMainCategoryFoodAndDrink)},
   231  			},
   232  		},
   233  	}
   234  }