github.com/weaviate/weaviate@v1.24.6/modules/text2vec-contextionary/classification/schema_for_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package classification 13 14 import ( 15 "fmt" 16 17 "github.com/go-openapi/strfmt" 18 "github.com/weaviate/weaviate/entities/models" 19 "github.com/weaviate/weaviate/entities/schema" 20 "github.com/weaviate/weaviate/entities/search" 21 ) 22 23 func testSchema() schema.Schema { 24 return schema.Schema{ 25 Objects: &models.Schema{ 26 Classes: []*models.Class{ 27 { 28 Class: "ExactCategory", 29 }, 30 { 31 Class: "MainCategory", 32 }, 33 { 34 Class: "Article", 35 Properties: []*models.Property{ 36 { 37 Name: "description", 38 DataType: []string{string(schema.DataTypeText)}, 39 }, 40 { 41 Name: "name", 42 DataType: schema.DataTypeText.PropString(), 43 Tokenization: models.PropertyTokenizationWhitespace, 44 }, 45 { 46 Name: "exactCategory", 47 DataType: []string{"ExactCategory"}, 48 }, 49 { 50 Name: "mainCategory", 51 DataType: []string{"MainCategory"}, 52 }, 53 { 54 Name: "categories", 55 DataType: []string{"ExactCategory"}, 56 }, 57 { 58 Name: "anyCategory", 59 DataType: []string{"MainCategory", "ExactCategory"}, 60 }, 61 }, 62 }, 63 }, 64 }, 65 } 66 } 67 68 // vector position close to [1,0,0] means -> politics, [0,1,0] means -> society, [0, 0, 1] -> food&drink 69 func testDataToBeClassified() search.Results { 70 return search.Results{ 71 search.Result{ 72 ID: "75ba35af-6a08-40ae-b442-3bec69b355f9", 73 ClassName: "Article", 74 Vector: []float32{0.78, 0, 0}, 75 Schema: map[string]interface{}{ 76 "description": "Barack Obama is a former US president", 77 }, 78 }, 79 search.Result{ 80 ID: "f850439a-d3cd-4f17-8fbf-5a64405645cd", 81 ClassName: "Article", 82 Vector: []float32{0.90, 0, 0}, 83 Schema: map[string]interface{}{ 84 "description": "Michelle Obama is Barack Obamas wife", 85 }, 86 }, 87 search.Result{ 88 ID: "a2bbcbdc-76e1-477d-9e72-a6d2cfb50109", 89 ClassName: "Article", 90 Vector: []float32{0, 0.78, 0}, 91 Schema: map[string]interface{}{ 92 "description": "Johnny Depp is an actor", 93 }, 94 }, 95 search.Result{ 96 ID: "069410c3-4b9e-4f68-8034-32a066cb7997", 97 ClassName: "Article", 98 Vector: []float32{0, 0.90, 0}, 99 Schema: map[string]interface{}{ 100 "description": "Brad Pitt starred in a Quentin Tarantino movie", 101 }, 102 }, 103 search.Result{ 104 ID: "06a1e824-889c-4649-97f9-1ed3fa401d8e", 105 ClassName: "Article", 106 Vector: []float32{0, 0, 0.78}, 107 Schema: map[string]interface{}{ 108 "description": "Ice Cream often contains a lot of sugar", 109 }, 110 }, 111 search.Result{ 112 ID: "6402e649-b1e0-40ea-b192-a64eab0d5e56", 113 ClassName: "Article", 114 Vector: []float32{0, 0, 0.90}, 115 Schema: map[string]interface{}{ 116 "description": "French Fries are more common in Belgium and the US than in France", 117 }, 118 }, 119 } 120 } 121 122 func testDataVectors() map[string][]float32 { 123 return map[string][]float32{ 124 "barack": {0.7, 0, 0}, 125 "michelle": {0.7, 0, 0}, 126 "obama": {1.0, 0, 0}, 127 "us": {0.6, 0.5, 0.4}, 128 "depp": {0.1, 0.8, 0.2}, 129 "actor": {0.1, 0.9, 0.0}, 130 "brad": {0.1, 0.8, 0.2}, 131 "starred": {0.1, 0.9, 0.0}, 132 "ice": {0, 0.1, 0.9}, 133 "cream": {0, 0.1, 0.8}, 134 "sugar": {0.3, 0.2, 0.9}, 135 "french": {0.5, 0.5, 0.4}, 136 "fries": {0, 0.1, 0.95}, 137 "belgium": {0.3, 0.3, 0.2}, 138 } 139 } 140 141 const ( 142 idMainCategoryPoliticsAndSociety = "39c6abe3-4bbe-4c4e-9e60-ca5e99ec6b4e" 143 idMainCategoryFoodAndDrink = "5a3d909a-4f0d-4168-8f5c-cd3074d1e79a" 144 idCategoryPolitics = "1b204f16-7da6-44fd-bbd2-8cc4a7414bc3" 145 idCategorySociety = "ec500f39-1dc9-4580-9bd1-55a8ea8e37a2" 146 idCategoryFoodAndDrink = "027b708a-31ca-43ea-9001-88bec864c79c" 147 ) 148 149 // only used for contextual type classification 150 func testDataPossibleTargets() search.Results { 151 return search.Results{ 152 search.Result{ 153 ID: idMainCategoryPoliticsAndSociety, 154 ClassName: "MainCategory", 155 Vector: []float32{1.01, 1.01, 0}, 156 Schema: map[string]interface{}{ 157 "name": "Politics and Society", 158 }, 159 }, 160 search.Result{ 161 ID: idMainCategoryFoodAndDrink, 162 ClassName: "MainCategory", 163 Vector: []float32{0, 0, 0.99}, 164 Schema: map[string]interface{}{ 165 "name": "Food and Drinks", 166 }, 167 }, 168 search.Result{ 169 ID: idCategoryPolitics, 170 ClassName: "ExactCategory", 171 Vector: []float32{0.99, 0, 0}, 172 Schema: map[string]interface{}{ 173 "name": "Politics", 174 }, 175 }, 176 search.Result{ 177 ID: idCategorySociety, 178 ClassName: "ExactCategory", 179 Vector: []float32{0, 0.90, 0}, 180 Schema: map[string]interface{}{ 181 "name": "Society", 182 }, 183 }, 184 search.Result{ 185 ID: idCategoryFoodAndDrink, 186 ClassName: "ExactCategory", 187 Vector: []float32{0, 0, 0.99}, 188 Schema: map[string]interface{}{ 189 "name": "Food and Drink", 190 }, 191 }, 192 } 193 } 194 195 func beaconRef(target string) *models.SingleRef { 196 beacon := fmt.Sprintf("weaviate://localhost/%s", target) 197 return &models.SingleRef{Beacon: strfmt.URI(beacon)} 198 } 199 200 // only used for knn-type 201 func testDataAlreadyClassified() search.Results { 202 return search.Results{ 203 search.Result{ 204 ID: "8aeecd06-55a0-462c-9853-81b31a284d80", 205 ClassName: "Article", 206 Vector: []float32{1, 0, 0}, 207 Schema: map[string]interface{}{ 208 "description": "This article talks about politics", 209 "exactCategory": models.MultipleRef{beaconRef(idCategoryPolitics)}, 210 "mainCategory": models.MultipleRef{beaconRef(idMainCategoryPoliticsAndSociety)}, 211 }, 212 }, 213 search.Result{ 214 ID: "9f4c1847-2567-4de7-8861-34cf47a071ae", 215 ClassName: "Article", 216 Vector: []float32{0, 1, 0}, 217 Schema: map[string]interface{}{ 218 "description": "This articles talks about society", 219 "exactCategory": models.MultipleRef{beaconRef(idCategorySociety)}, 220 "mainCategory": models.MultipleRef{beaconRef(idMainCategoryPoliticsAndSociety)}, 221 }, 222 }, 223 search.Result{ 224 ID: "926416ec-8fb1-4e40-ab8c-37b226b3d68e", 225 ClassName: "Article", 226 Vector: []float32{0, 0, 1}, 227 Schema: map[string]interface{}{ 228 "description": "This article talks about food", 229 "exactCategory": models.MultipleRef{beaconRef(idCategoryFoodAndDrink)}, 230 "mainCategory": models.MultipleRef{beaconRef(idMainCategoryFoodAndDrink)}, 231 }, 232 }, 233 } 234 }