github.com/hiyorimi/geobed@v0.0.0-20190227204948-42ebdc6a8871/geobed_test.go (about) 1 package geobed 2 3 import ( 4 "testing" 5 6 "github.com/mmcloughlin/geohash" 7 "github.com/stretchr/testify/assert" 8 9 . "gopkg.in/check.v1" 10 ) 11 12 // Hook up gocheck into the "go test" runner. 13 func Test(t *testing.T) { TestingT(t) } 14 15 type GeobedSuite struct { 16 testLocations []map[string]string 17 } 18 19 var _ = Suite(&GeobedSuite{}) 20 21 var g GeoBed 22 23 func (s *GeobedSuite) SetUpSuite(c *C) { 24 // This is a common alternate name. However, there's a city called "Apple" 25 // (at least one). So it's a bit difficult. 26 // Plus many people would put "The Big Apple" ... Yet Geonames alt city 27 // names has just "Big Apple" ... It may be worth trying to improve this though. 28 //s.testLocations = append(s.testLocations, map[string]string{"query": "Big Apple", "city": "New York City", "country": "US", "region": "NY"}) 29 30 //s.testLocations = append(s.testLocations, map[string]string{ 31 // "query": "NYC", 32 // "city": "New York City", 33 // "country": "US", 34 // "region": "NY", 35 //}) 36 37 //s.testLocations = append(s.testLocations, map[string]string{"query": "New York, NY", 38 // "city": "New York City", 39 // "country": "US", 40 // "region": "NY"}) 41 //s.testLocations = append(s.testLocations, map[string]string{"query": "New York City", "city": "New York City", "country": "US", "region": "NY"}) 42 //s.testLocations = append(s.testLocations, map[string]string{"query": "Austin TX", "city": "Austin", "country": "US", "region": "TX"}) 43 //s.testLocations = append(s.testLocations, map[string]string{"query": "tx austin", "city": "Austin", "country": "US", "region": "TX"}) 44 //s.testLocations = append(s.testLocations, map[string]string{"query": "Paris, TX", "city": "Paris", "country": "US", "region": "TX"}) 45 //s.testLocations = append(s.testLocations, map[string]string{"query": "New Paris, IN", "city": "New Paris", "country": "US", "region": "IN"}) 46 //s.testLocations = append(s.testLocations, map[string]string{"query": "Sweden, Stockholm", "city": "Stockholm", "country": "SE", "region": "26"}) 47 //s.testLocations = append(s.testLocations, map[string]string{"query": "Stockholm", "city": "Stockholm", "country": "SE", "region": "26"}) 48 s.testLocations = append(s.testLocations, map[string]string{"query": "Newport Beach, Orange County ", "city": "Newport Beach", "country": "US", "region": "CA"}) 49 s.testLocations = append(s.testLocations, map[string]string{"query": "Newport Beach", "city": "Newport Beach", "country": "US", "region": "CA"}) 50 // TODO: make bigger cities range better 51 //s.testLocations = append(s.testLocations, map[string]string{"query": "london", "city": "London", "country": "GB", "region": ""}) 52 s.testLocations = append(s.testLocations, map[string]string{"query": "Paris", "city": "Paris", "country": "FR", "region": "11"}) 53 //s.testLocations = append(s.testLocations, map[string]string{"query": "New Paris", "city": "New Paris", "country": "US", "region": "IN"}) 54 55 // Often, "AUS" ends up mapping to Austria. 56 // In our case here, Ausa is a city in India. That's a logical match for "AUS" ... 57 // Airport codes are tricky. Most geocoding don't hangle them properly/reliably anyway. 58 //s.testLocations = append(s.testLocations, map[string]string{"query": "SFO", "city": "San Francisco", "country": "US", "region": "CA"}) 59 //s.testLocations = append(s.testLocations, map[string]string{"query": "AUS", "city": "Austin", "country": "US", "region": "TX"}) 60 61 // Will test out of range on the slice when looking up (0 end) 62 s.testLocations = append(s.testLocations, map[string]string{"query": "ਪੈਰਿਸ", "city": "'Aade\xefssa", "country": "SY", "region": "03"}) 63 } 64 65 func (s *GeobedSuite) TestANewGeobed(c *C) { 66 g = NewGeobed() 67 c.Assert(len(g.c), Not(Equals), 0) 68 c.Assert(len(g.co), Not(Equals), 0) 69 c.Assert(len(cityNameIdx), Not(Equals), 0) 70 c.Assert(g.c, FitsTypeOf, []GeobedCity(nil)) 71 c.Assert(g.co, FitsTypeOf, []CountryInfo(nil)) 72 c.Assert(cityNameIdx, FitsTypeOf, make(map[string]int)) 73 } 74 75 func TestExactMatchCity(t *testing.T) { 76 var geoBedTest GeoBed 77 var city, result GeobedCity 78 var country CountryInfo 79 80 result = geoBedTest.exactMatchCity("") 81 assert.Equal(t, GeobedCity{}, result) 82 83 city = GeobedCity{ 84 "Testcity", 85 "Testing city", 86 "Testcountry", 87 "Testregion", 88 42.42, 89 42.42, 90 int32(1000), 91 geohash.Encode(city.Latitude, city.Longitude), 92 } 93 country = CountryInfo{ 94 "Testcountry", 95 "Testcity", 96 1000, 97 10000, 98 1, 99 1, 100 "te", 101 "tes", 102 "", 103 "", 104 "", 105 "", 106 "", 107 "", 108 "", 109 "", 110 "", 111 "", 112 "", 113 } 114 geoBedTest.c = append(geoBedTest.c, city) 115 geoBedTest.co = append(geoBedTest.co, country) 116 117 result = geoBedTest.exactMatchCity("") 118 assert.Equal(t, GeobedCity{}, result) 119 120 result = g.exactMatchCity("NotMatchingQuery") 121 assert.Equal(t, GeobedCity{}, result) 122 } 123 124 func TestFuzzyMatchLocation(t *testing.T) { 125 var geoBedTest GeoBed 126 var city, result GeobedCity 127 var country CountryInfo 128 129 city = GeobedCity{ 130 "Testcity", 131 "Testing city", 132 "Testcountry", 133 "Testregion", 134 42.42, 135 42.42, 136 int32(1000), 137 geohash.Encode(city.Latitude, city.Longitude), 138 } 139 country = CountryInfo{ 140 "Testcountry", 141 "Testcity", 142 1000, 143 10000, 144 1, 145 1, 146 "te", 147 "tes", 148 "", 149 "", 150 "", 151 "", 152 "", 153 "", 154 "", 155 "", 156 "", 157 "", 158 "", 159 } 160 geoBedTest.c = append(geoBedTest.c, city) 161 geoBedTest.co = append(geoBedTest.co, country) 162 163 result = geoBedTest.fuzzyMatchLocation("") 164 assert.Equal(t, city, result) 165 166 result = g.fuzzyMatchLocation("Moscow") 167 assert.Equal(t, "Moscow", result.City) 168 assert.Equal(t, "RU", result.Country) 169 assert.InDelta(t, 55.75, result.Latitude, 0.2) 170 assert.InDelta(t, 37.61, result.Longitude, 0.2) 171 } 172 173 func TestGeoBed_store(t *testing.T) { 174 var err error 175 g = NewGeobed() 176 err = g.store() 177 if assert.Nil(t, err) { 178 assert.FileExists(t, "./geobed-data/g.c.dmp") 179 assert.FileExists(t, "./geobed-data/g.co.dmp") 180 assert.FileExists(t, "./geobed-data/cityNameIdx.dmp") 181 } 182 183 } 184 185 func TestGeoBed_downloadDataSets(t *testing.T) { 186 g = NewGeobed() 187 g.downloadDataSets() 188 assert.FileExists(t, "./geobed-data/g.c.dmp") 189 assert.FileExists(t, "./geobed-data/g.co.dmp") 190 assert.FileExists(t, "./geobed-data/cityNameIdx.dmp") 191 } 192 193 func TestGeoBed_loadDataSets(t *testing.T) { 194 g = GeoBed{} 195 assert.FileExists(t, "./geobed-data/g.c.dmp") 196 assert.FileExists(t, "./geobed-data/g.co.dmp") 197 assert.FileExists(t, "./geobed-data/cityNameIdx.dmp") 198 g.loadDataSets() 199 assert.NotEqual(t, 0, len(g.c)) 200 } 201 202 func (s *GeobedSuite) TestGeocode(c *C) { 203 var r GeobedCity 204 g = NewGeobed() 205 for _, v := range s.testLocations { 206 if _, ok := v["exact_match"]; ok { 207 r = g.Geocode(v["query"], GeocodeOptions{true}) 208 } else { 209 r = g.Geocode(v["query"]) 210 } 211 212 c.Assert(r.City, Equals, v["city"]) 213 c.Assert(r.Country, Equals, v["country"]) 214 // Due to all the data and various sets, the region can be a little weird. 215 // It's intended to be US state first and foremost (where it is most helpful in geocoding). 216 // TODO: Look back into this later and try to make sense of it all. 217 // It may end up needing to be multiple fields (which will further complicate the matching). 218 if v["region"] != "" { 219 c.Assert(r.Region, Equals, v["region"]) 220 } 221 } 222 223 r = g.Geocode("") 224 c.Assert(r.City, Equals, "") 225 226 r = g.Geocode(" ") 227 c.Assert(r.Population, Equals, int32(0)) 228 } 229 230 func (s *GeobedSuite) TestReverseGeocode(c *C) { 231 g = NewGeobed() 232 233 r := g.ReverseGeocode(30.26715, -97.74306) 234 c.Assert(r.City, Equals, "Austin") 235 c.Assert(r.Region, Equals, "TX") 236 c.Assert(r.Country, Equals, "US") 237 238 r = g.ReverseGeocode(37.44651, -122.15322) 239 c.Assert(r.City, Equals, "Palo Alto") 240 c.Assert(r.Region, Equals, "CA") 241 c.Assert(r.Country, Equals, "US") 242 243 r = g.ReverseGeocode(37, -122) 244 c.Assert(r.City, Equals, "Santa Cruz") 245 246 r = g.ReverseGeocode(37.44, -122.15) 247 c.Assert(r.City, Equals, "Stanford") 248 249 r = g.ReverseGeocode(51.51279, -0.09184) 250 c.Assert(r.City, Equals, "City of London") 251 252 r = g.ReverseGeocode(59.93186166742998, 30.320993812833134) 253 c.Assert(r.City, Equals, "Saint Petersburg") 254 255 } 256 257 func (s *GeobedSuite) TestNext(c *C) { 258 c.Assert(string(prev(rune("new york"[0]))), Equals, "m") 259 c.Assert(prev(rune("new york"[0])), Equals, int32(109)) 260 } 261 262 func (s *GeobedSuite) TestToUpper(c *C) { 263 c.Assert(toUpper("nyc"), Equals, "NYC") 264 } 265 266 func (s *GeobedSuite) TestToLower(c *C) { 267 c.Assert(toLower("NYC"), Equals, "nyc") 268 } 269 270 // Benchmark comments from a MacbookPro Retina with 8GB of RAM with who knows what running. 271 272 // 5629888699 ns/op 273 // 5336288337 ns/op 274 // 5473618388 ns/op 275 // This takes about 5 seconds (to load the data sets into memory - 276 // should only happen once per application, ideally one would do this up front) 277 func BenchmarkNewGeobed(b *testing.B) { 278 g = NewGeobed() 279 } 280 281 // 2285549904 ns/op 282 // 2393945317 ns/op 283 // 2214503806 ns/op 284 // 2265304148 ns/op 285 // 2186608767 ns/op 286 // This has been scoring around 2 - 2.4 seconds on 287 // MacbookPro Retina with 8GB of RAM (before concurrency was added) 288 // (20) 98841134 ns/op 289 func BenchmarkReverseGeocode(b *testing.B) { 290 for n := 0; n < b.N; n++ { 291 //g.ReverseGeocode(37.44651, -122.15322) 292 g.ReverseGeocode(51.51279, -0.09184) 293 } 294 } 295 296 // 297 // Before indexing the slice keys, it would take 2.8 - 3 seconds per lookup. 298 // 2968170541 ns/op 299 // 2956824815 ns/op 300 // 2861628023 ns/op 301 // 302 // After using the index and ranging over sections of the slice, 303 // it takes about 0.0175 - 0.02 seconds per lookup! 304 // (10) 175591906 ns/op 305 // (10) 180395494 ns/op 306 // (10) 123880439 ns/op 307 // (10) 124857396 ns/op 308 // (10) 164229982 ns/op (for Austin, TX) - speed can change 309 // a tiny bit based on what's being searched and where it is in the index, 310 // how items that start with the same characters, etc. 311 // (10) 135527499 ns/op 312 func BenchmarkGeocode(b *testing.B) { 313 314 for n := 0; n < b.N; n++ { 315 g.Geocode("New York") 316 } 317 }