github.com/PDOK/gokoala@v0.50.6/internal/ogc/features/datasources/geopackage/geopackage.go (about) 1 package geopackage 2 3 import ( 4 "context" 5 "database/sql" 6 "fmt" 7 "log" 8 "maps" 9 "os" 10 "path" 11 "sync" 12 "time" 13 14 "github.com/PDOK/gokoala/config" 15 16 "github.com/PDOK/gokoala/internal/engine/util" 17 "github.com/PDOK/gokoala/internal/ogc/features/datasources" 18 "github.com/PDOK/gokoala/internal/ogc/features/domain" 19 "github.com/go-spatial/geom" 20 "github.com/go-spatial/geom/encoding/gpkg" 21 "github.com/go-spatial/geom/encoding/wkt" 22 "github.com/jmoiron/sqlx" 23 "github.com/mattn/go-sqlite3" 24 "github.com/qustavo/sqlhooks/v2" 25 26 _ "github.com/mattn/go-sqlite3" // import for side effect (= sqlite3 driver) only 27 ) 28 29 const ( 30 sqliteDriverName = "sqlite3_with_extensions" 31 ) 32 33 var once sync.Once 34 35 // Load sqlite (with extensions) once. 36 // 37 // Extensions are by default expected in /usr/lib. For spatialite you can 38 // alternatively/optionally set SPATIALITE_LIBRARY_PATH. 39 func loadDriver() { 40 once.Do(func() { 41 spatialite := path.Join(os.Getenv("SPATIALITE_LIBRARY_PATH"), "mod_spatialite") 42 driver := &sqlite3.SQLiteDriver{Extensions: []string{spatialite}} 43 sql.Register(sqliteDriverName, sqlhooks.Wrap(driver, datasources.NewSQLLogFromEnv())) 44 }) 45 } 46 47 type geoPackageBackend interface { 48 getDB() *sqlx.DB 49 close() 50 } 51 52 type featureTable struct { 53 TableName string `db:"table_name"` 54 DataType string `db:"data_type"` // always 'features' 55 Identifier string `db:"identifier"` 56 Description string `db:"description"` 57 GeometryColumnName string `db:"column_name"` 58 GeometryType string `db:"geometry_type_name"` 59 LastChange time.Time `db:"last_change"` 60 MinX float64 `db:"min_x"` // bbox 61 MinY float64 `db:"min_y"` // bbox 62 MaxX float64 `db:"max_x"` // bbox 63 MaxY float64 `db:"max_y"` // bbox 64 SRS int64 `db:"srs_id"` 65 66 ColumnsWithDateType map[string]string 67 } 68 69 func (ft featureTable) ColumnsWithDataType() map[string]string { 70 return ft.ColumnsWithDateType 71 } 72 73 type GeoPackage struct { 74 backend geoPackageBackend 75 preparedStmtCache *PreparedStatementCache 76 77 fidColumn string 78 featureTableByCollectionID map[string]*featureTable 79 queryTimeout time.Duration 80 maxBBoxSizeToUseWithRTree int 81 } 82 83 func NewGeoPackage(collections config.GeoSpatialCollections, gpkgConfig config.GeoPackage) *GeoPackage { 84 loadDriver() 85 86 g := &GeoPackage{} 87 g.preparedStmtCache = NewCache() 88 warmUp := false 89 90 switch { 91 case gpkgConfig.Local != nil: 92 g.backend = newLocalGeoPackage(gpkgConfig.Local) 93 g.fidColumn = gpkgConfig.Local.Fid 94 g.queryTimeout = gpkgConfig.Local.QueryTimeout.Duration 95 g.maxBBoxSizeToUseWithRTree = gpkgConfig.Local.MaxBBoxSizeToUseWithRTree 96 case gpkgConfig.Cloud != nil: 97 g.backend = newCloudBackedGeoPackage(gpkgConfig.Cloud) 98 g.fidColumn = gpkgConfig.Cloud.Fid 99 g.queryTimeout = gpkgConfig.Cloud.QueryTimeout.Duration 100 g.maxBBoxSizeToUseWithRTree = gpkgConfig.Cloud.MaxBBoxSizeToUseWithRTree 101 warmUp = gpkgConfig.Cloud.Cache.WarmUp 102 default: 103 log.Fatal("unknown GeoPackage config encountered") 104 } 105 106 metadata, err := readDriverMetadata(g.backend.getDB()) 107 if err != nil { 108 log.Fatalf("failed to connect with GeoPackage: %v", err) 109 } 110 log.Println(metadata) 111 112 g.featureTableByCollectionID, err = readGpkgContents(collections, g.backend.getDB()) 113 if err != nil { 114 log.Fatal(err) 115 } 116 117 if err = assertIndexesExist(collections, g.featureTableByCollectionID, g.backend.getDB(), g.fidColumn); err != nil { 118 log.Fatal(err) 119 } 120 if warmUp { 121 // perform warmup async since it can take a long time 122 go func() { 123 if err = warmUpFeatureTables(collections, g.featureTableByCollectionID, g.backend.getDB()); err != nil { 124 log.Fatal(err) 125 } 126 }() 127 } 128 return g 129 } 130 131 func (g *GeoPackage) Close() { 132 g.preparedStmtCache.Close() 133 g.backend.close() 134 } 135 136 func (g *GeoPackage) GetFeatureIDs(ctx context.Context, collection string, criteria datasources.FeaturesCriteria) ([]int64, domain.Cursors, error) { 137 table, err := g.getFeatureTable(collection) 138 if err != nil { 139 return nil, domain.Cursors{}, err 140 } 141 142 queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations 143 defer cancel() 144 145 stmt, query, queryArgs, err := g.makeFeaturesQuery(queryCtx, table, true, criteria) //nolint:sqlclosecheck // prepared statement is cached, will be closed when evicted from cache 146 if err != nil { 147 return nil, domain.Cursors{}, fmt.Errorf("failed to create query '%s' error: %w", query, err) 148 } 149 150 rows, err := stmt.QueryxContext(queryCtx, queryArgs) 151 if err != nil { 152 return nil, domain.Cursors{}, fmt.Errorf("failed to execute query '%s' error: %w", query, err) 153 } 154 defer rows.Close() 155 156 featureIDs, prevNext, err := domain.MapRowsToFeatureIDs(rows) 157 if err != nil { 158 return nil, domain.Cursors{}, err 159 } 160 if prevNext == nil { 161 return nil, domain.Cursors{}, nil 162 } 163 return featureIDs, domain.NewCursors(*prevNext, criteria.Cursor.FiltersChecksum), nil 164 } 165 166 func (g *GeoPackage) GetFeaturesByID(ctx context.Context, collection string, featureIDs []int64) (*domain.FeatureCollection, error) { 167 table, err := g.getFeatureTable(collection) 168 if err != nil { 169 return nil, err 170 } 171 172 queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations 173 defer cancel() 174 175 fids := map[string]any{"fids": featureIDs} 176 query, queryArgs, err := sqlx.Named(fmt.Sprintf("select * from %s where %s in (:fids)", table.TableName, g.fidColumn), fids) 177 if err != nil { 178 return nil, fmt.Errorf("failed to make features query, error: %w", err) 179 } 180 query, queryArgs, err = sqlx.In(query, queryArgs...) 181 if err != nil { 182 return nil, fmt.Errorf("failed to make IN-clause, error: %w", err) 183 } 184 185 rows, err := g.backend.getDB().QueryxContext(queryCtx, g.backend.getDB().Rebind(query), queryArgs...) 186 if err != nil { 187 return nil, fmt.Errorf("failed to execute query '%s' error: %w", query, err) 188 } 189 defer rows.Close() 190 191 fc := domain.FeatureCollection{} 192 fc.Features, _, err = domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry) 193 if err != nil { 194 return nil, err 195 } 196 fc.NumberReturned = len(fc.Features) 197 return &fc, nil 198 } 199 200 func (g *GeoPackage) GetFeatures(ctx context.Context, collection string, criteria datasources.FeaturesCriteria) (*domain.FeatureCollection, domain.Cursors, error) { 201 table, err := g.getFeatureTable(collection) 202 if err != nil { 203 return nil, domain.Cursors{}, err 204 } 205 206 queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations 207 defer cancel() 208 209 stmt, query, queryArgs, err := g.makeFeaturesQuery(queryCtx, table, false, criteria) //nolint:sqlclosecheck // prepared statement is cached, will be closed when evicted from cache 210 if err != nil { 211 return nil, domain.Cursors{}, fmt.Errorf("failed to create query '%s' error: %w", query, err) 212 } 213 214 rows, err := stmt.QueryxContext(queryCtx, queryArgs) 215 if err != nil { 216 return nil, domain.Cursors{}, fmt.Errorf("failed to execute query '%s' error: %w", query, err) 217 } 218 defer rows.Close() 219 220 var prevNext *domain.PrevNextFID 221 fc := domain.FeatureCollection{} 222 fc.Features, prevNext, err = domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry) 223 if err != nil { 224 return nil, domain.Cursors{}, err 225 } 226 if prevNext == nil { 227 return nil, domain.Cursors{}, nil 228 } 229 fc.NumberReturned = len(fc.Features) 230 return &fc, domain.NewCursors(*prevNext, criteria.Cursor.FiltersChecksum), nil 231 } 232 233 func (g *GeoPackage) GetFeature(ctx context.Context, collection string, featureID int64) (*domain.Feature, error) { 234 table, err := g.getFeatureTable(collection) 235 if err != nil { 236 return nil, err 237 } 238 239 queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations 240 defer cancel() 241 242 query := fmt.Sprintf("select * from %s f where f.%s = :fid limit 1", table.TableName, g.fidColumn) 243 rows, err := g.backend.getDB().NamedQueryContext(queryCtx, query, map[string]any{"fid": featureID}) 244 if err != nil { 245 return nil, fmt.Errorf("query '%s' failed: %w", query, err) 246 } 247 defer rows.Close() 248 249 features, _, err := domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry) 250 if err != nil { 251 return nil, err 252 } 253 if len(features) != 1 { 254 return nil, nil 255 } 256 return features[0], nil 257 } 258 259 func (g *GeoPackage) GetFeatureTableMetadata(collection string) (datasources.FeatureTableMetadata, error) { 260 val, ok := g.featureTableByCollectionID[collection] 261 if !ok { 262 return nil, fmt.Errorf("no metadata for %s", collection) 263 } 264 return val, nil 265 } 266 267 // Build specific features queries based on the given options. 268 // Make sure to use SQL bind variables and return named params: https://jmoiron.github.io/sqlx/#namedParams 269 func (g *GeoPackage) makeFeaturesQuery(ctx context.Context, table *featureTable, onlyFIDs bool, 270 criteria datasources.FeaturesCriteria) (stmt *sqlx.NamedStmt, query string, queryArgs map[string]any, err error) { 271 272 // make query 273 if criteria.Bbox != nil { 274 query, queryArgs, err = g.makeBboxQuery(table, onlyFIDs, criteria) 275 if err != nil { 276 return 277 } 278 } else { 279 query, queryArgs = g.makeDefaultQuery(table, criteria) 280 } 281 // lookup prepared statement for given query, or create new one 282 stmt, err = g.preparedStmtCache.Lookup(ctx, g.backend.getDB(), query) 283 return 284 } 285 286 func (g *GeoPackage) makeDefaultQuery(table *featureTable, criteria datasources.FeaturesCriteria) (string, map[string]any) { 287 pfClause, pfNamedParams := propertyFiltersToSQL(criteria.PropertyFilters) 288 temporalClause, temporalNamedParams := temporalCriteriaToSQL(criteria.TemporalCriteria) 289 290 defaultQuery := fmt.Sprintf(` 291 with 292 next as (select * from "%[1]s" where "%[2]s" >= :fid %[3]s %[4]s order by %[2]s asc limit :limit + 1), 293 prev as (select * from "%[1]s" where "%[2]s" < :fid %[3]s %[4]s order by %[2]s desc limit :limit), 294 nextprev as (select * from next union all select * from prev), 295 nextprevfeat as (select *, lag("%[2]s", :limit) over (order by %[2]s) as prevfid, lead("%[2]s", :limit) over (order by "%[2]s") as nextfid from nextprev) 296 select * from nextprevfeat where "%[2]s" >= :fid %[3]s %[4]s limit :limit 297 `, table.TableName, g.fidColumn, temporalClause, pfClause) // don't add user input here, use named params for user input! 298 299 namedParams := map[string]any{ 300 "fid": criteria.Cursor.FID, 301 "limit": criteria.Limit, 302 } 303 maps.Copy(namedParams, pfNamedParams) 304 maps.Copy(namedParams, temporalNamedParams) 305 return defaultQuery, namedParams 306 } 307 308 func (g *GeoPackage) makeBboxQuery(table *featureTable, onlyFIDs bool, criteria datasources.FeaturesCriteria) (string, map[string]any, error) { 309 selectClause := "*" 310 if onlyFIDs { 311 selectClause = "\"" + g.fidColumn + "\", prevfid, nextfid" 312 } 313 314 btreeIndexHint := fmt.Sprintf("indexed by \"%s_spatial_idx\"", table.TableName) 315 316 pfClause, pfNamedParams := propertyFiltersToSQL(criteria.PropertyFilters) 317 if pfClause != "" { 318 // don't force btree index when using property filter, let SQLite decide 319 // whether to use the BTree index or the property filter index 320 btreeIndexHint = "" 321 } 322 temporalClause, temporalNamedParams := temporalCriteriaToSQL(criteria.TemporalCriteria) 323 324 bboxQuery := fmt.Sprintf(` 325 with 326 given_bbox as (select geomfromtext(:bboxWkt, :bboxSrid)), 327 bbox_size as (select iif(count(id) < %[3]d, 'small', 'big') as bbox_size 328 from (select id from rtree_%[1]s_%[4]s 329 where minx <= :maxx and maxx >= :minx and miny <= :maxy and maxy >= :miny 330 limit %[3]d)), 331 next_bbox_rtree as (select f.* 332 from "%[1]s" f inner join rtree_%[1]s_%[4]s rf on f."%[2]s" = rf.id 333 where rf.minx <= :maxx and rf.maxx >= :minx and rf.miny <= :maxy and rf.maxy >= :miny 334 and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1 335 and f."%[2]s" >= :fid %[6]s %[7]s 336 order by f."%[2]s" asc 337 limit (select iif(bbox_size == 'small', :limit + 1, 0) from bbox_size)), 338 next_bbox_btree as (select f.* 339 from "%[1]s" f %[8]s 340 where f.minx <= :maxx and f.maxx >= :minx and f.miny <= :maxy and f.maxy >= :miny 341 and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1 342 and f."%[2]s" >= :fid %[6]s %[7]s 343 order by f."%[2]s" asc 344 limit (select iif(bbox_size == 'big', :limit + 1, 0) from bbox_size)), 345 next as (select * from next_bbox_rtree union all select * from next_bbox_btree), 346 prev_bbox_rtree as (select f.* 347 from "%[1]s" f inner join rtree_%[1]s_%[4]s rf on f."%[2]s" = rf.id 348 where rf.minx <= :maxx and rf.maxx >= :minx and rf.miny <= :maxy and rf.maxy >= :miny 349 and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1 350 and f."%[2]s" < :fid %[6]s %[7]s 351 order by f."%[2]s" desc 352 limit (select iif(bbox_size == 'small', :limit, 0) from bbox_size)), 353 prev_bbox_btree as (select f.* 354 from "%[1]s" f %[8]s 355 where f.minx <= :maxx and f.maxx >= :minx and f.miny <= :maxy and f.maxy >= :miny 356 and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1 357 and f."%[2]s" < :fid %[6]s %[7]s 358 order by f."%[2]s" desc 359 limit (select iif(bbox_size == 'big', :limit, 0) from bbox_size)), 360 prev as (select * from prev_bbox_rtree union all select * from prev_bbox_btree), 361 nextprev as (select * from next union all select * from prev), 362 nextprevfeat as (select *, lag("%[2]s", :limit) over (order by "%[2]s") as prevfid, lead("%[2]s", :limit) over (order by "%[2]s") as nextfid from nextprev) 363 select %[5]s from nextprevfeat where "%[2]s" >= :fid %[6]s %[7]s limit :limit 364 `, table.TableName, g.fidColumn, g.maxBBoxSizeToUseWithRTree, table.GeometryColumnName, 365 selectClause, temporalClause, pfClause, btreeIndexHint) // don't add user input here, use named params for user input! 366 367 bboxAsWKT, err := wkt.EncodeString(criteria.Bbox) 368 if err != nil { 369 return "", nil, err 370 } 371 namedParams := map[string]any{ 372 "fid": criteria.Cursor.FID, 373 "limit": criteria.Limit, 374 "bboxWkt": bboxAsWKT, 375 "maxx": criteria.Bbox.MaxX(), 376 "minx": criteria.Bbox.MinX(), 377 "maxy": criteria.Bbox.MaxY(), 378 "miny": criteria.Bbox.MinY(), 379 "bboxSrid": criteria.InputSRID} 380 maps.Copy(namedParams, pfNamedParams) 381 maps.Copy(namedParams, temporalNamedParams) 382 return bboxQuery, namedParams, nil 383 } 384 385 func (g *GeoPackage) getFeatureTable(collection string) (*featureTable, error) { 386 table, ok := g.featureTableByCollectionID[collection] 387 if !ok { 388 return nil, fmt.Errorf("can't query collection '%s' since it doesn't exist in "+ 389 "geopackage, available in geopackage: %v", collection, util.Keys(g.featureTableByCollectionID)) 390 } 391 return table, nil 392 } 393 394 func readGpkgGeometry(rawGeom []byte) (geom.Geometry, error) { 395 geometry, err := gpkg.DecodeGeometry(rawGeom) 396 if err != nil { 397 return nil, err 398 } 399 return geometry.Geometry, nil 400 } 401 402 func propertyFiltersToSQL(pf map[string]string) (sql string, namedParams map[string]any) { 403 namedParams = make(map[string]any) 404 if len(pf) > 0 { 405 position := 0 406 for k, v := range pf { 407 position++ 408 namedParam := fmt.Sprintf("pf%d", position) 409 // column name in double quotes in case it is a reserved keyword 410 // also: we don't currently support LIKE since wildcard searches don't use the index 411 sql += fmt.Sprintf(" and \"%s\" = :%s", k, namedParam) 412 namedParams[namedParam] = v 413 } 414 } 415 return sql, namedParams 416 } 417 418 func temporalCriteriaToSQL(temporalCriteria datasources.TemporalCriteria) (sql string, namedParams map[string]any) { 419 namedParams = make(map[string]any) 420 if !temporalCriteria.ReferenceDate.IsZero() { 421 namedParams["referenceDate"] = temporalCriteria.ReferenceDate 422 startDate := temporalCriteria.StartDateProperty 423 endDate := temporalCriteria.EndDateProperty 424 sql = fmt.Sprintf(" and \"%[1]s\" <= :referenceDate and (\"%[2]s\" >= :referenceDate or \"%[2]s\" is null)", startDate, endDate) 425 } 426 return sql, namedParams 427 }