github.com/PDOK/gokoala@v0.50.6/internal/ogc/features/datasources/geopackage/geopackage.go (about)

     1  package geopackage
     2  
     3  import (
     4  	"context"
     5  	"database/sql"
     6  	"fmt"
     7  	"log"
     8  	"maps"
     9  	"os"
    10  	"path"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/PDOK/gokoala/config"
    15  
    16  	"github.com/PDOK/gokoala/internal/engine/util"
    17  	"github.com/PDOK/gokoala/internal/ogc/features/datasources"
    18  	"github.com/PDOK/gokoala/internal/ogc/features/domain"
    19  	"github.com/go-spatial/geom"
    20  	"github.com/go-spatial/geom/encoding/gpkg"
    21  	"github.com/go-spatial/geom/encoding/wkt"
    22  	"github.com/jmoiron/sqlx"
    23  	"github.com/mattn/go-sqlite3"
    24  	"github.com/qustavo/sqlhooks/v2"
    25  
    26  	_ "github.com/mattn/go-sqlite3" // import for side effect (= sqlite3 driver) only
    27  )
    28  
    29  const (
    30  	sqliteDriverName = "sqlite3_with_extensions"
    31  )
    32  
    33  var once sync.Once
    34  
    35  // Load sqlite (with extensions) once.
    36  //
    37  // Extensions are by default expected in /usr/lib. For spatialite you can
    38  // alternatively/optionally set SPATIALITE_LIBRARY_PATH.
    39  func loadDriver() {
    40  	once.Do(func() {
    41  		spatialite := path.Join(os.Getenv("SPATIALITE_LIBRARY_PATH"), "mod_spatialite")
    42  		driver := &sqlite3.SQLiteDriver{Extensions: []string{spatialite}}
    43  		sql.Register(sqliteDriverName, sqlhooks.Wrap(driver, datasources.NewSQLLogFromEnv()))
    44  	})
    45  }
    46  
    47  type geoPackageBackend interface {
    48  	getDB() *sqlx.DB
    49  	close()
    50  }
    51  
    52  type featureTable struct {
    53  	TableName          string    `db:"table_name"`
    54  	DataType           string    `db:"data_type"` // always 'features'
    55  	Identifier         string    `db:"identifier"`
    56  	Description        string    `db:"description"`
    57  	GeometryColumnName string    `db:"column_name"`
    58  	GeometryType       string    `db:"geometry_type_name"`
    59  	LastChange         time.Time `db:"last_change"`
    60  	MinX               float64   `db:"min_x"` // bbox
    61  	MinY               float64   `db:"min_y"` // bbox
    62  	MaxX               float64   `db:"max_x"` // bbox
    63  	MaxY               float64   `db:"max_y"` // bbox
    64  	SRS                int64     `db:"srs_id"`
    65  
    66  	ColumnsWithDateType map[string]string
    67  }
    68  
    69  func (ft featureTable) ColumnsWithDataType() map[string]string {
    70  	return ft.ColumnsWithDateType
    71  }
    72  
    73  type GeoPackage struct {
    74  	backend           geoPackageBackend
    75  	preparedStmtCache *PreparedStatementCache
    76  
    77  	fidColumn                  string
    78  	featureTableByCollectionID map[string]*featureTable
    79  	queryTimeout               time.Duration
    80  	maxBBoxSizeToUseWithRTree  int
    81  }
    82  
    83  func NewGeoPackage(collections config.GeoSpatialCollections, gpkgConfig config.GeoPackage) *GeoPackage {
    84  	loadDriver()
    85  
    86  	g := &GeoPackage{}
    87  	g.preparedStmtCache = NewCache()
    88  	warmUp := false
    89  
    90  	switch {
    91  	case gpkgConfig.Local != nil:
    92  		g.backend = newLocalGeoPackage(gpkgConfig.Local)
    93  		g.fidColumn = gpkgConfig.Local.Fid
    94  		g.queryTimeout = gpkgConfig.Local.QueryTimeout.Duration
    95  		g.maxBBoxSizeToUseWithRTree = gpkgConfig.Local.MaxBBoxSizeToUseWithRTree
    96  	case gpkgConfig.Cloud != nil:
    97  		g.backend = newCloudBackedGeoPackage(gpkgConfig.Cloud)
    98  		g.fidColumn = gpkgConfig.Cloud.Fid
    99  		g.queryTimeout = gpkgConfig.Cloud.QueryTimeout.Duration
   100  		g.maxBBoxSizeToUseWithRTree = gpkgConfig.Cloud.MaxBBoxSizeToUseWithRTree
   101  		warmUp = gpkgConfig.Cloud.Cache.WarmUp
   102  	default:
   103  		log.Fatal("unknown GeoPackage config encountered")
   104  	}
   105  
   106  	metadata, err := readDriverMetadata(g.backend.getDB())
   107  	if err != nil {
   108  		log.Fatalf("failed to connect with GeoPackage: %v", err)
   109  	}
   110  	log.Println(metadata)
   111  
   112  	g.featureTableByCollectionID, err = readGpkgContents(collections, g.backend.getDB())
   113  	if err != nil {
   114  		log.Fatal(err)
   115  	}
   116  
   117  	if err = assertIndexesExist(collections, g.featureTableByCollectionID, g.backend.getDB(), g.fidColumn); err != nil {
   118  		log.Fatal(err)
   119  	}
   120  	if warmUp {
   121  		// perform warmup async since it can take a long time
   122  		go func() {
   123  			if err = warmUpFeatureTables(collections, g.featureTableByCollectionID, g.backend.getDB()); err != nil {
   124  				log.Fatal(err)
   125  			}
   126  		}()
   127  	}
   128  	return g
   129  }
   130  
   131  func (g *GeoPackage) Close() {
   132  	g.preparedStmtCache.Close()
   133  	g.backend.close()
   134  }
   135  
   136  func (g *GeoPackage) GetFeatureIDs(ctx context.Context, collection string, criteria datasources.FeaturesCriteria) ([]int64, domain.Cursors, error) {
   137  	table, err := g.getFeatureTable(collection)
   138  	if err != nil {
   139  		return nil, domain.Cursors{}, err
   140  	}
   141  
   142  	queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations
   143  	defer cancel()
   144  
   145  	stmt, query, queryArgs, err := g.makeFeaturesQuery(queryCtx, table, true, criteria) //nolint:sqlclosecheck // prepared statement is cached, will be closed when evicted from cache
   146  	if err != nil {
   147  		return nil, domain.Cursors{}, fmt.Errorf("failed to create query '%s' error: %w", query, err)
   148  	}
   149  
   150  	rows, err := stmt.QueryxContext(queryCtx, queryArgs)
   151  	if err != nil {
   152  		return nil, domain.Cursors{}, fmt.Errorf("failed to execute query '%s' error: %w", query, err)
   153  	}
   154  	defer rows.Close()
   155  
   156  	featureIDs, prevNext, err := domain.MapRowsToFeatureIDs(rows)
   157  	if err != nil {
   158  		return nil, domain.Cursors{}, err
   159  	}
   160  	if prevNext == nil {
   161  		return nil, domain.Cursors{}, nil
   162  	}
   163  	return featureIDs, domain.NewCursors(*prevNext, criteria.Cursor.FiltersChecksum), nil
   164  }
   165  
   166  func (g *GeoPackage) GetFeaturesByID(ctx context.Context, collection string, featureIDs []int64) (*domain.FeatureCollection, error) {
   167  	table, err := g.getFeatureTable(collection)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations
   173  	defer cancel()
   174  
   175  	fids := map[string]any{"fids": featureIDs}
   176  	query, queryArgs, err := sqlx.Named(fmt.Sprintf("select * from %s where %s in (:fids)", table.TableName, g.fidColumn), fids)
   177  	if err != nil {
   178  		return nil, fmt.Errorf("failed to make features query, error: %w", err)
   179  	}
   180  	query, queryArgs, err = sqlx.In(query, queryArgs...)
   181  	if err != nil {
   182  		return nil, fmt.Errorf("failed to make IN-clause, error: %w", err)
   183  	}
   184  
   185  	rows, err := g.backend.getDB().QueryxContext(queryCtx, g.backend.getDB().Rebind(query), queryArgs...)
   186  	if err != nil {
   187  		return nil, fmt.Errorf("failed to execute query '%s' error: %w", query, err)
   188  	}
   189  	defer rows.Close()
   190  
   191  	fc := domain.FeatureCollection{}
   192  	fc.Features, _, err = domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	fc.NumberReturned = len(fc.Features)
   197  	return &fc, nil
   198  }
   199  
   200  func (g *GeoPackage) GetFeatures(ctx context.Context, collection string, criteria datasources.FeaturesCriteria) (*domain.FeatureCollection, domain.Cursors, error) {
   201  	table, err := g.getFeatureTable(collection)
   202  	if err != nil {
   203  		return nil, domain.Cursors{}, err
   204  	}
   205  
   206  	queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations
   207  	defer cancel()
   208  
   209  	stmt, query, queryArgs, err := g.makeFeaturesQuery(queryCtx, table, false, criteria) //nolint:sqlclosecheck // prepared statement is cached, will be closed when evicted from cache
   210  	if err != nil {
   211  		return nil, domain.Cursors{}, fmt.Errorf("failed to create query '%s' error: %w", query, err)
   212  	}
   213  
   214  	rows, err := stmt.QueryxContext(queryCtx, queryArgs)
   215  	if err != nil {
   216  		return nil, domain.Cursors{}, fmt.Errorf("failed to execute query '%s' error: %w", query, err)
   217  	}
   218  	defer rows.Close()
   219  
   220  	var prevNext *domain.PrevNextFID
   221  	fc := domain.FeatureCollection{}
   222  	fc.Features, prevNext, err = domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry)
   223  	if err != nil {
   224  		return nil, domain.Cursors{}, err
   225  	}
   226  	if prevNext == nil {
   227  		return nil, domain.Cursors{}, nil
   228  	}
   229  	fc.NumberReturned = len(fc.Features)
   230  	return &fc, domain.NewCursors(*prevNext, criteria.Cursor.FiltersChecksum), nil
   231  }
   232  
   233  func (g *GeoPackage) GetFeature(ctx context.Context, collection string, featureID int64) (*domain.Feature, error) {
   234  	table, err := g.getFeatureTable(collection)
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  
   239  	queryCtx, cancel := context.WithTimeout(ctx, g.queryTimeout) // https://go.dev/doc/database/cancel-operations
   240  	defer cancel()
   241  
   242  	query := fmt.Sprintf("select * from %s f where f.%s = :fid limit 1", table.TableName, g.fidColumn)
   243  	rows, err := g.backend.getDB().NamedQueryContext(queryCtx, query, map[string]any{"fid": featureID})
   244  	if err != nil {
   245  		return nil, fmt.Errorf("query '%s' failed: %w", query, err)
   246  	}
   247  	defer rows.Close()
   248  
   249  	features, _, err := domain.MapRowsToFeatures(rows, g.fidColumn, table.GeometryColumnName, readGpkgGeometry)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	if len(features) != 1 {
   254  		return nil, nil
   255  	}
   256  	return features[0], nil
   257  }
   258  
   259  func (g *GeoPackage) GetFeatureTableMetadata(collection string) (datasources.FeatureTableMetadata, error) {
   260  	val, ok := g.featureTableByCollectionID[collection]
   261  	if !ok {
   262  		return nil, fmt.Errorf("no metadata for %s", collection)
   263  	}
   264  	return val, nil
   265  }
   266  
   267  // Build specific features queries based on the given options.
   268  // Make sure to use SQL bind variables and return named params: https://jmoiron.github.io/sqlx/#namedParams
   269  func (g *GeoPackage) makeFeaturesQuery(ctx context.Context, table *featureTable, onlyFIDs bool,
   270  	criteria datasources.FeaturesCriteria) (stmt *sqlx.NamedStmt, query string, queryArgs map[string]any, err error) {
   271  
   272  	// make query
   273  	if criteria.Bbox != nil {
   274  		query, queryArgs, err = g.makeBboxQuery(table, onlyFIDs, criteria)
   275  		if err != nil {
   276  			return
   277  		}
   278  	} else {
   279  		query, queryArgs = g.makeDefaultQuery(table, criteria)
   280  	}
   281  	// lookup prepared statement for given query, or create new one
   282  	stmt, err = g.preparedStmtCache.Lookup(ctx, g.backend.getDB(), query)
   283  	return
   284  }
   285  
   286  func (g *GeoPackage) makeDefaultQuery(table *featureTable, criteria datasources.FeaturesCriteria) (string, map[string]any) {
   287  	pfClause, pfNamedParams := propertyFiltersToSQL(criteria.PropertyFilters)
   288  	temporalClause, temporalNamedParams := temporalCriteriaToSQL(criteria.TemporalCriteria)
   289  
   290  	defaultQuery := fmt.Sprintf(`
   291  with
   292      next as (select * from "%[1]s" where "%[2]s" >= :fid %[3]s %[4]s order by %[2]s asc limit :limit + 1),
   293      prev as (select * from "%[1]s" where "%[2]s" < :fid %[3]s %[4]s order by %[2]s desc limit :limit),
   294      nextprev as (select * from next union all select * from prev),
   295      nextprevfeat as (select *, lag("%[2]s", :limit) over (order by %[2]s) as prevfid, lead("%[2]s", :limit) over (order by "%[2]s") as nextfid from nextprev)
   296  select * from nextprevfeat where "%[2]s" >= :fid %[3]s %[4]s limit :limit
   297  `, table.TableName, g.fidColumn, temporalClause, pfClause) // don't add user input here, use named params for user input!
   298  
   299  	namedParams := map[string]any{
   300  		"fid":   criteria.Cursor.FID,
   301  		"limit": criteria.Limit,
   302  	}
   303  	maps.Copy(namedParams, pfNamedParams)
   304  	maps.Copy(namedParams, temporalNamedParams)
   305  	return defaultQuery, namedParams
   306  }
   307  
   308  func (g *GeoPackage) makeBboxQuery(table *featureTable, onlyFIDs bool, criteria datasources.FeaturesCriteria) (string, map[string]any, error) {
   309  	selectClause := "*"
   310  	if onlyFIDs {
   311  		selectClause = "\"" + g.fidColumn + "\", prevfid, nextfid"
   312  	}
   313  
   314  	btreeIndexHint := fmt.Sprintf("indexed by \"%s_spatial_idx\"", table.TableName)
   315  
   316  	pfClause, pfNamedParams := propertyFiltersToSQL(criteria.PropertyFilters)
   317  	if pfClause != "" {
   318  		// don't force btree index when using property filter, let SQLite decide
   319  		// whether to use the BTree index or the property filter index
   320  		btreeIndexHint = ""
   321  	}
   322  	temporalClause, temporalNamedParams := temporalCriteriaToSQL(criteria.TemporalCriteria)
   323  
   324  	bboxQuery := fmt.Sprintf(`
   325  with
   326       given_bbox as (select geomfromtext(:bboxWkt, :bboxSrid)),
   327       bbox_size as (select iif(count(id) < %[3]d, 'small', 'big') as bbox_size
   328                       from (select id from rtree_%[1]s_%[4]s
   329                             where minx <= :maxx and maxx >= :minx and miny <= :maxy and maxy >= :miny
   330                             limit %[3]d)),
   331       next_bbox_rtree as (select f.*
   332                           from "%[1]s" f inner join rtree_%[1]s_%[4]s rf on f."%[2]s" = rf.id
   333                           where rf.minx <= :maxx and rf.maxx >= :minx and rf.miny <= :maxy and rf.maxy >= :miny
   334                             and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1
   335                             and f."%[2]s" >= :fid %[6]s %[7]s
   336                           order by f."%[2]s" asc
   337                           limit (select iif(bbox_size == 'small', :limit + 1, 0) from bbox_size)),
   338       next_bbox_btree as (select f.*
   339                           from "%[1]s" f %[8]s
   340                           where f.minx <= :maxx and f.maxx >= :minx and f.miny <= :maxy and f.maxy >= :miny
   341                             and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1
   342                             and f."%[2]s" >= :fid %[6]s %[7]s
   343                           order by f."%[2]s" asc
   344                           limit (select iif(bbox_size == 'big', :limit + 1, 0) from bbox_size)),
   345       next as (select * from next_bbox_rtree union all select * from next_bbox_btree),
   346       prev_bbox_rtree as (select f.*
   347                           from "%[1]s" f inner join rtree_%[1]s_%[4]s rf on f."%[2]s" = rf.id
   348                           where rf.minx <= :maxx and rf.maxx >= :minx and rf.miny <= :maxy and rf.maxy >= :miny
   349                             and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1
   350                             and f."%[2]s" < :fid %[6]s %[7]s
   351                           order by f."%[2]s" desc
   352                           limit (select iif(bbox_size == 'small', :limit, 0) from bbox_size)),
   353       prev_bbox_btree as (select f.*
   354                           from "%[1]s" f %[8]s
   355                           where f.minx <= :maxx and f.maxx >= :minx and f.miny <= :maxy and f.maxy >= :miny
   356                             and st_intersects((select * from given_bbox), castautomagic(f.%[4]s)) = 1
   357                             and f."%[2]s" < :fid %[6]s %[7]s
   358                           order by f."%[2]s" desc
   359                           limit (select iif(bbox_size == 'big', :limit, 0) from bbox_size)),
   360       prev as (select * from prev_bbox_rtree union all select * from prev_bbox_btree),
   361       nextprev as (select * from next union all select * from prev),
   362       nextprevfeat as (select *, lag("%[2]s", :limit) over (order by "%[2]s") as prevfid, lead("%[2]s", :limit) over (order by "%[2]s") as nextfid from nextprev)
   363  select %[5]s from nextprevfeat where "%[2]s" >= :fid %[6]s %[7]s limit :limit
   364  `, table.TableName, g.fidColumn, g.maxBBoxSizeToUseWithRTree, table.GeometryColumnName,
   365  		selectClause, temporalClause, pfClause, btreeIndexHint) // don't add user input here, use named params for user input!
   366  
   367  	bboxAsWKT, err := wkt.EncodeString(criteria.Bbox)
   368  	if err != nil {
   369  		return "", nil, err
   370  	}
   371  	namedParams := map[string]any{
   372  		"fid":      criteria.Cursor.FID,
   373  		"limit":    criteria.Limit,
   374  		"bboxWkt":  bboxAsWKT,
   375  		"maxx":     criteria.Bbox.MaxX(),
   376  		"minx":     criteria.Bbox.MinX(),
   377  		"maxy":     criteria.Bbox.MaxY(),
   378  		"miny":     criteria.Bbox.MinY(),
   379  		"bboxSrid": criteria.InputSRID}
   380  	maps.Copy(namedParams, pfNamedParams)
   381  	maps.Copy(namedParams, temporalNamedParams)
   382  	return bboxQuery, namedParams, nil
   383  }
   384  
   385  func (g *GeoPackage) getFeatureTable(collection string) (*featureTable, error) {
   386  	table, ok := g.featureTableByCollectionID[collection]
   387  	if !ok {
   388  		return nil, fmt.Errorf("can't query collection '%s' since it doesn't exist in "+
   389  			"geopackage, available in geopackage: %v", collection, util.Keys(g.featureTableByCollectionID))
   390  	}
   391  	return table, nil
   392  }
   393  
   394  func readGpkgGeometry(rawGeom []byte) (geom.Geometry, error) {
   395  	geometry, err := gpkg.DecodeGeometry(rawGeom)
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  	return geometry.Geometry, nil
   400  }
   401  
   402  func propertyFiltersToSQL(pf map[string]string) (sql string, namedParams map[string]any) {
   403  	namedParams = make(map[string]any)
   404  	if len(pf) > 0 {
   405  		position := 0
   406  		for k, v := range pf {
   407  			position++
   408  			namedParam := fmt.Sprintf("pf%d", position)
   409  			// column name in double quotes in case it is a reserved keyword
   410  			// also: we don't currently support LIKE since wildcard searches don't use the index
   411  			sql += fmt.Sprintf(" and \"%s\" = :%s", k, namedParam)
   412  			namedParams[namedParam] = v
   413  		}
   414  	}
   415  	return sql, namedParams
   416  }
   417  
   418  func temporalCriteriaToSQL(temporalCriteria datasources.TemporalCriteria) (sql string, namedParams map[string]any) {
   419  	namedParams = make(map[string]any)
   420  	if !temporalCriteria.ReferenceDate.IsZero() {
   421  		namedParams["referenceDate"] = temporalCriteria.ReferenceDate
   422  		startDate := temporalCriteria.StartDateProperty
   423  		endDate := temporalCriteria.EndDateProperty
   424  		sql = fmt.Sprintf(" and \"%[1]s\" <= :referenceDate and (\"%[2]s\" >= :referenceDate or \"%[2]s\" is null)", startDate, endDate)
   425  	}
   426  	return sql, namedParams
   427  }