go-hep.org/x/hep@v0.38.1/hbook/ntup/ntuple.go (about)

     1  // Copyright ©2016 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package ntup provides a way to create, open and iterate over n-tuple data.
     6  package ntup // import "go-hep.org/x/hep/hbook/ntup"
     7  
     8  import (
     9  	"database/sql"
    10  	"errors"
    11  	"fmt"
    12  	"go/ast"
    13  	"io"
    14  	"math"
    15  	"reflect"
    16  	"strings"
    17  
    18  	"go-hep.org/x/hep/hbook"
    19  )
    20  
var (
	// ErrNotExist is the error meant to report that an n-tuple could not be
	// located in a sql.DB.
	ErrNotExist = errors.New("hbook/ntup: ntuple does not exist")

	// ErrMissingColDef is returned when some information is missing wrt
	// an n-tuple column definition (e.g. Create called without any column).
	ErrMissingColDef = errors.New("hbook/ntup: expected at least one column definition")

	// The errors below are returned while inferring a schema, one per
	// column kind that cannot be stored in an n-tuple.
	errChanType   = errors.New("hbook/ntup: chans not supported")
	errIfaceType  = errors.New("hbook/ntup: interfaces not supported")
	errMapType    = errors.New("hbook/ntup: maps not supported")
	errSliceType  = errors.New("hbook/ntup: nested slices not supported")
	errStructType = errors.New("hbook/ntup: nested structs not supported")
)
    35  
// Ntuple provides read/write access to row-wise n-tuple data.
//
// An Ntuple is a thin handle: it pairs a database connection with the name
// of the n-tuple and, when known, the description of its columns.
type Ntuple struct {
	db     *sql.DB      // database handle queries are issued against
	name   string       // name of the n-tuple, used as the table name in queries
	schema []Descriptor // column descriptors; filled by Create, left nil by Open
}
    42  
    43  // Open inspects the given database handle and tries to return
    44  // an Ntuple connected to a table with the given name.
    45  // Open returns ErrNotExist if no such table exists.
    46  // If name is "", Open will connect to the one-and-only table in the db.
    47  //
    48  // e.g.:
    49  //
    50  //	db, err := sql.Open("csv", "file.csv")
    51  //	nt, err := ntup.Open(db, "ntup")
    52  func Open(db *sql.DB, name string) (*Ntuple, error) {
    53  	nt := &Ntuple{
    54  		db:   db,
    55  		name: name,
    56  	}
    57  	// FIXME(sbinet) test whether the table 'name' actually exists
    58  	// FIXME(sbinet) retrieve underlying schema from db
    59  	return nt, nil
    60  }
    61  
    62  // Create creates a new ntuple with the given name inside the given database handle.
    63  // The n-tuple schema is inferred from the cols argument. cols can be:
    64  //   - a single struct value (columns are inferred from the names+types of the exported fields)
    65  //   - a list of builtin values (the columns names are varX where X=[1-len(cols)])
    66  //   - a list of ntup.Descriptors
    67  //
    68  // e.g.:
    69  //
    70  //	nt, err := ntup.Create(db, "nt", struct{X float64 `hbook:"x"`}{})
    71  //	nt, err := ntup.Create(db, "nt", int64(0), float64(0))
    72  func Create(db *sql.DB, name string, cols ...any) (*Ntuple, error) {
    73  	var err error
    74  	nt := &Ntuple{
    75  		db:   db,
    76  		name: name,
    77  	}
    78  	var schema []Descriptor
    79  	switch len(cols) {
    80  	case 0:
    81  		return nil, ErrMissingColDef
    82  	case 1:
    83  		rv := reflect.Indirect(reflect.ValueOf(cols[0]))
    84  		rt := rv.Type()
    85  		switch rt.Kind() {
    86  		case reflect.Struct:
    87  			schema, err = schemaFromStruct(rt)
    88  		default:
    89  			schema, err = schemaFrom(cols...)
    90  		}
    91  	default:
    92  		schema, err = schemaFrom(cols...)
    93  	}
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  	nt.schema = schema
    98  	return nt, err
    99  }
   100  
// DB returns the underlying db this n-tuple is connected to.
// It is the handle that was given to Open or Create.
func (nt *Ntuple) DB() *sql.DB {
	return nt.db
}
   105  
// Name returns the name of this n-tuple, i.e. the name that was given
// to Open or Create.
func (nt *Ntuple) Name() string {
	return nt.name
}
   110  
// Cols returns the columns' descriptors of this n-tuple.
// The returned slice is the n-tuple's own (it is not copied):
// modifying it directly leads to undefined behaviour.
func (nt *Ntuple) Cols() []Descriptor {
	return nt.schema
}
   116  
// Descriptor describes a column of an n-tuple: its name and the Go type
// of the values it holds.
type Descriptor interface {
	Name() string       // the column name
	Type() reflect.Type // the column type
}
   122  
// columnDescr is the Descriptor implementation used for the schemas
// built by this package.
type columnDescr struct {
	name string       // the column name
	typ  reflect.Type // the Go type of the column values
}
   127  
// Name returns the column name.
func (col *columnDescr) Name() string {
	return col.name
}
   131  
// Type returns the Go type of the column values.
func (col *columnDescr) Type() reflect.Type {
	return col.typ
}
   135  
   136  func schemaFromStruct(rt reflect.Type) ([]Descriptor, error) {
   137  	var schema []Descriptor
   138  	var err error
   139  	for i := range rt.NumField() {
   140  		f := rt.Field(i)
   141  		if !ast.IsExported(f.Name) {
   142  			continue
   143  		}
   144  		ft := f.Type
   145  		switch ft.Kind() {
   146  		case reflect.Chan:
   147  			return nil, errChanType
   148  		case reflect.Interface:
   149  			return nil, errIfaceType
   150  		case reflect.Map:
   151  			return nil, errMapType
   152  		case reflect.Slice:
   153  			return nil, errSliceType
   154  		case reflect.Struct:
   155  			return nil, errStructType
   156  		}
   157  		fname := getTag(f.Tag, "hbook", "rio", "db")
   158  		if fname == "" {
   159  			fname = f.Name
   160  		}
   161  		schema = append(schema, &columnDescr{fname, ft})
   162  	}
   163  	return schema, err
   164  }
   165  
   166  func schemaFrom(src ...any) ([]Descriptor, error) {
   167  	var schema []Descriptor
   168  	var err error
   169  	for i, col := range src {
   170  		rt := reflect.TypeOf(col)
   171  		switch rt.Kind() {
   172  		case reflect.Chan:
   173  			return nil, errChanType
   174  		case reflect.Interface:
   175  			return nil, errIfaceType
   176  		case reflect.Map:
   177  			return nil, errMapType
   178  		case reflect.Slice:
   179  			return nil, errSliceType
   180  		case reflect.Struct:
   181  			return nil, errStructType
   182  		}
   183  		schema = append(schema, &columnDescr{fmt.Sprintf("var%d", i+1), rt})
   184  	}
   185  	return schema, err
   186  }
   187  
   188  func getTag(tag reflect.StructTag, keys ...string) string {
   189  	for _, k := range keys {
   190  		v := tag.Get(k)
   191  		if v != "" && v != "-" {
   192  			return v
   193  		}
   194  	}
   195  	return ""
   196  }
   197  
   198  // Scan executes a query against the ntuple and runs the function f against that context.
   199  //
   200  // e.g.
   201  //
   202  //	err = nt.Scan("x,y where z>10", func(x,y float64) error {
   203  //	  h1.Fill(x, 1)
   204  //	  h2.Fill(y, 1)
   205  //	  return nil
   206  //	})
   207  func (nt *Ntuple) Scan(query string, f any) error {
   208  	if f == nil {
   209  		return fmt.Errorf("hbook/ntup: nil func")
   210  	}
   211  	rv := reflect.ValueOf(f)
   212  	rt := rv.Type()
   213  	if rt.Kind() != reflect.Func {
   214  		return fmt.Errorf("hbook/ntup: expected a func, got %T", f)
   215  	}
   216  	if rt.NumOut() != 1 || rt.Out(0) != reflect.TypeOf((*error)(nil)).Elem() {
   217  		return fmt.Errorf("hbook/ntup: expected a func returning an error. got %T", f)
   218  	}
   219  	vargs := make([]reflect.Value, rt.NumIn())
   220  	args := make([]any, rt.NumIn())
   221  	for i := range args {
   222  		ptr := reflect.New(rt.In(i))
   223  		args[i] = ptr.Interface()
   224  		vargs[i] = ptr.Elem()
   225  	}
   226  
   227  	query, err := nt.massageQuery(query)
   228  	if err != nil {
   229  		return err
   230  	}
   231  
   232  	rows, err := nt.db.Query(query)
   233  	if err != nil {
   234  		return err
   235  	}
   236  	defer rows.Close()
   237  
   238  	for rows.Next() {
   239  		err = rows.Scan(args...)
   240  		if err != nil {
   241  			return err
   242  		}
   243  
   244  		out := rv.Call(vargs)[0].Interface()
   245  		if out != nil {
   246  			return out.(error)
   247  		}
   248  	}
   249  
   250  	err = rows.Err()
   251  	if err == io.EOF {
   252  		err = nil
   253  	}
   254  	return err
   255  }
   256  
   257  // ScanH1D executes a query against the ntuple and fills the histogram with
   258  // the results of the query.
   259  // If h is nil, a (100-bins, xmin, xmax+ULP) histogram is created,
   260  // where xmin and xmax are inferred from the content of the underlying database.
   261  func (nt *Ntuple) ScanH1D(query string, h *hbook.H1D) (*hbook.H1D, error) {
   262  	if h == nil {
   263  		var (
   264  			xmin = +math.MaxFloat64
   265  			xmax = -math.MaxFloat64
   266  		)
   267  		// FIXME(sbinet) leverage the underlying db min/max functions,
   268  		// instead of crawling through the whole data set.
   269  		err := nt.Scan(query, func(x float64) error {
   270  			xmin = math.Min(xmin, x)
   271  			xmax = math.Max(xmax, x)
   272  			return nil
   273  		})
   274  		if err != nil {
   275  			return nil, err
   276  		}
   277  
   278  		h = hbook.NewH1D(100, xmin, nextULP(xmax))
   279  	}
   280  
   281  	err := nt.Scan(query, func(x float64) error {
   282  		h.Fill(x, 1)
   283  		return nil
   284  	})
   285  
   286  	return h, err
   287  }
   288  
   289  // ScanH2D executes a query against the ntuple and fills the histogram with
   290  // the results of the query.
   291  // If h is nil, a (100-bins, xmin, xmax+ULP) (100-bins, ymin, ymax+ULP) 2d-histogram
   292  // is created,
   293  // where xmin, xmax and ymin,ymax are inferred from the content of the
   294  // underlying database.
   295  func (nt *Ntuple) ScanH2D(query string, h *hbook.H2D) (*hbook.H2D, error) {
   296  	if h == nil {
   297  		var (
   298  			xmin = +math.MaxFloat64
   299  			xmax = -math.MaxFloat64
   300  			ymin = +math.MaxFloat64
   301  			ymax = -math.MaxFloat64
   302  		)
   303  		// FIXME(sbinet) leverage the underlying db min/max functions,
   304  		// instead of crawling through the whole data set.
   305  		err := nt.Scan(query, func(x, y float64) error {
   306  			xmin = math.Min(xmin, x)
   307  			xmax = math.Max(xmax, x)
   308  			ymin = math.Min(ymin, y)
   309  			ymax = math.Max(ymax, y)
   310  			return nil
   311  		})
   312  		if err != nil {
   313  			return nil, err
   314  		}
   315  
   316  		h = hbook.NewH2D(100, xmin, nextULP(xmax), 100, ymin, nextULP(ymax))
   317  	}
   318  
   319  	err := nt.Scan(query, func(x, y float64) error {
   320  		h.Fill(x, y, 1)
   321  		return nil
   322  	})
   323  
   324  	return h, err
   325  }
   326  
   327  func (nt *Ntuple) massageQuery(q string) (string, error) {
   328  	const (
   329  		tokWHERE = " WHERE "
   330  		tokWhere = " where "
   331  		tokORDER = " ORDER "
   332  		tokOrder = " order "
   333  	)
   334  	vars := q
   335  	where := ""
   336  	switch {
   337  	case strings.Contains(q, tokWHERE):
   338  		toks := strings.Split(q, tokWHERE)
   339  		vars = toks[0]
   340  		where = " where " + toks[1]
   341  	case strings.Contains(q, tokWhere):
   342  		toks := strings.Split(q, tokWhere)
   343  		vars = toks[0]
   344  		where = " where " + toks[1]
   345  	}
   346  
   347  	order := ""
   348  	switch {
   349  	case strings.Contains(q, tokORDER):
   350  	case strings.Contains(q, tokOrder):
   351  	default:
   352  		order = " order by id()"
   353  	}
   354  
   355  	// FIXME(sbinet) this is vulnerable to SQL injections...
   356  	return "select " + vars + " from " + nt.name + where + order, nil
   357  }
   358  
   359  func nextULP(v float64) float64 {
   360  	return math.Nextafter(v, v+1)
   361  }