github.com/tobgu/qframe@v0.4.0/grouper.go (about) 1 package qframe 2 3 import ( 4 "github.com/tobgu/qframe/internal/grouper" 5 "github.com/tobgu/qframe/internal/icolumn" 6 "github.com/tobgu/qframe/internal/index" 7 "github.com/tobgu/qframe/qerrors" 8 "github.com/tobgu/qframe/types" 9 ) 10 11 // GroupStats contains internal statistics for grouping. 12 // Clients should not depend on this for any type of decision making. It is strictly "for info". 13 // The layout may change if the underlying grouping mechanisms change. 14 type GroupStats grouper.GroupStats 15 16 // Grouper contains groups of rows produced by the QFrame.GroupBy function. 17 type Grouper struct { 18 indices []index.Int 19 groupedColumns []string 20 columns []namedColumn 21 columnsByName map[string]namedColumn 22 Err error 23 Stats GroupStats 24 } 25 26 // Aggregation represents a function to apply to a column. 27 type Aggregation struct { 28 // Fn is the aggregation function to apply. 29 // 30 // IMPORTANT: For pointer and reference types you must not assume that the data passed argument 31 // to this function is valid after the function returns. If you plan to keep it around you need 32 // to take a copy of the data. 33 Fn types.SliceFuncOrBuiltInId 34 35 // Column is the name of the column to apply the aggregation to. 36 Column string 37 38 // As can be used to specify the destination column name, if not given defaults to the 39 // value of Column. 40 As string 41 } 42 43 // Aggregate applies the given aggregations to all row groups in the Grouper. 44 // 45 // Time complexity O(m*n) where m = number of aggregations, n = number of rows. 46 func (g Grouper) Aggregate(aggs ...Aggregation) QFrame { 47 if g.Err != nil { 48 return QFrame{Err: g.Err} 49 } 50 51 // Loop over all groups and pick the first row in each of the groups. 52 // This index will be used to populate the grouped by columns below. 53 firstElementIx := make(index.Int, len(g.indices)) 54 for i, ix := range g.indices { 55 firstElementIx[i] = ix[0] 56 } 57 58 newColumnsByName := make(map[string]namedColumn, len(g.groupedColumns)+len(aggs)) 59 newColumns := make([]namedColumn, 0, len(g.groupedColumns)+len(aggs)) 60 for i, colName := range g.groupedColumns { 61 col := g.columnsByName[colName] 62 col.pos = i 63 col.Column = col.Subset(firstElementIx) 64 newColumnsByName[colName] = col 65 newColumns = append(newColumns, col) 66 } 67 68 var err error 69 for _, agg := range aggs { 70 col, ok := g.columnsByName[agg.Column] 71 if !ok { 72 return QFrame{Err: qerrors.New("Aggregate", unknownCol(agg.Column))} 73 } 74 75 newColumnName := agg.Column 76 if agg.As != "" { 77 newColumnName = agg.As 78 } 79 col.name = newColumnName 80 81 _, ok = newColumnsByName[newColumnName] 82 if ok { 83 return QFrame{Err: qerrors.New( 84 "Aggregate", 85 "cannot aggregate on column that is part of group by or is already an aggregate: %s", newColumnName)} 86 } 87 88 if agg.Fn == "count" { 89 // Special convenience case for "count" which would normally require a cast from 90 // any other type of column to int before being executed. 91 counts := make([]int, len(g.indices)) 92 for i, ix := range g.indices { 93 counts[i] = len(ix) 94 } 95 96 col.Column = icolumn.New(counts) 97 } else { 98 col.Column, err = col.Aggregate(g.indices, agg.Fn) 99 if err != nil { 100 return QFrame{Err: qerrors.Propagate("Aggregate", err)} 101 } 102 } 103 104 newColumnsByName[newColumnName] = col 105 newColumns = append(newColumns, col) 106 } 107 108 return QFrame{columns: newColumns, columnsByName: newColumnsByName, index: index.NewAscending(uint32(len(g.indices)))} 109 } 110 111 // QFrames returns a slice of QFrame where each frame represents the content of one group. 112 // 113 // Time complexity O(n) where n = number of groups. 114 func (g Grouper) QFrames() ([]QFrame, error) { 115 if g.Err != nil { 116 return nil, g.Err 117 } 118 119 baseFrame := QFrame{columns: g.columns, columnsByName: g.columnsByName, index: index.Int{}} 120 result := make([]QFrame, len(g.indices)) 121 for i, ix := range g.indices { 122 result[i] = baseFrame.withIndex(ix) 123 } 124 return result, nil 125 }