github.com/lmorg/murex@v0.0.0-20240217211045-e081c89cd4ef/lang/define_index_tables.go (about)

     1  package lang
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"regexp"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/lmorg/murex/utils"
    11  )
    12  
    13  const (
    14  	byRowNumber = iota + 1
    15  	byColumnNumber
    16  	byColumnName
    17  
    18  	maxReportedUnmatched = 5
    19  )
    20  
    21  var (
    22  	rxColumnPrefixOld = regexp.MustCompile(`^:[0-9]+$`)
    23  	rxRowSuffixOld    = regexp.MustCompile(`^[0-9]+:$`)
    24  	rxColumnPrefixNew = regexp.MustCompile(`^\*[a-zA-Z]$`)
    25  	rxRowSuffixNew    = regexp.MustCompile(`^\*[0-9]+$`)
    26  	errMixAndMatch    = errors.New("you cannot mix and match matching modes")
    27  )
    28  
    29  // IndexTemplateTable is a handy standard indexer you can use in your custom data types for tabulated / streamed data.
    30  // The point of this is to minimize code rewriting and standardising the behavior of the indexer.
    31  func IndexTemplateTable(p *Process, params []string, cRecords chan []string, marshaller func([]string) []byte) error {
    32  	if p.IsNot {
    33  		return ittNot(p, params, cRecords, marshaller)
    34  	}
    35  	return ittIndex(p, params, cRecords, marshaller)
    36  }
    37  
    38  func charToIndex(b byte) int {
    39  	if b > 96 {
    40  		return int(b - 97)
    41  	}
    42  	return int(b - 65)
    43  }
    44  
    45  func ittIndex(p *Process, params []string, cRecords chan []string, marshaller func([]string) []byte) (err error) {
    46  	var (
    47  		mode           int
    48  		matchStr       []string
    49  		matchInt       []int
    50  		unmatched      []string
    51  		unmatchedCount int
    52  	)
    53  
    54  	defer func() {
    55  		if len(unmatched) != 0 {
    56  			p.ExitNum = 1
    57  			if unmatchedCount > maxReportedUnmatched {
    58  				unmatched = append(unmatched, fmt.Sprintf("...plus %d more", unmatchedCount-maxReportedUnmatched))
    59  			}
    60  			err = fmt.Errorf("some records did not contain all of the requested fields:%s%s",
    61  				utils.NewLineString,
    62  				strings.Join(unmatched, utils.NewLineString))
    63  		}
    64  	}()
    65  
    66  	errUnmatched := func(recs []string) {
    67  		unmatchedCount++
    68  		if unmatchedCount > maxReportedUnmatched {
    69  			return
    70  		}
    71  		unmatched = append(unmatched, strings.Join(recs, "\t"))
    72  	}
    73  
    74  	for i := range params {
    75  		switch {
    76  		case rxRowSuffixOld.MatchString(params[i]):
    77  			if mode != 0 && mode != byRowNumber {
    78  				return errMixAndMatch
    79  			}
    80  			mode = byRowNumber
    81  			num, _ := strconv.Atoi(params[i][:len(params[i])-1])
    82  			matchInt = append(matchInt, num)
    83  
    84  		case rxRowSuffixNew.MatchString(params[i]):
    85  			if mode != 0 && mode != byRowNumber {
    86  				return errMixAndMatch
    87  			}
    88  			mode = byRowNumber
    89  			num, _ := strconv.Atoi(params[i][1:])
    90  			matchInt = append(matchInt, num-1) // Don't count from zero
    91  
    92  		case rxColumnPrefixOld.MatchString(params[i]):
    93  			if mode != 0 && mode != byColumnNumber {
    94  				return errMixAndMatch
    95  			}
    96  			mode = byColumnNumber
    97  			num, _ := strconv.Atoi(params[i][1:])
    98  			matchInt = append(matchInt, num)
    99  
   100  		case rxColumnPrefixNew.MatchString(params[i]):
   101  			if mode != 0 && mode != byColumnNumber {
   102  				return errMixAndMatch
   103  			}
   104  			mode = byColumnNumber
   105  			num := charToIndex(params[i][1])
   106  			matchInt = append(matchInt, num)
   107  
   108  		default:
   109  			if mode != 0 && mode != byColumnName {
   110  				return errMixAndMatch
   111  			}
   112  			matchStr = append(matchStr, params[i])
   113  			mode = byColumnName
   114  
   115  		}
   116  	}
   117  
   118  	switch mode {
   119  	case byRowNumber:
   120  		var (
   121  			ordered = true
   122  			last    int
   123  			max     int
   124  		)
   125  		// check order
   126  		for _, i := range matchInt {
   127  			if i < last {
   128  				ordered = false
   129  			}
   130  			if i > max {
   131  				max = i
   132  			}
   133  			last = i
   134  		}
   135  
   136  		if ordered {
   137  			// ordered matching - for this we can just read in the records we want sequentially. Low memory overhead
   138  			var i int
   139  			for {
   140  				recs, ok := <-cRecords
   141  				if !ok {
   142  					return nil
   143  				}
   144  				if i == matchInt[0] {
   145  					_, err = p.Stdout.Writeln(marshaller(recs))
   146  					if err != nil {
   147  						p.Stderr.Writeln([]byte(err.Error()))
   148  					}
   149  					if len(matchInt) == 1 {
   150  						matchInt[0] = -1
   151  						return nil
   152  					}
   153  					matchInt = matchInt[1:]
   154  				}
   155  				i++
   156  			}
   157  
   158  		} else {
   159  			// unordered matching - for this we load the entire data set into memory - up until the maximum value
   160  			var (
   161  				i     int
   162  				lines = make([][]string, max+1)
   163  			)
   164  			for {
   165  				recs, ok := <-cRecords
   166  				if !ok {
   167  					break
   168  				}
   169  				if i <= max {
   170  					lines[i] = recs
   171  				}
   172  				i++
   173  			}
   174  
   175  			for _, j := range matchInt {
   176  				_, err = p.Stdout.Writeln(marshaller(lines[j]))
   177  				if err != nil {
   178  					p.Stderr.Writeln([]byte(err.Error()))
   179  				}
   180  			}
   181  
   182  			return nil
   183  		}
   184  
   185  	case byColumnNumber:
   186  		for {
   187  			recs, ok := <-cRecords
   188  			if !ok {
   189  				return nil
   190  			}
   191  
   192  			var line []string
   193  			for _, i := range matchInt {
   194  				if i < len(recs) {
   195  					line = append(line, recs[i])
   196  				} else {
   197  					if len(recs) == 0 || (len(recs) == 1 && recs[0] == "") {
   198  						continue
   199  					}
   200  					errUnmatched(recs)
   201  				}
   202  			}
   203  			if len(line) != 0 {
   204  				_, err = p.Stdout.Writeln(marshaller(line))
   205  				if err != nil {
   206  					p.Stderr.Writeln([]byte(err.Error()))
   207  				}
   208  			}
   209  		}
   210  
   211  	case byColumnName:
   212  		var (
   213  			lineNum  int
   214  			headings = make(map[string]int)
   215  		)
   216  
   217  		for {
   218  			var line []string
   219  			recs, ok := <-cRecords
   220  			if !ok {
   221  				return nil
   222  			}
   223  
   224  			if lineNum == 0 {
   225  				for i := range recs {
   226  					headings[recs[i]] = i + 1
   227  				}
   228  				for i := range matchStr {
   229  					if headings[matchStr[i]] != 0 {
   230  						line = append(line, matchStr[i])
   231  					}
   232  				}
   233  				if len(line) != 0 {
   234  					_, err = p.Stdout.Writeln(marshaller(line))
   235  					if err != nil {
   236  						p.Stderr.Writeln([]byte(err.Error()))
   237  					}
   238  				}
   239  
   240  			} else {
   241  				for i := range matchStr {
   242  					col := headings[matchStr[i]]
   243  					if col != 0 && col < len(recs)+1 {
   244  						line = append(line, recs[col-1])
   245  					} else {
   246  						if len(recs) == 0 || (len(recs) == 1 && recs[0] == "") {
   247  							continue
   248  						}
   249  						errUnmatched(recs)
   250  					}
   251  				}
   252  				if len(line) != 0 {
   253  					_, err = p.Stdout.Writeln(marshaller(line))
   254  					if err != nil {
   255  						p.Stderr.Writeln([]byte(err.Error()))
   256  					}
   257  				}
   258  			}
   259  			lineNum++
   260  		}
   261  
   262  	default:
   263  		return errors.New("you haven't selected any rows / columns")
   264  	}
   265  }
   266  
   267  func ittNot(p *Process, params []string, cRecords chan []string, marshaller func([]string) []byte) error {
   268  	var (
   269  		mode     int
   270  		matchStr = make(map[string]bool)
   271  		matchInt = make(map[int]bool)
   272  	)
   273  
   274  	for i := range params {
   275  		switch {
   276  		case rxRowSuffixOld.MatchString(params[i]):
   277  			if mode != 0 && mode != byRowNumber {
   278  				return errMixAndMatch
   279  			}
   280  			mode = byRowNumber
   281  			num, _ := strconv.Atoi(params[i][:len(params[i])-1])
   282  			matchInt[num] = true
   283  
   284  		case rxRowSuffixNew.MatchString(params[i]):
   285  			if mode != 0 && mode != byRowNumber {
   286  				return errMixAndMatch
   287  			}
   288  			mode = byRowNumber
   289  			num, _ := strconv.Atoi(params[i][1:])
   290  			matchInt[num+1] = true // Don't count from zero
   291  
   292  		case rxColumnPrefixOld.MatchString(params[i]):
   293  			if mode != 0 && mode != byColumnNumber {
   294  				return errMixAndMatch
   295  			}
   296  			mode = byColumnNumber
   297  			num, _ := strconv.Atoi(params[i][1:])
   298  			matchInt[num] = true
   299  
   300  		case rxColumnPrefixNew.MatchString(params[i]):
   301  			if mode != 0 && mode != byColumnNumber {
   302  				return errMixAndMatch
   303  			}
   304  			mode = byColumnNumber
   305  			num := charToIndex(params[i][1])
   306  			matchInt[num] = true
   307  
   308  		default:
   309  			if mode != 0 && mode != byColumnName {
   310  				return errMixAndMatch
   311  			}
   312  			matchStr[params[i]] = true
   313  			mode = byColumnName
   314  
   315  		}
   316  	}
   317  
   318  	switch mode {
   319  	case byRowNumber:
   320  		i := -1
   321  		for {
   322  			recs, ok := <-cRecords
   323  			if !ok {
   324  				return nil
   325  			}
   326  
   327  			if !matchInt[i] {
   328  				_, err := p.Stdout.Writeln(marshaller(recs))
   329  				if err != nil {
   330  					p.Stderr.Writeln([]byte(err.Error()))
   331  				}
   332  			}
   333  			i++
   334  		}
   335  
   336  	case byColumnNumber:
   337  		for {
   338  			recs, ok := <-cRecords
   339  			if !ok {
   340  				return nil
   341  			}
   342  
   343  			var line []string
   344  			for i := range recs {
   345  				if !matchInt[i] {
   346  					line = append(line, recs[i])
   347  				}
   348  			}
   349  			if len(line) != 0 {
   350  				p.Stdout.Writeln(marshaller(line))
   351  			}
   352  		}
   353  
   354  	case byColumnName:
   355  		var (
   356  			lineNum  int
   357  			headings = make(map[int]string)
   358  		)
   359  
   360  		for {
   361  			var line []string
   362  			recs, ok := <-cRecords
   363  			if !ok {
   364  				return nil
   365  			}
   366  
   367  			if lineNum == 0 {
   368  				for i := range recs {
   369  					headings[i] = recs[i]
   370  					if !matchStr[headings[i]] {
   371  						line = append(line, recs[i])
   372  					}
   373  				}
   374  				if len(line) != 0 {
   375  					p.Stdout.Writeln(marshaller(line))
   376  				}
   377  
   378  			} else {
   379  				for i := range recs {
   380  					if !matchStr[headings[i]] {
   381  						line = append(line, recs[i])
   382  					}
   383  				}
   384  
   385  				if len(line) != 0 {
   386  					p.Stdout.Writeln(marshaller(line))
   387  				}
   388  			}
   389  			lineNum++
   390  		}
   391  
   392  	default:
   393  		return errors.New("you haven't selected any rows / columns")
   394  	}
   395  }