github.com/andrewrech/ih-abstract@v0.0.0-20210322142951-2fec1c8d0f38/docs.md (about) 1 ``` 2 2021/01/26 13:06:29 starting 3 4 Select raw data for Immune Health report generation. 5 6 USAGE: 7 8 < results-raw.csv | ih-abstract 9 10 DEFAULTS: 11 12 -config string 13 Path to ih-abstract.yml SQL connection configuration file 14 -no-filter 15 Save input data to .csv and exit without Immune Health filtering 16 -old string 17 Path to existing results.csv output data from last run (optional) 18 -print-config 19 Print an example configuration file and exit 20 -sql 21 Read input from Microsoft SQL database instead of Stdin 22 23 DETAILS: 24 25 ih-abstract streams input raw pathology results to the immune.health.report R package 26 for report generation and quality assurance. The input is .csv data or direct streaming 27 from a Microsoft SQL driver-compatible database. The output is filtered .csv/.txt files 28 for incremental new report generation and quality assurance. 29 30 Optionally, Immune Health filtering can be turned off to use ih-abstract as a general 31 method to retrieve arbitrary or incremental pathology results. 32 33 Quality assurance output consists of files containing 34 1) unique, and 35 2) new (never-before-seen) 36 Immune Health report results for manual review. 37 38 Dependencies are vendored and consist of the Go standard library and 39 Microsoft SQL driver. 40 41 OUTPUT: 42 43 Output for report generation: 44 45 results.csv: all results 46 results-increment.csv: new results since last run 47 new-ids.txt: patient identifiers with new results since last run 48 49 Output for quality assurance: 50 51 pdl1.csv: potential PD-L1 reports 52 msi.csv: potential MSI reports 53 cpd.csv: potential CPD reports 54 wbc.csv: white blood cell counts 55 56 pdl1-unique-strings.txt: unique PD-L1 strings 57 pdl1-unique-strings-new.txt: unique PD-L1 strings, new vs. 
last run 58 msi-unique-strings.txt: unique MSI strings 59 msi-unique-strings-new.txt: unique MSI strings, new vs. last run 60 61 CONFIGURATION FILE: 62 63 See 'ih-abstract --print-config'. Paths searched by default: 64 65 $XDG_CONFIG_HOME/ih-abstract/ih-abstract.yml 66 $HOME/.ih-abstract.yml 67 ./ih-abstract.yml 68 69 70 TESTING: 71 72 go test 73 NOTE: some integration tests require restricted 74 PHI-containing data. Data is available within our 75 organization upon request. 76 77 NOTE: To test the live server connection, set 78 environment variable IH_ABSTRACT_TEST_CONFIG to 79 a test configuration file path. 80 These tests are disabled by default. 81 82 BENCHMARKING: 83 84 go test -bench=. 85 NOTE: some benchmarks require restricted 86 organization VPN access. Access is available within our 87 organization upon request. To test the live server 88 connection, set environment variable 89 IH_ABSTRACT_TEST_CONFIG to a test configuration 90 file path. These tests are disabled by default. 91 92 ``` 93 <!-- Code generated by gomarkdoc. 
DO NOT EDIT --> 94 95 # ih\-abstract 96 97 ```go 98 import "github.com/andrewrech/ih-abstract" 99 ``` 100 101 ## Index 102 103 - [Constants](<#constants>) 104 - [func CPD(s string) bool](<#func-cpd>) 105 - [func Diff(oldFile *string, in chan []string, header []string) (out chan []string, done chan struct{})](<#func-diff>) 106 - [func DiffUnq(in chan []string, name string) (channels map[string](chan []string), done chan struct{})](<#func-diffunq>) 107 - [func Exclude(s string) bool](<#func-exclude>) 108 - [func MSI(s string) bool](<#func-msi>) 109 - [func New(r *Records, header []string, in chan []string, out chan []string, done chan struct{})](<#func-new>) 110 - [func PDL1(s string) bool](<#func-pdl1>) 111 - [func RecordID(header []string) (id string, err error)](<#func-recordid>) 112 - [func WbcLymph(s string) bool](<#func-wbclymph>) 113 - [func Whitespace(s []string) []string](<#func-whitespace>) 114 - [func Write(h []string, in map[string](chan []string)) (done chan struct{})](<#func-write>) 115 - [func WriteRows(in chan []string, name string, h []string, done chan struct{})](<#func-writerows>) 116 - [func connect(config string) (db *sql.DB, err error)](<#func-connect>) 117 - [func count(counter *int64, descr string, signal chan struct{})](<#func-count>) 118 - [func filterResults(in chan []string, header []string) (results map[string](chan []string), done chan struct{})](<#func-filterresults>) 119 - [func filterRow(l []string, colNames map[string]int, pat map[string](*regexp.Regexp), channels map[string](chan []string), counter *int64)](<#func-filterrow>) 120 - [func headerParse(h []string) (colNames map[string]int)](<#func-headerparse>) 121 - [func locateDefaultConfig() (config string, err error)](<#func-locatedefaultconfig>) 122 - [func main()](<#func-main>) 123 - [func mainInner(f flags, in *os.File)](<#func-maininner>) 124 - [func printConf()](<#func-printconf>) 125 - [func splitCh(in chan []string) (out1 chan []string, out2 chan []string, done chan 
struct{})](<#func-splitch>) 126 - [func usage()](<#func-usage>) 127 - [type Records](<#type-records>) 128 - [func Existing(name *string) (rs *Records)](<#func-existing>) 129 - [func prevUnq(f string) (r *Records)](<#func-prevunq>) 130 - [func (r *Records) Add(l *[]string) (err error)](<#func-records-add>) 131 - [func (r *Records) Check(l *[]string) (exists bool, err error)](<#func-records-check>) 132 - [type Store](<#type-store>) 133 - [type Writer](<#type-writer>) 134 - [func File(name string, h []string) (w Writer)](<#func-file>) 135 - [type confVars](<#type-confvars>) 136 - [func loadConfig(config string) (vars confVars, err error)](<#func-loadconfig>) 137 - [type flags](<#type-flags>) 138 - [func flagParse() (f flags)](<#func-flagparse>) 139 - [type rawRecords](<#type-rawrecords>) 140 - [func DB(config string, db *sql.DB) (r rawRecords)](<#func-db>) 141 - [func read(f flags, in *os.File) (r rawRecords)](<#func-read>) 142 - [func readCSV(in io.Reader) (r rawRecords)](<#func-readcsv>) 143 - [func readSQLRows(rows *sql.Rows) (r rawRecords)](<#func-readsqlrows>) 144 145 146 ## Constants 147 148 MsiReport is the string form of the regular expression used to match microsatellite instability reports of interest\. 149 150 ```go 151 const MsiReport = "[Mm]icrosatellite[ ]+[Ii]nstability" 152 ``` 153 154 MsiResult is the string form of the regular expression used to extract microsatellite instability results\. 155 156 ```go 157 const MsiResult = "[^\\.:]+findings[^\\.]+[Mm]icrosat[^\\.]+." 158 ``` 159 160 Pdl1Report is the string form of the regular expression used to match PD\-L1 reports of interest\. 161 162 ```go 163 const Pdl1Report = "(?i)pd-?l1" 164 ``` 165 166 Pdl1Result is the string form of the regular expression used to extract PD\-L1 tumor/cancer score results\. 167 168 ```go 169 const Pdl1Result = "(?i)(tumor proportion score|combined positive score \\(cps\\)|cps score):? ?[><~]* ?[0-9\\-\\.]+ ?%?" 
170 ``` 171 172 SpacesAndBreaks is the string form of the replace\-all regular expression used to normalize whitespace in pathology report strings of interest\. 173 174 ```go 175 const SpacesAndBreaks = `\s+` 176 ``` 177 178 ## func [CPD](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L38>) 179 180 ```go 181 func CPD(s string) bool 182 ``` 183 184 CPD efficiently selects reports that are CPD reports using a lookup table\. 185 186 ## func [Diff](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L193>) 187 188 ```go 189 func Diff(oldFile *string, in chan []string, header []string) (out chan []string, done chan struct{}) 190 ``` 191 192 Diff diffs old and new record sets\. 193 194 ## func [DiffUnq](<https://github.com/andrewrech/ih-abstract/blob/main/unique.go#L27>) 195 196 ```go 197 func DiffUnq(in chan []string, name string) (channels map[string](chan []string), done chan struct{}) 198 ``` 199 200 DiffUnq identifies unique strings from an input stream and compares the unique strings to an existing output file\. The function returns 1\) unique strings and 2\) new strings compared to the existing output file\. 201 202 ## func [Exclude](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L82>) 203 204 ```go 205 func Exclude(s string) bool 206 ``` 207 208 Exclude efficiently excludes unwanted report categories using a lookup table\. 209 210 ## func [MSI](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L66>) 211 212 ```go 213 func MSI(s string) bool 214 ``` 215 216 MSI uses a lookup table to efficiently test if a string should be evaluated via regular expression as a potential MSI report\. 217 218 ## func [New](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L121>) 219 220 ```go 221 func New(r *Records, header []string, in chan []string, out chan []string, done chan struct{}) 222 ``` 223 224 New identifies new Pathology database records based on a record hash\. 
For each new record\, the corresponding patient identifier is saved to a file\. 225 226 ## func [PDL1](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L48>) 227 228 ```go 229 func PDL1(s string) bool 230 ``` 231 232 PDL1 uses a lookup table to efficiently test if a string should be evaluated via regular expression as a potential PD\-L1 report\. 233 234 ## func [RecordID](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L176>) 235 236 ```go 237 func RecordID(header []string) (id string, err error) 238 ``` 239 240 RecordID returns a single input data column name containing a person\-instance identifier\. The person\-instance identifier is either an MRN \(preferred\) or UID\. 241 242 ## func [WbcLymph](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L27>) 243 244 ```go 245 func WbcLymph(s string) bool 246 ``` 247 248 WbcLymph efficiently selects records that are WbcLymph or lymphocyte counts using a lookup table\. 249 250 ## func [Whitespace](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L115>) 251 252 ```go 253 func Whitespace(s []string) []string 254 ``` 255 256 Whitespace normalizes whitespace in report strings of interest\. 257 258 ## func [Write](<https://github.com/andrewrech/ih-abstract/blob/main/write.go#L77>) 259 260 ```go 261 func Write(h []string, in map[string](chan []string)) (done chan struct{}) 262 ``` 263 264 Write writes results to output CSV files using a common header\. 265 266 ## func [WriteRows](<https://github.com/andrewrech/ih-abstract/blob/main/write.go#L58>) 267 268 ```go 269 func WriteRows(in chan []string, name string, h []string, done chan struct{}) 270 ``` 271 272 WriteRows appends strings to a CSV file using a Writer\. 273 274 ## func [connect](<https://github.com/andrewrech/ih-abstract/blob/main/connect.go#L11>) 275 276 ```go 277 func connect(config string) (db *sql.DB, err error) 278 ``` 279 280 connect connects to an SQL database\. 
281 282 ## func [count](<https://github.com/andrewrech/ih-abstract/blob/main/utils.go#L11>) 283 284 ```go 285 func count(counter *int64, descr string, signal chan struct{}) 286 ``` 287 288 count counts processed lines per unit time\. 289 290 ## func [filterResults](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L169>) 291 292 ```go 293 func filterResults(in chan []string, header []string) (results map[string](chan []string), done chan struct{}) 294 ``` 295 296 filterResults filters a raw data input stream row by row\. 297 298 ## func [filterRow](<https://github.com/andrewrech/ih-abstract/blob/main/filter.go#L126>) 299 300 ```go 301 func filterRow(l []string, colNames map[string]int, pat map[string](*regexp.Regexp), channels map[string](chan []string), counter *int64) 302 ``` 303 304 filterRow filters a row of input data for matches to patterns of interest\. 305 306 ## func [headerParse](<https://github.com/andrewrech/ih-abstract/blob/main/read.go#L151>) 307 308 ```go 309 func headerParse(h []string) (colNames map[string]int) 310 ``` 311 312 headerParse parses input data column names\. 313 314 ## func [locateDefaultConfig](<https://github.com/andrewrech/ih-abstract/blob/main/config.go#L37>) 315 316 ```go 317 func locateDefaultConfig() (config string, err error) 318 ``` 319 320 locateDefaultConfig locates the configuration file in $XDG\_CONFIG\_HOME\, $HOME\, or the current directory\. 321 322 ## func [main](<https://github.com/andrewrech/ih-abstract/blob/main/ih-abstract.go#L11>) 323 324 ```go 325 func main() 326 ``` 327 328 ih\-abstract streams input raw pathology results to the immune\.health\.report R package for report generation and quality assurance\. The input is \.csv data or direct streaming from a Microsoft SQL driver\-compatible database\. The output is filtered \.csv files for incremental new report generation and quality assurance\. 
Optionally\, Immune Health filtering can be turned off to use ih\-abstract as a general method to retrieve arbitrary or incremental pathology results\. 329 330 ## func [mainInner](<https://github.com/andrewrech/ih-abstract/blob/main/ih-abstract.go#L27>) 331 332 ```go 333 func mainInner(f flags, in *os.File) 334 ``` 335 336 mainInner facilitates testing by allowing parameters to be passed to the main program code path\. 337 338 ## func [printConf](<https://github.com/andrewrech/ih-abstract/blob/main/config.go#L14>) 339 340 ```go 341 func printConf() 342 ``` 343 344 printConf prints an example SQL database configuration file\. 345 346 ## func [splitCh](<https://github.com/andrewrech/ih-abstract/blob/main/utils.go#L37>) 347 348 ```go 349 func splitCh(in chan []string) (out1 chan []string, out2 chan []string, done chan struct{}) 350 ``` 351 352 splitCh splits a \[\]string channel into two channels\, sending results from the input channel onto both output channels\. 353 354 ## func [usage](<https://github.com/andrewrech/ih-abstract/blob/main/cli.go#L38>) 355 356 ```go 357 func usage() 358 ``` 359 360 usage prints usage\. 361 362 ## type [Records](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L21-L24>) 363 364 Records provides thread\-safe access to Store\. 365 366 ```go 367 type Records struct { 368 Store 369 sync.Mutex 370 } 371 ``` 372 373 ### func [Existing](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L69>) 374 375 ```go 376 func Existing(name *string) (rs *Records) 377 ``` 378 379 Existing creates a map of existing records\. 380 381 ### func [prevUnq](<https://github.com/andrewrech/ih-abstract/blob/main/unique.go#L10>) 382 383 ```go 384 func prevUnq(f string) (r *Records) 385 ``` 386 387 prevUnq adds previously identified unique strings from an existing output file to a hash map\. 
388 389 ### func \(\*Records\) [Add](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L27>) 390 391 ```go 392 func (r *Records) Add(l *[]string) (err error) 393 ``` 394 395 Add adds a record\. 396 397 ### func \(\*Records\) [Check](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L49>) 398 399 ```go 400 func (r *Records) Check(l *[]string) (exists bool, err error) 401 ``` 402 403 Check checks that a record exists\. 404 405 ## type [Store](<https://github.com/andrewrech/ih-abstract/blob/main/records.go#L18>) 406 407 Store is a blake2b hash map that stores string slices\. 408 409 ```go 410 type Store map[[blake2b.Size256]byte](struct{}) 411 ``` 412 413 ## type [Writer](<https://github.com/andrewrech/ih-abstract/blob/main/write.go#L12-L18>) 414 415 Writer contains a file name\, connection\, CSV Writer\, and a 'done' signal to cleanup the connection\. 416 417 ```go 418 type Writer struct { 419 name string 420 conn *os.File 421 w *csv.Writer 422 counter int64 423 done func() 424 } 425 ``` 426 427 ### func [File](<https://github.com/andrewrech/ih-abstract/blob/main/write.go#L21>) 428 429 ```go 430 func File(name string, h []string) (w Writer) 431 ``` 432 433 File creates an output CSV write file\. 434 435 ## type [confVars](<https://github.com/andrewrech/ih-abstract/blob/main/config.go#L27-L34>) 436 437 confVars is a struct of configuration variables required for the SQL database connection\. 
438 439 ```go 440 type confVars struct { 441 Username string `yaml:"username"` 442 Password string `yaml:"password"` 443 Host string `yaml:"host"` 444 Port string `yaml:"port"` 445 Database string `yaml:"database"` 446 Query string `yaml:"query"` 447 } 448 ``` 449 450 ### func [loadConfig](<https://github.com/andrewrech/ih-abstract/blob/main/config.go#L80>) 451 452 ```go 453 func loadConfig(config string) (vars confVars, err error) 454 ``` 455 456 ## type [flags](<https://github.com/andrewrech/ih-abstract/blob/main/cli.go#L10-L16>) 457 458 flags contains variables set by command line flags\. 459 460 ```go 461 type flags struct { 462 config *string 463 example *bool 464 noFilter *bool 465 old *string 466 sql *bool 467 } 468 ``` 469 470 ### func [flagParse](<https://github.com/andrewrech/ih-abstract/blob/main/cli.go#L19>) 471 472 ```go 473 func flagParse() (f flags) 474 ``` 475 476 flagParse parses command line flags\. 477 478 ## type [rawRecords](<https://github.com/andrewrech/ih-abstract/blob/main/connect.go#L44-L48>) 479 480 rawRecords contains a header\, a channel of raw records\, and a channel indicating when raw records have been read\. 481 482 ```go 483 type rawRecords struct { 484 header []string 485 out chan []string 486 done chan struct{} 487 } 488 ``` 489 490 ### func [DB](<https://github.com/andrewrech/ih-abstract/blob/main/connect.go#L51>) 491 492 ```go 493 func DB(config string, db *sql.DB) (r rawRecords) 494 ``` 495 496 DB reads records from an SQL database\. 497 498 ### func [read](<https://github.com/andrewrech/ih-abstract/blob/main/read.go#L15>) 499 500 ```go 501 func read(f flags, in *os.File) (r rawRecords) 502 ``` 503 504 read reads raw input data\. 505 506 ### func [readCSV](<https://github.com/andrewrech/ih-abstract/blob/main/read.go#L103>) 507 508 ```go 509 func readCSV(in io.Reader) (r rawRecords) 510 ``` 511 512 readCSV reads records from a CSV file\. 
513 514 ### func [readSQLRows](<https://github.com/andrewrech/ih-abstract/blob/main/read.go#L40>) 515 516 ```go 517 func readSQLRows(rows *sql.Rows) (r rawRecords) 518 ``` 519 520 readSQLRows reads rows of strings from an SQL database\. 521 522 523 524 Generated by [gomarkdoc](<https://github.com/princjef/gomarkdoc>)