github.com/hscells/guru@v0.0.0-20200207042420-2dabeb950d69/medline.go (about)

     1  package guru
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"strings"
    10  )
    11  
    12  const (
    13  	PMID = "PMID"
    14  	TI   = "TI"
    15  	BTI  = "BTI"
    16  	CTI  = "CTI"
    17  	AB   = "AB"
    18  	MH   = "MH"
    19  	PT   = "PT"
    20  	AU   = "AU"
    21  	DCOM = "DCOM"
    22  )
    23  
    24  type MedlineDocument struct {
    25  	PMID string   // PubMed ID
    26  	TI   string   // Title
    27  	AB   string   // Abstract
    28  	DCOM string   // Date Completed
    29  	MH   []string // MeSH Headings
    30  	PT   []string // Publication Types
    31  	AU   []string // Authors
    32  }
    33  
    34  type MedlineDocuments []MedlineDocument
    35  
    36  func UnmarshalMedline(r io.Reader) MedlineDocuments {
    37  
    38  	const (
    39  		Bnil int = iota
    40  		Bpmid
    41  		Bti
    42  		Bbti
    43  		Bcti
    44  		Bab
    45  		Bmh
    46  		Bpt
    47  		Bau
    48  		Bdcom
    49  	)
    50  	s := bufio.NewScanner(r)
    51  	var (
    52  		item int
    53  		docs MedlineDocuments
    54  		doc  MedlineDocument
    55  	)
    56  	c := new(strings.Builder)
    57  	for s.Scan() {
    58  		// There are a number of articles without titles.
    59  		// These variables will store book and collection title values.
    60  		var bti, cti string
    61  		line := s.Bytes()
    62  		if len(line) == 0 && len(doc.PMID) > 0 {
    63  			if len(doc.TI) == 0 && len(bti) > 0 {
    64  				doc.TI = bti
    65  			} else if len(doc.TI) == 0 && len(cti) > 0 {
    66  				doc.TI = cti
    67  			}
    68  			if len(doc.TI) == 0 {
    69  				log.Printf("parsing %s with no title\n", doc.PMID)
    70  			}
    71  			docs = append(docs, doc)
    72  			doc = MedlineDocument{}
    73  		} else if len(line) >= 5 && bytes.Equal(line[:6], []byte("      ")) {
    74  			_, err := c.Write(bytes.Replace(line, []byte("     "), []byte(""), 1))
    75  			if err != nil {
    76  				panic(err)
    77  			}
    78  		} else {
    79  			switch item {
    80  			case Bpmid:
    81  				doc.PMID = c.String()
    82  			case Bti:
    83  				doc.TI = c.String()
    84  			case Bbti:
    85  				bti = c.String()
    86  			case Bcti:
    87  				cti = c.String()
    88  			case Bab:
    89  				doc.AB = c.String()
    90  			case Bdcom:
    91  				doc.DCOM = c.String()
    92  			case Bmh:
    93  				doc.MH = append(doc.MH, c.String())
    94  			case Bpt:
    95  				doc.PT = append(doc.PT, c.String())
    96  			case Bau:
    97  				doc.AU = append(doc.AU, c.String())
    98  			case Bnil:
    99  				break
   100  			}
   101  			pair := bytes.Split(line, []byte("-"))
   102  			if len(pair) <= 1 {
   103  				continue
   104  			}
   105  			p0 := bytes.TrimSpace(pair[0])
   106  			if bytes.Equal(p0, []byte(PMID)) {
   107  				item = Bpmid
   108  			} else if bytes.Equal(p0, []byte(TI)) {
   109  				item = Bti
   110  			} else if bytes.Equal(p0, []byte(BTI)) {
   111  				item = Bbti
   112  			} else if bytes.Equal(p0, []byte(CTI)) {
   113  				item = Bcti
   114  			} else if bytes.Equal(p0, []byte(AB)) {
   115  				item = Bab
   116  			} else if bytes.Equal(p0, []byte(MH)) {
   117  				item = Bmh
   118  			} else if bytes.Equal(p0, []byte(PT)) {
   119  				item = Bpt
   120  			} else if bytes.Equal(p0, []byte(AU)) {
   121  				item = Bau
   122  			} else if bytes.Equal(p0, []byte(DCOM)) {
   123  				item = Bdcom
   124  			} else {
   125  				item = Bnil
   126  			}
   127  
   128  			c = new(strings.Builder)
   129  			_, err := c.Write(bytes.TrimSpace(bytes.Join(pair[1:], []byte("-"))))
   130  			if err != nil {
   131  				panic(err)
   132  			}
   133  		}
   134  	}
   135  	docs = append(docs, doc)
   136  	return docs
   137  }
   138  
   139  func UnmarshalAbstract(r io.Reader) MedlineDocuments {
   140  	s := bufio.NewScanner(r)
   141  	var (
   142  		docs MedlineDocuments
   143  		doc  MedlineDocument
   144  		i    int
   145  	)
   146  	for s.Scan() {
   147  		line := s.Text()
   148  		if len(line) == 0 {
   149  			i++
   150  			continue
   151  		}
   152  
   153  		if len(line) >= 5 && line[0:5] == "PMID:" {
   154  			doc.PMID = strings.TrimSpace(strings.Split(line, " ")[1])
   155  			docs = append(docs, doc)
   156  			doc = MedlineDocument{}
   157  			i = -1
   158  			continue
   159  		}
   160  
   161  		switch i {
   162  		case 2:
   163  			doc.TI = doc.TI + line
   164  		case 5:
   165  			doc.AB = doc.AB + line
   166  		}
   167  	}
   168  	return docs
   169  }
   170  
   171  func (m MedlineDocument) String() string {
   172  	var b strings.Builder
   173  	b.WriteString(fmt.Sprintf("\n"))
   174  	b.WriteString(fmt.Sprintf("PMID- %s\n", m.PMID))
   175  	b.WriteString(fmt.Sprintf("TI  - %s\n", m.TI))
   176  	b.WriteString(fmt.Sprintf("AB  - %s\n", m.AB))
   177  	b.WriteString(fmt.Sprintf("DCOM  - %s\n", m.DCOM))
   178  	for _, au := range m.AU {
   179  		b.WriteString(fmt.Sprintf("AU  - %s\n", au))
   180  	}
   181  	for _, pt := range m.PT {
   182  		b.WriteString(fmt.Sprintf("PT  - %s\n", pt))
   183  	}
   184  	for _, mh := range m.MH {
   185  		b.WriteString(fmt.Sprintf("MH  - %s\n", mh))
   186  	}
   187  	return b.String()
   188  }