github.com/hscells/guru@v0.0.0-20200207042420-2dabeb950d69/medline.go (about) 1 package guru 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "log" 9 "strings" 10 ) 11 12 const ( 13 PMID = "PMID" 14 TI = "TI" 15 BTI = "BTI" 16 CTI = "CTI" 17 AB = "AB" 18 MH = "MH" 19 PT = "PT" 20 AU = "AU" 21 DCOM = "DCOM" 22 ) 23 24 type MedlineDocument struct { 25 PMID string // PubMed ID 26 TI string // Title 27 AB string // Abstract 28 DCOM string // Date Completed 29 MH []string // MeSH Headings 30 PT []string // Publication Types 31 AU []string // Authors 32 } 33 34 type MedlineDocuments []MedlineDocument 35 36 func UnmarshalMedline(r io.Reader) MedlineDocuments { 37 38 const ( 39 Bnil int = iota 40 Bpmid 41 Bti 42 Bbti 43 Bcti 44 Bab 45 Bmh 46 Bpt 47 Bau 48 Bdcom 49 ) 50 s := bufio.NewScanner(r) 51 var ( 52 item int 53 docs MedlineDocuments 54 doc MedlineDocument 55 ) 56 c := new(strings.Builder) 57 for s.Scan() { 58 // There are a number of articles without titles. 59 // These variables will store book and collection title values. 60 var bti, cti string 61 line := s.Bytes() 62 if len(line) == 0 && len(doc.PMID) > 0 { 63 if len(doc.TI) == 0 && len(bti) > 0 { 64 doc.TI = bti 65 } else if len(doc.TI) == 0 && len(cti) > 0 { 66 doc.TI = cti 67 } 68 if len(doc.TI) == 0 { 69 log.Printf("parsing %s with no title\n", doc.PMID) 70 } 71 docs = append(docs, doc) 72 doc = MedlineDocument{} 73 } else if len(line) >= 5 && bytes.Equal(line[:6], []byte(" ")) { 74 _, err := c.Write(bytes.Replace(line, []byte(" "), []byte(""), 1)) 75 if err != nil { 76 panic(err) 77 } 78 } else { 79 switch item { 80 case Bpmid: 81 doc.PMID = c.String() 82 case Bti: 83 doc.TI = c.String() 84 case Bbti: 85 bti = c.String() 86 case Bcti: 87 cti = c.String() 88 case Bab: 89 doc.AB = c.String() 90 case Bdcom: 91 doc.DCOM = c.String() 92 case Bmh: 93 doc.MH = append(doc.MH, c.String()) 94 case Bpt: 95 doc.PT = append(doc.PT, c.String()) 96 case Bau: 97 doc.AU = append(doc.AU, c.String()) 98 case Bnil: 99 break 100 } 101 pair := bytes.Split(line, []byte("-")) 102 if len(pair) <= 1 { 103 continue 104 } 105 p0 := bytes.TrimSpace(pair[0]) 106 if bytes.Equal(p0, []byte(PMID)) { 107 item = Bpmid 108 } else if bytes.Equal(p0, []byte(TI)) { 109 item = Bti 110 } else if bytes.Equal(p0, []byte(BTI)) { 111 item = Bbti 112 } else if bytes.Equal(p0, []byte(CTI)) { 113 item = Bcti 114 } else if bytes.Equal(p0, []byte(AB)) { 115 item = Bab 116 } else if bytes.Equal(p0, []byte(MH)) { 117 item = Bmh 118 } else if bytes.Equal(p0, []byte(PT)) { 119 item = Bpt 120 } else if bytes.Equal(p0, []byte(AU)) { 121 item = Bau 122 } else if bytes.Equal(p0, []byte(DCOM)) { 123 item = Bdcom 124 } else { 125 item = Bnil 126 } 127 128 c = new(strings.Builder) 129 _, err := c.Write(bytes.TrimSpace(bytes.Join(pair[1:], []byte("-")))) 130 if err != nil { 131 panic(err) 132 } 133 } 134 } 135 docs = append(docs, doc) 136 return docs 137 } 138 139 func UnmarshalAbstract(r io.Reader) MedlineDocuments { 140 s := bufio.NewScanner(r) 141 var ( 142 docs MedlineDocuments 143 doc MedlineDocument 144 i int 145 ) 146 for s.Scan() { 147 line := s.Text() 148 if len(line) == 0 { 149 i++ 150 continue 151 } 152 153 if len(line) >= 5 && line[0:5] == "PMID:" { 154 doc.PMID = strings.TrimSpace(strings.Split(line, " ")[1]) 155 docs = append(docs, doc) 156 doc = MedlineDocument{} 157 i = -1 158 continue 159 } 160 161 switch i { 162 case 2: 163 doc.TI = doc.TI + line 164 case 5: 165 doc.AB = doc.AB + line 166 } 167 } 168 return docs 169 } 170 171 func (m MedlineDocument) String() string { 172 var b strings.Builder 173 b.WriteString(fmt.Sprintf("\n")) 174 b.WriteString(fmt.Sprintf("PMID- %s\n", m.PMID)) 175 b.WriteString(fmt.Sprintf("TI - %s\n", m.TI)) 176 b.WriteString(fmt.Sprintf("AB - %s\n", m.AB)) 177 b.WriteString(fmt.Sprintf("DCOM - %s\n", m.DCOM)) 178 for _, au := range m.AU { 179 b.WriteString(fmt.Sprintf("AU - %s\n", au)) 180 } 181 for _, pt := range m.PT { 182 b.WriteString(fmt.Sprintf("PT - %s\n", pt)) 183 } 184 for _, mh := range m.MH { 185 b.WriteString(fmt.Sprintf("MH - %s\n", mh)) 186 } 187 return b.String() 188 }