github.com/omniscale/go-osm@v0.3.1/parser/pbf/parser.go (about) 1 package pbf 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "runtime" 9 "sync" 10 11 "github.com/omniscale/go-osm" 12 ) 13 14 type Config struct { 15 // IncludeMetadata indicates whether metadata like timestamps, versions and 16 // user names should be parsed. 17 IncludeMetadata bool 18 19 // Nodes specifies the destination for parsed nodes. See also Coords below. 20 // For efficiency, multiple nodes are passed in batches. 21 Nodes chan []osm.Node 22 // Ways specifies the destination for parsed ways. 23 // For efficiency, multiple wats are passed in batches. 24 Ways chan []osm.Way 25 // Relations specifies the destination for parsed relations. 26 // For efficiency, multiple relations are passed in batches. 27 Relations chan []osm.Relation 28 29 // Coords specifies the destination for parsed nodes without any tags. This 30 // can be used for more efficient storage/proceessing of nodes that are 31 // only used as coordinates for ways and relations. 32 // For efficiency, multiple nodes are passed in batches. 33 // 34 // If a Coords channel is specified, then nodes without tags are 35 // not sent to the Nodes channel. However, the Coords channel will receive 36 // all nodes. 37 Coords chan []osm.Node 38 39 // KeepOpen specifies whether the destination channels should be keept open 40 // after Parse(). By default, Nodes, Ways, Relations and Coords channels 41 // are closed after Parse(). 42 KeepOpen bool 43 44 // OnFirstWay defines an optional func that gets called when the the first 45 // way is parsed. The callback should block until it is safe to fill the 46 // Ways channel. 47 // 48 // This can be used when you require that all nodes are processed before 49 // you start processing ways. 50 // 51 // This only works when the PBF file is ordered by type (nodes before ways 52 // before relations). 53 OnFirstWay func() 54 55 // OnFirstRelation defines an optional func that gets called when the 56 // the first relation is parsed. The callback should block until it is 57 // safe to fill the Relations channel. 58 // 59 // This can be used when you require that all ways are processed before you 60 // start processing relations. 61 // 62 // This only works when the PBF file is ordered by type (nodes before ways 63 // before relations). 64 OnFirstRelation func() 65 66 // Concurrency specifies how many concurrent parsers are started. Defaults 67 // to runtime.NumCPU if <= 0. 68 Concurrency int 69 } 70 71 type Parser struct { 72 conf Config 73 r io.Reader 74 header *Header 75 wg sync.WaitGroup 76 waySync *barrier 77 relSync *barrier 78 err error 79 } 80 81 // New creates a new PBF parser for the provided input. Config specifies the destinations for the parsed elements. 82 func New(r io.Reader, conf Config) *Parser { 83 p := &Parser{ 84 r: r, 85 conf: conf, 86 } 87 88 if conf.Concurrency <= 0 { 89 p.conf.Concurrency = runtime.NumCPU() 90 } 91 92 if conf.OnFirstWay != nil { 93 p.waySync = newBarrier(conf.OnFirstWay) 94 p.waySync.add(p.conf.Concurrency) 95 } 96 if conf.OnFirstRelation != nil { 97 p.relSync = newBarrier(conf.OnFirstRelation) 98 p.relSync.add(p.conf.Concurrency) 99 } 100 return p 101 } 102 103 // Header returns the header information from the PBF. Can be called before or 104 // after Parse(). 105 func (p *Parser) Header() (*Header, error) { 106 if p.err != nil { 107 return nil, p.err 108 } 109 if p.header == nil { 110 if p.err = p.parseHeader(); p.err != nil { 111 return nil, p.err 112 } 113 } 114 return p.header, nil 115 } 116 117 // Error returns the first error that occurred during Header/Parse calls. 118 func (p *Parser) Error() error { 119 return p.err 120 } 121 122 // Parse parses the PBF file and sends the parsed nodes, ways and relations 123 // into the channels provided to the Parsers Config. 124 // Context can be used to cancel the parsing. 125 func (p *Parser) Parse(ctx context.Context) (err error) { 126 if p.err != nil { 127 return err 128 } 129 130 defer func() { 131 if err != nil { 132 p.err = err 133 } 134 }() 135 if p.header == nil { 136 if err := p.parseHeader(); err != nil { 137 return err 138 } 139 } 140 wg := sync.WaitGroup{} 141 blocks := make(chan []byte) 142 143 for i := 0; i < p.conf.Concurrency; i++ { 144 wg.Add(1) 145 go func() { 146 for block := range blocks { 147 p.parseBlock(block) 148 } 149 if p.waySync != nil { 150 p.waySync.doneWait() 151 } 152 if p.relSync != nil { 153 p.relSync.doneWait() 154 } 155 wg.Done() 156 }() 157 } 158 159 read: 160 for { 161 header, data, err := nextBlock(p.r) 162 if err == io.EOF { 163 break read 164 } 165 if err != nil { 166 close(blocks) 167 return fmt.Errorf("parsing next block: %w", err) 168 } 169 if header.GetType() != "OSMData" { 170 close(blocks) 171 return errors.New("next block not of type OSMData but " + header.GetType()) 172 } 173 select { 174 case <-ctx.Done(): 175 fmt.Println("done") 176 break read 177 case blocks <- data: 178 } 179 } 180 181 close(blocks) 182 wg.Wait() 183 184 if !p.conf.KeepOpen { 185 if p.conf.Coords != nil { 186 close(p.conf.Coords) 187 } 188 if p.conf.Nodes != nil { 189 close(p.conf.Nodes) 190 } 191 if p.conf.Ways != nil { 192 close(p.conf.Ways) 193 } 194 if p.conf.Relations != nil { 195 close(p.conf.Relations) 196 } 197 } 198 199 return ctx.Err() 200 } 201 202 func (p *Parser) parseHeader() error { 203 if p.header != nil { 204 return nil 205 } 206 var err error 207 p.header, err = parseHeader(p.r) 208 return err 209 } 210 211 func (p *Parser) parseBlock(blob []byte) error { 212 block, err := decodePrimitiveBlock(blob) 213 if err != nil { 214 return err 215 } 216 stringtable := newStringTable(block.GetStringtable()) 217 218 for _, group := range block.Primitivegroup { 219 if p.conf.Coords != nil || p.conf.Nodes != nil { 220 dense := group.GetDense() 221 if dense != nil { 222 parsedCoords, parsedNodes := readDenseNodes(dense, block, stringtable, p.conf.Coords == nil, p.conf.IncludeMetadata) 223 if len(parsedCoords) > 0 && p.conf.Coords != nil { 224 p.conf.Coords <- parsedCoords 225 } 226 if len(parsedNodes) > 0 && p.conf.Nodes != nil { 227 p.conf.Nodes <- parsedNodes 228 } 229 } 230 if len(group.Nodes) > 0 { 231 parsedCoords, parsedNodes := readNodes(group.Nodes, block, stringtable, p.conf.Coords == nil, p.conf.IncludeMetadata) 232 if len(parsedCoords) > 0 && p.conf.Coords != nil { 233 p.conf.Coords <- parsedCoords 234 } 235 if len(parsedNodes) > 0 && p.conf.Nodes != nil { 236 p.conf.Nodes <- parsedNodes 237 } 238 } 239 } 240 if len(group.Ways) > 0 && p.conf.Ways != nil { 241 parsedWays := readWays(group.Ways, block, stringtable, p.conf.IncludeMetadata) 242 if len(parsedWays) > 0 { 243 if p.waySync != nil { 244 p.waySync.doneWait() 245 } 246 p.conf.Ways <- parsedWays 247 } 248 } 249 if len(group.Relations) > 0 && p.conf.Relations != nil { 250 parsedRelations := readRelations(group.Relations, block, stringtable, p.conf.IncludeMetadata) 251 if len(parsedRelations) > 0 { 252 if p.waySync != nil { 253 p.waySync.doneWait() 254 } 255 if p.relSync != nil { 256 p.relSync.doneWait() 257 } 258 p.conf.Relations <- parsedRelations 259 } 260 } 261 } 262 return nil 263 }