github.com/omniscale/go-osm@v0.3.1/parser/pbf/parser.go (about)

     1  package pbf
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"runtime"
     9  	"sync"
    10  
    11  	"github.com/omniscale/go-osm"
    12  )
    13  
// Config holds the options for a Parser: the destination channels for the
// parsed elements, optional synchronization callbacks and the number of
// concurrent block parsers.
type Config struct {
	// IncludeMetadata indicates whether metadata like timestamps, versions and
	// user names should be parsed.
	IncludeMetadata bool

	// Nodes specifies the destination for parsed nodes. See also Coords below.
	// For efficiency, multiple nodes are passed in batches.
	Nodes chan []osm.Node
	// Ways specifies the destination for parsed ways.
	// For efficiency, multiple ways are passed in batches.
	Ways chan []osm.Way
	// Relations specifies the destination for parsed relations.
	// For efficiency, multiple relations are passed in batches.
	Relations chan []osm.Relation

	// Coords specifies the destination for parsed nodes without any tags. This
	// can be used for more efficient storage/processing of nodes that are
	// only used as coordinates for ways and relations.
	// For efficiency, multiple nodes are passed in batches.
	//
	// If a Coords channel is specified, then nodes without tags are
	// not sent to the Nodes channel. However, the Coords channel will receive
	// all nodes.
	Coords chan []osm.Node

	// KeepOpen specifies whether the destination channels should be kept open
	// after Parse(). By default, Nodes, Ways, Relations and Coords channels
	// are closed after Parse().
	KeepOpen bool

	// OnFirstWay defines an optional func that gets called when the first
	// way is parsed. The callback should block until it is safe to fill the
	// Ways channel.
	//
	// This can be used when you require that all nodes are processed before
	// you start processing ways.
	//
	// This only works when the PBF file is ordered by type (nodes before ways
	// before relations).
	OnFirstWay func()

	// OnFirstRelation defines an optional func that gets called when the
	// first relation is parsed. The callback should block until it is
	// safe to fill the Relations channel.
	//
	// This can be used when you require that all ways are processed before you
	// start processing relations.
	//
	// This only works when the PBF file is ordered by type (nodes before ways
	// before relations).
	OnFirstRelation func()

	// Concurrency specifies how many concurrent parsers are started. Defaults
	// to runtime.NumCPU if <= 0.
	Concurrency int
}
    70  
// Parser reads a PBF stream from an io.Reader and distributes the parsed
// elements to the channels given in its Config. Create one with New.
type Parser struct {
	conf    Config    // effective configuration (Concurrency is already defaulted by New)
	r       io.Reader // input stream the header and data blocks are read from
	header  *Header   // cached header; nil until it has been parsed
	wg      sync.WaitGroup // NOTE(review): not referenced by the methods in this file; Parse uses a local WaitGroup
	waySync *barrier  // set only when Config.OnFirstWay is given
	relSync *barrier  // set only when Config.OnFirstRelation is given
	err     error     // error recorded by Header/Parse; returned by Error
}
    80  
    81  // New creates a new PBF parser for the provided input. Config specifies the destinations for the parsed elements.
    82  func New(r io.Reader, conf Config) *Parser {
    83  	p := &Parser{
    84  		r:    r,
    85  		conf: conf,
    86  	}
    87  
    88  	if conf.Concurrency <= 0 {
    89  		p.conf.Concurrency = runtime.NumCPU()
    90  	}
    91  
    92  	if conf.OnFirstWay != nil {
    93  		p.waySync = newBarrier(conf.OnFirstWay)
    94  		p.waySync.add(p.conf.Concurrency)
    95  	}
    96  	if conf.OnFirstRelation != nil {
    97  		p.relSync = newBarrier(conf.OnFirstRelation)
    98  		p.relSync.add(p.conf.Concurrency)
    99  	}
   100  	return p
   101  }
   102  
   103  // Header returns the header information from the PBF. Can be called before or
   104  // after Parse().
   105  func (p *Parser) Header() (*Header, error) {
   106  	if p.err != nil {
   107  		return nil, p.err
   108  	}
   109  	if p.header == nil {
   110  		if p.err = p.parseHeader(); p.err != nil {
   111  			return nil, p.err
   112  		}
   113  	}
   114  	return p.header, nil
   115  }
   116  
// Error returns the error recorded by a previous Header or Parse call, or nil
// if none was recorded.
func (p *Parser) Error() error {
	return p.err
}
   121  
   122  // Parse parses the PBF file and sends the parsed nodes, ways and relations
   123  // into the channels provided to the Parsers Config.
   124  // Context can be used to cancel the parsing.
   125  func (p *Parser) Parse(ctx context.Context) (err error) {
   126  	if p.err != nil {
   127  		return err
   128  	}
   129  
   130  	defer func() {
   131  		if err != nil {
   132  			p.err = err
   133  		}
   134  	}()
   135  	if p.header == nil {
   136  		if err := p.parseHeader(); err != nil {
   137  			return err
   138  		}
   139  	}
   140  	wg := sync.WaitGroup{}
   141  	blocks := make(chan []byte)
   142  
   143  	for i := 0; i < p.conf.Concurrency; i++ {
   144  		wg.Add(1)
   145  		go func() {
   146  			for block := range blocks {
   147  				p.parseBlock(block)
   148  			}
   149  			if p.waySync != nil {
   150  				p.waySync.doneWait()
   151  			}
   152  			if p.relSync != nil {
   153  				p.relSync.doneWait()
   154  			}
   155  			wg.Done()
   156  		}()
   157  	}
   158  
   159  read:
   160  	for {
   161  		header, data, err := nextBlock(p.r)
   162  		if err == io.EOF {
   163  			break read
   164  		}
   165  		if err != nil {
   166  			close(blocks)
   167  			return fmt.Errorf("parsing next block: %w", err)
   168  		}
   169  		if header.GetType() != "OSMData" {
   170  			close(blocks)
   171  			return errors.New("next block not of type OSMData but " + header.GetType())
   172  		}
   173  		select {
   174  		case <-ctx.Done():
   175  			fmt.Println("done")
   176  			break read
   177  		case blocks <- data:
   178  		}
   179  	}
   180  
   181  	close(blocks)
   182  	wg.Wait()
   183  
   184  	if !p.conf.KeepOpen {
   185  		if p.conf.Coords != nil {
   186  			close(p.conf.Coords)
   187  		}
   188  		if p.conf.Nodes != nil {
   189  			close(p.conf.Nodes)
   190  		}
   191  		if p.conf.Ways != nil {
   192  			close(p.conf.Ways)
   193  		}
   194  		if p.conf.Relations != nil {
   195  			close(p.conf.Relations)
   196  		}
   197  	}
   198  
   199  	return ctx.Err()
   200  }
   201  
   202  func (p *Parser) parseHeader() error {
   203  	if p.header != nil {
   204  		return nil
   205  	}
   206  	var err error
   207  	p.header, err = parseHeader(p.r)
   208  	return err
   209  }
   210  
   211  func (p *Parser) parseBlock(blob []byte) error {
   212  	block, err := decodePrimitiveBlock(blob)
   213  	if err != nil {
   214  		return err
   215  	}
   216  	stringtable := newStringTable(block.GetStringtable())
   217  
   218  	for _, group := range block.Primitivegroup {
   219  		if p.conf.Coords != nil || p.conf.Nodes != nil {
   220  			dense := group.GetDense()
   221  			if dense != nil {
   222  				parsedCoords, parsedNodes := readDenseNodes(dense, block, stringtable, p.conf.Coords == nil, p.conf.IncludeMetadata)
   223  				if len(parsedCoords) > 0 && p.conf.Coords != nil {
   224  					p.conf.Coords <- parsedCoords
   225  				}
   226  				if len(parsedNodes) > 0 && p.conf.Nodes != nil {
   227  					p.conf.Nodes <- parsedNodes
   228  				}
   229  			}
   230  			if len(group.Nodes) > 0 {
   231  				parsedCoords, parsedNodes := readNodes(group.Nodes, block, stringtable, p.conf.Coords == nil, p.conf.IncludeMetadata)
   232  				if len(parsedCoords) > 0 && p.conf.Coords != nil {
   233  					p.conf.Coords <- parsedCoords
   234  				}
   235  				if len(parsedNodes) > 0 && p.conf.Nodes != nil {
   236  					p.conf.Nodes <- parsedNodes
   237  				}
   238  			}
   239  		}
   240  		if len(group.Ways) > 0 && p.conf.Ways != nil {
   241  			parsedWays := readWays(group.Ways, block, stringtable, p.conf.IncludeMetadata)
   242  			if len(parsedWays) > 0 {
   243  				if p.waySync != nil {
   244  					p.waySync.doneWait()
   245  				}
   246  				p.conf.Ways <- parsedWays
   247  			}
   248  		}
   249  		if len(group.Relations) > 0 && p.conf.Relations != nil {
   250  			parsedRelations := readRelations(group.Relations, block, stringtable, p.conf.IncludeMetadata)
   251  			if len(parsedRelations) > 0 {
   252  				if p.waySync != nil {
   253  					p.waySync.doneWait()
   254  				}
   255  				if p.relSync != nil {
   256  					p.relSync.doneWait()
   257  				}
   258  				p.conf.Relations <- parsedRelations
   259  			}
   260  		}
   261  	}
   262  	return nil
   263  }