github.com/signintech/pdft@v0.5.0/crawl.go (about)

     1  package pdft
     2  
     3  type crawl struct {
     4  	//setup
     5  	pdf   *PDFData
     6  	objID int
     7  	paths []string
     8  
     9  	//result
    10  	index int
    11  
    12  	//onCrawl     funcOnCrawl
    13  	results map[int]*crawlResult
    14  }
    15  
    16  func (c *crawl) resultByObjID(id int) *crawlResult {
    17  	if r, ok := c.results[id]; ok {
    18  		return r
    19  	}
    20  	c.results[id] = new(crawlResult)
    21  	return c.results[id]
    22  }
    23  
    24  func (c *crawl) set(pdf *PDFData, objID int, p ...string) {
    25  
    26  	c.pdf = pdf
    27  	c.objID = objID
    28  	c.paths = p
    29  	//init
    30  	c.results = make(map[int]*crawlResult)
    31  }
    32  
    33  func (c *crawl) run() error {
    34  	objdata := c.pdf.getObjByID(c.objID)
    35  	err := c.next(&objdata.data, 0, c.objID, c.resultByObjID(c.objID))
    36  	return err
    37  }
    38  
    39  func (c *crawl) next(data *[]byte, i int, id int, cr *crawlResult) error {
    40  
    41  	lenPath := len(c.paths)
    42  	if lenPath <= i {
    43  
    44  		return nil
    45  	}
    46  
    47  	var err error
    48  	var props PDFObjPropertiesData
    49  	err = readProperties(data, &props)
    50  	if err != nil {
    51  		return err
    52  	}
    53  
    54  	//var cr crawlResult
    55  	for _, prop := range props {
    56  		var item crawlResultItem
    57  		item.key = prop.key
    58  		if prop.key != c.paths[i] {
    59  			item.setValStr(prop.rawVal)
    60  		} else {
    61  			propType := prop.valType()
    62  			if propType == dictionary {
    63  				var objID int
    64  				item.setValStr(prop.rawVal)
    65  				objID, _, err = prop.asDictionary()
    66  				if err != nil {
    67  					return err
    68  				}
    69  				objdata := c.pdf.getObjByID(objID)
    70  				err = c.next(&objdata.data, i+1, objID, c.resultByObjID(objID))
    71  				if err != nil {
    72  					return err
    73  				}
    74  			} else if propType == array {
    75  				var objIDs []int
    76  				item.setValStr(prop.rawVal)
    77  				objIDs, _, err = prop.asDictionaryArr()
    78  				if err != nil {
    79  					return err
    80  				}
    81  				for _, objID := range objIDs {
    82  					objdata := c.pdf.getObjByID(objID)
    83  					c.next(&objdata.data, i+1, objID, c.resultByObjID(objID))
    84  				}
    85  			} else if propType == object {
    86  				if lenPath <= i+1 {
    87  					item.setValStr(prop.rawVal)
    88  				} else {
    89  					var subCr crawlResult
    90  					item.setValCr(&subCr)
    91  					tmp := []byte(prop.rawVal)
    92  					err = c.next(&tmp, i+1, -1, &subCr)
    93  					if err != nil {
    94  						return err
    95  					}
    96  				}
    97  			}
    98  		}
    99  		cr.items = append(cr.items, item)
   100  	}
   101  
   102  	return nil
   103  }