github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/parse.go (about)

     1  package html
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/pkg/errors"
     7  
     8  	"github.com/MontFerret/ferret/pkg/drivers"
     9  	"github.com/MontFerret/ferret/pkg/runtime/values/types"
    10  
    11  	"github.com/MontFerret/ferret/pkg/runtime/core"
    12  	"github.com/MontFerret/ferret/pkg/runtime/values"
    13  )
    14  
    15  type ParseParams struct {
    16  	drivers.ParseParams
    17  	Driver string
    18  }
    19  
    20  // PARSE loads an HTML page from a given string or byte array
    21  // @param {String} html - HTML string to parse.
    22  // @param {Object} [params] - An object containing the following properties:
    23  // @param {String} [params.driver] - Name of a driver to parse with.
    24  // @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
    25  // @param {HTTPCookies} [params.cookies] - Set of HTTP cookies to use during page loading.
    26  // @param {HTTPHeaders} [params.headers] - Set of HTTP headers to use during page loading.
    27  // @param {Object} [params.viewport] - Viewport params.
    28  // @param {Int} [params.viewport.height] - Viewport height.
    29  // @param {Int} [params.viewport.width] - Viewport width.
    30  // @param {Float} [params.viewport.scaleFactor] - Viewport scale factor.
    31  // @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device.
    32  // @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position.
    33  // @return {HTMLPage} - Returns parsed and loaded HTML page.
    34  func Parse(ctx context.Context, args ...core.Value) (core.Value, error) {
    35  	if err := core.ValidateArgs(args, 1, 2); err != nil {
    36  		return values.None, err
    37  	}
    38  
    39  	arg1 := args[0]
    40  
    41  	if err := core.ValidateType(arg1, types.String, types.Binary); err != nil {
    42  		return values.None, err
    43  	}
    44  
    45  	var content []byte
    46  
    47  	if arg1.Type() == types.String {
    48  		content = []byte(arg1.(values.String))
    49  	} else {
    50  		content = []byte(arg1.(values.Binary))
    51  	}
    52  
    53  	var params ParseParams
    54  
    55  	if len(args) > 1 {
    56  		if err := core.ValidateType(args[1], types.Object); err != nil {
    57  			return values.None, err
    58  		}
    59  
    60  		p, err := parseParseParams(content, args[1].(*values.Object))
    61  
    62  		if err != nil {
    63  			return values.None, err
    64  		}
    65  
    66  		params = p
    67  	} else {
    68  		params = defaultParseParams(content)
    69  	}
    70  
    71  	drv, err := drivers.FromContext(ctx, params.Driver)
    72  
    73  	if err != nil {
    74  		return values.None, err
    75  	}
    76  
    77  	return drv.Parse(ctx, params.ParseParams)
    78  }
    79  
    80  func defaultParseParams(content []byte) ParseParams {
    81  	return ParseParams{
    82  		ParseParams: drivers.ParseParams{
    83  			Content: content,
    84  		},
    85  		Driver: "",
    86  	}
    87  }
    88  
    89  func parseParseParams(content []byte, arg *values.Object) (ParseParams, error) {
    90  	res := defaultParseParams(content)
    91  
    92  	if arg.Has("driver") {
    93  		driverName := arg.MustGet("driver")
    94  
    95  		if err := core.ValidateType(driverName, types.String); err != nil {
    96  			return ParseParams{}, errors.Wrap(err, ".driver")
    97  		}
    98  
    99  		res.Driver = driverName.String()
   100  	}
   101  
   102  	if arg.Has("keepCookies") {
   103  		keepCookies := arg.MustGet("keepCookies")
   104  
   105  		if err := core.ValidateType(keepCookies, types.Boolean); err != nil {
   106  			return ParseParams{}, errors.Wrap(err, ".keepCookies")
   107  		}
   108  
   109  		res.KeepCookies = bool(keepCookies.(values.Boolean))
   110  	}
   111  
   112  	if arg.Has("cookies") {
   113  		cookies := arg.MustGet("cookies")
   114  
   115  		if err := core.ValidateType(cookies, types.Array, types.Object); err != nil {
   116  			return res, err
   117  		}
   118  
   119  		switch c := cookies.(type) {
   120  		case *values.Array:
   121  			cookies, err := parseCookieArray(c)
   122  
   123  			if err != nil {
   124  				return ParseParams{}, errors.Wrap(err, ".cookies")
   125  			}
   126  
   127  			res.Cookies = cookies
   128  		case *values.Object:
   129  			cookies, err := parseCookieObject(c)
   130  
   131  			if err != nil {
   132  				return ParseParams{}, errors.Wrap(err, ".cookies")
   133  			}
   134  
   135  			res.Cookies = cookies
   136  		default:
   137  			res.Cookies = drivers.NewHTTPCookies()
   138  		}
   139  	}
   140  
   141  	if arg.Has("headers") {
   142  		headers := arg.MustGet("headers")
   143  
   144  		if err := core.ValidateType(headers, types.Object); err != nil {
   145  			return ParseParams{}, errors.Wrap(err, ".headers")
   146  		}
   147  
   148  		res.Headers = parseHeader(headers.(*values.Object))
   149  	}
   150  
   151  	if arg.Has("viewport") {
   152  		viewport, err := parseViewport(arg.MustGet("viewport"))
   153  
   154  		if err != nil {
   155  			return ParseParams{}, errors.Wrap(err, ".viewport")
   156  		}
   157  
   158  		res.Viewport = viewport
   159  	}
   160  
   161  	return res, nil
   162  }