github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/parse.go (about) 1 package html 2 3 import ( 4 "context" 5 6 "github.com/pkg/errors" 7 8 "github.com/MontFerret/ferret/pkg/drivers" 9 "github.com/MontFerret/ferret/pkg/runtime/values/types" 10 11 "github.com/MontFerret/ferret/pkg/runtime/core" 12 "github.com/MontFerret/ferret/pkg/runtime/values" 13 ) 14 15 type ParseParams struct { 16 drivers.ParseParams 17 Driver string 18 } 19 20 // PARSE loads an HTML page from a given string or byte array 21 // @param {String} html - HTML string to parse. 22 // @param {Object} [params] - An object containing the following properties: 23 // @param {String} [params.driver] - Name of a driver to parse with. 24 // @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode. 25 // @param {HTTPCookies} [params.cookies] - Set of HTTP cookies to use during page loading. 26 // @param {HTTPHeaders} [params.headers] - Set of HTTP headers to use during page loading. 27 // @param {Object} [params.viewport] - Viewport params. 28 // @param {Int} [params.viewport.height] - Viewport height. 29 // @param {Int} [params.viewport.width] - Viewport width. 30 // @param {Float} [params.viewport.scaleFactor] - Viewport scale factor. 31 // @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device. 32 // @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position. 33 // @return {HTMLPage} - Returns parsed and loaded HTML page. 34 func Parse(ctx context.Context, args ...core.Value) (core.Value, error) { 35 if err := core.ValidateArgs(args, 1, 2); err != nil { 36 return values.None, err 37 } 38 39 arg1 := args[0] 40 41 if err := core.ValidateType(arg1, types.String, types.Binary); err != nil { 42 return values.None, err 43 } 44 45 var content []byte 46 47 if arg1.Type() == types.String { 48 content = []byte(arg1.(values.String)) 49 } else { 50 content = []byte(arg1.(values.Binary)) 51 } 52 53 var params ParseParams 54 55 if len(args) > 1 { 56 if err := core.ValidateType(args[1], types.Object); err != nil { 57 return values.None, err 58 } 59 60 p, err := parseParseParams(content, args[1].(*values.Object)) 61 62 if err != nil { 63 return values.None, err 64 } 65 66 params = p 67 } else { 68 params = defaultParseParams(content) 69 } 70 71 drv, err := drivers.FromContext(ctx, params.Driver) 72 73 if err != nil { 74 return values.None, err 75 } 76 77 return drv.Parse(ctx, params.ParseParams) 78 } 79 80 func defaultParseParams(content []byte) ParseParams { 81 return ParseParams{ 82 ParseParams: drivers.ParseParams{ 83 Content: content, 84 }, 85 Driver: "", 86 } 87 } 88 89 func parseParseParams(content []byte, arg *values.Object) (ParseParams, error) { 90 res := defaultParseParams(content) 91 92 if arg.Has("driver") { 93 driverName := arg.MustGet("driver") 94 95 if err := core.ValidateType(driverName, types.String); err != nil { 96 return ParseParams{}, errors.Wrap(err, ".driver") 97 } 98 99 res.Driver = driverName.String() 100 } 101 102 if arg.Has("keepCookies") { 103 keepCookies := arg.MustGet("keepCookies") 104 105 if err := core.ValidateType(keepCookies, types.Boolean); err != nil { 106 return ParseParams{}, errors.Wrap(err, ".keepCookies") 107 } 108 109 res.KeepCookies = bool(keepCookies.(values.Boolean)) 110 } 111 112 if arg.Has("cookies") { 113 cookies := arg.MustGet("cookies") 114 115 if err := core.ValidateType(cookies, types.Array, types.Object); err != nil { 116 return res, err 117 } 118 119 switch c := cookies.(type) { 120 case *values.Array: 121 cookies, err := parseCookieArray(c) 122 123 if err != nil { 124 return ParseParams{}, errors.Wrap(err, ".cookies") 125 } 126 127 res.Cookies = cookies 128 case *values.Object: 129 cookies, err := parseCookieObject(c) 130 131 if err != nil { 132 return ParseParams{}, errors.Wrap(err, ".cookies") 133 } 134 135 res.Cookies = cookies 136 default: 137 res.Cookies = drivers.NewHTTPCookies() 138 } 139 } 140 141 if arg.Has("headers") { 142 headers := arg.MustGet("headers") 143 144 if err := core.ValidateType(headers, types.Object); err != nil { 145 return ParseParams{}, errors.Wrap(err, ".headers") 146 } 147 148 res.Headers = parseHeader(headers.(*values.Object)) 149 } 150 151 if arg.Has("viewport") { 152 viewport, err := parseViewport(arg.MustGet("viewport")) 153 154 if err != nil { 155 return ParseParams{}, errors.Wrap(err, ".viewport") 156 } 157 158 res.Viewport = viewport 159 } 160 161 return res, nil 162 }