// Source: github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/public/libs/to-markdown/index.js
/*
 * to-markdown - an HTML to Markdown converter
 *
 * Copyright 2011-15, Dom Christie
 * Licenced under the MIT licence
 *
 */

'use strict';

var toMarkdown;
var mdConverters = require('./lib/md-converters');
var gfmConverters = require('./lib/gfm-converters');
var collapse = require('collapse-whitespace');

/*
 * Set up window and document for Node.js
 */

var _window = (typeof window !== 'undefined' ? window : this), _document;
if (typeof document === 'undefined') {
  // No global `document`: build one with jsdom.
  _document = require('jsdom').jsdom();
}
else {
  _document = document;
}

/*
 * Utilities
 */

/**
 * Strips leading and trailing spaces, tabs, carriage returns and newlines.
 *
 * @param {string} string - text to trim
 * @returns {string} the trimmed text
 */
function trim(string) {
  return string.replace(/^[ \r\n\t]+|[ \r\n\t]+$/g, '');
}

// Lower-case element names that `isBlock` reports as block-level.
var blocks = ['address', 'article', 'aside', 'audio', 'blockquote', 'body',
  'canvas', 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4','h5', 'h6',
  'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
  'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
  'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
];

/**
 * Tells whether a node's (lower-cased) name is in the `blocks` list.
 *
 * @param {Node} node - DOM node to test
 * @returns {boolean} true when the node name is listed in `blocks`
 */
function isBlock(node) {
  return blocks.indexOf(node.nodeName.toLowerCase()) !== -1;
}

// Lower-case element names that `isVoid` reports as void elements.
var voids = [
  'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
  'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
];

/**
 * Tells whether a node's (lower-cased) name is in the `voids` list.
 *
 * @param {Node} node - DOM node to test
 * @returns {boolean} true when the node name is listed in `voids`
 */
function isVoid(node) {
  return voids.indexOf(node.nodeName.toLowerCase()) !== -1;
}

/*
 * Parsing HTML strings
 */

/**
 * Feature-detects whether `_window.DOMParser` can parse 'text/html'.
 *
 * @returns {boolean} true when parsing an empty 'text/html' string yields a
 *   truthy result without throwing
 */
function canParseHtml() {
  var Parser = _window.DOMParser, canParse = false;

  // Adapted from https://gist.github.com/1129031
  // Firefox/Opera/IE throw errors on unsupported types
  try {
    // WebKit returns null on unsupported types
    if (new Parser().parseFromString('', 'text/html')) {
      canParse = true;
    }
  } catch (e) {
    // Deliberately ignored: any failure (including a missing DOMParser)
    // simply means the fallback parser has to be used.
  }
  return canParse;
}

/**
 * Builds a fallback parser constructor exposing `parseFromString`, backed by
 * `_document.implementation.createHTMLDocument`.
 *
 * @returns {Function} constructor whose instances have `parseFromString`
 */
function createHtmlParser() {
  var Parser = function () {};

  Parser.prototype.parseFromString = function (string) {
    var newDoc = _document.implementation.createHTMLDocument('');

    // Input carrying a doctype is written to the document element;
    // anything else is written into the body.
    if (string.toLowerCase().indexOf('<!doctype') > -1) {
      newDoc.documentElement.innerHTML = string;
    }
    else {
      newDoc.body.innerHTML = string;
    }
    return newDoc;
  };
  return Parser;
}

var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser();

/**
 * Parses an HTML string into a document and passes it through `collapse`.
 *
 * @param {string} string - HTML source
 * @returns {Document} the parsed document
 */
function htmlToDom(string) {
  var tree = new HtmlParser().parseFromString(string, 'text/html');
  // presumably normalises whitespace in place (collapse-whitespace) — the
  // helper is external to this file.
  collapse(tree, isBlock);
  return tree;
}

/*
 * Flattens DOM tree into single array
 */

/**
 * Lists the element descendants of `node` in breadth-first order.
 *
 * @param {Node} node - root of the traversal (not included in the result)
 * @returns {Array} element nodes (nodeType 1) below `node`, breadth-first
 */
function bfsOrder(node) {
  var queue = [node],
      head, children, i;

  // The array doubles as the work queue and the result: `head` walks forward
  // while children are appended, which avoids repeated `shift()` calls.
  for (head = 0; head < queue.length; head++) {
    children = queue[head].childNodes;
    for (i = 0; i < children.length; i++) {
      if (children[i].nodeType === 1) { queue.push(children[i]); }
    }
  }
  // Drop the root itself.
  return queue.slice(1);
}

/*
 * Constructs a Markdown string of replacement text for a given node
 */

/**
 * Concatenates the `_replacement` of element children and the `data` of text
 * children; other node types are skipped.
 *
 * @param {Node} node - node whose children are combined
 * @returns {string} the combined text
 */
function getContent(node) {
  var text = '', child;
  for (var i = 0; i < node.childNodes.length; i++) {
    child = node.childNodes[i];
    if (child.nodeType === 1) {
      text += child._replacement;
    }
    else if (child.nodeType === 3) {
      text += child.data;
    }
  }
  return text;
}

/*
 * Returns the HTML string of an element with its contents converted
 */

/**
 * Serialises a shallow clone of `node` and inserts `content` at the first
 * '><' of that markup.
 *
 * @param {Node} node - element to serialise
 * @param {string} content - already-converted content to place inside
 * @returns {string} the element's outer HTML wrapping `content`
 */
function outer(node, content) {
  // A replacer function is used so `$&`, `$$`, etc. in `content` are inserted
  // literally instead of being read as replacement patterns.
  return node.cloneNode(false).outerHTML.replace('><', function () {
    return '>' + content + '<';
  });
}

/**
 * Decides whether a converter's `filter` applies to `node`.
 *
 * @param {Node} node - node being converted
 * @param {(string|Array|Function)} filter - tag name, list of tag names, or
 *   predicate called with `toMarkdown` as `this` and the node as argument
 * @returns {*} boolean for string/array filters; the predicate's own return
 *   value for function filters
 * @throws {TypeError} when `filter` is not a string, array, or function
 */
function canConvert(node, filter) {
  if (typeof filter === 'string') {
    return filter === node.nodeName.toLowerCase();
  }
  if (Array.isArray(filter)) {
    return filter.indexOf(node.nodeName.toLowerCase()) !== -1;
  }
  else if (typeof filter === 'function') {
    return filter.call(toMarkdown, node);
  }
  else {
    throw new TypeError('`filter` needs to be a string, array, or function');
  }
}

/**
 * Checks whether the sibling on the given side of `node` has a space at the
 * edge adjacent to `node`.
 *
 * @param {string} side - 'left' for the previous sibling; any other value
 *   selects the next sibling
 * @param {Node} node - node whose sibling is inspected
 * @returns {boolean} true when the sibling is a text node or a non-block
 *   element whose adjacent edge is a space; false otherwise
 */
function isFlankedByWhitespace(side, node) {
  var sibling, regExp, isFlanked = false;

  if (side === 'left') {
    sibling = node.previousSibling;
    regExp = / $/;
  }
  else {
    sibling = node.nextSibling;
    regExp = /^ /;
  }

  if (sibling) {
    if (sibling.nodeType === 3) {
      isFlanked = regExp.test(sibling.nodeValue);
    }
    else if (sibling.nodeType === 1 && !isBlock(sibling)) {
      isFlanked = regExp.test(sibling.textContent);
    }
  }
  return isFlanked;
}

/**
 * Works out the whitespace to re-attach around a non-block node's
 * replacement.
 *
 * @param {Node} node - node being converted
 * @returns {{leading: string, trailing: string}} each entry is ' ' when the
 *   node's innerHTML has whitespace on that edge and the neighbouring sibling
 *   does not already supply a space, otherwise ''
 */
function flankingWhitespace(node) {
  var leading = '', trailing = '';

  if (!isBlock(node)) {
    var hasLeading = /^[ \r\n\t]/.test(node.innerHTML),
        hasTrailing = /[ \r\n\t]$/.test(node.innerHTML);

    if (hasLeading && !isFlankedByWhitespace('left', node)) {
      leading = ' ';
    }
    if (hasTrailing && !isFlankedByWhitespace('right', node)) {
      trailing = ' ';
    }
  }

  return { leading: leading, trailing: trailing };
}

/*
 * Finds a Markdown converter, gets the replacement, and sets it on
 * `_replacement`
 */

/**
 * Converts a single node using the first converter whose filter accepts it
 * and stores the result on `node._replacement`.
 *
 * @param {Node} node - element to convert; its element children must already
 *   carry `_replacement`
 * @param {Array} converters - ordered converter list; the first match wins
 * @throws {TypeError} when the matching converter's `replacement` is not a
 *   function, or when a converter's `filter` has an unsupported type
 */
function process(node, converters) {
  var replacement, content = getContent(node);

  for (var i = 0; i < converters.length; i++) {
    var converter = converters[i];

    if (canConvert(node, converter.filter)) {
      if (typeof converter.replacement !== 'function') {
        throw new TypeError(
          '`replacement` needs to be a function that returns a string'
        );
      }

      var whitespace = flankingWhitespace(node);

      // Edge whitespace is re-added outside the replacement, so strip it from
      // the content handed to the converter.
      if (whitespace.leading || whitespace.trailing) {
        content = trim(content);
      }
      replacement = whitespace.leading +
                    converter.replacement.call(toMarkdown, content, node) +
                    whitespace.trailing;
      break;
    }
  }

  // Remove blank nodes
  // NOTE(review): /A/ is unanchored, so it exempts every node whose name
  // contains "A" (not only anchors) — confirm intent before tightening, since
  // existing output depends on it.
  if (!isVoid(node) && !/A/.test(node.nodeName) && /^\s*$/i.test(content)) {
    replacement = '';
  }

  node._replacement = replacement;
}

/**
 * Converts an HTML string to Markdown.
 *
 * @param {string} input - HTML source
 * @param {Object} [options] - optional settings
 * @param {boolean} [options.gfm] - when truthy, `gfmConverters` are tried
 *   before the standard converters
 * @param {Array} [options.converters] - extra converters, tried before all
 *   others
 * @returns {string} the Markdown text
 * @throws {TypeError} when `input` is not a string
 */
toMarkdown = function (input, options) {
  options = options || {};

  if (typeof input !== 'string') {
    throw new TypeError(input + ' is not a string');
  }

  // Escape potential ol triggers
  input = input.replace(/(\d+)\. /g, '$1\\. ');

  var clone = htmlToDom(input).body,
      nodes = bfsOrder(clone),
      // Kept local (rather than module-level) so a converter that calls
      // toMarkdown again cannot change the list used by this invocation.
      converters = mdConverters.slice(0),
      output;

  if (options.gfm) {
    converters = gfmConverters.concat(converters);
  }

  if (options.converters) {
    converters = options.converters.concat(converters);
  }

  // Process through nodes in reverse (so deepest child elements are first).
  for (var i = nodes.length - 1; i >= 0; i--) {
    process(nodes[i], converters);
  }
  output = getContent(clone);

  // Trim the edges, then squeeze runs of blank lines down to a single one.
  return output.replace(/^[\t\r\n]+|[\t\r\n\s]+$/g, '')
               .replace(/\n\s+\n/g, '\n\n')
               .replace(/\n{3,}/g, '\n\n');
};

toMarkdown.isBlock = isBlock;
toMarkdown.isVoid = isVoid;
toMarkdown.trim = trim;
toMarkdown.outer = outer;

module.exports = toMarkdown;