github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/public/libs/to-markdown/index.js

github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/public/libs/to-markdown/index.js (about)

     1  /*
     2   * to-markdown - an HTML to Markdown converter
     3   *
     4   * Copyright 2011-15, Dom Christie
     5   * Licenced under the MIT licence
     6   *
     7   */
     8  
     9  'use strict';
    10  
    11  var toMarkdown;
    12  var converters;
    13  var mdConverters = require('./lib/md-converters');
    14  var gfmConverters = require('./lib/gfm-converters');
    15  var collapse = require('collapse-whitespace');
    16  
    17  /*
    18   * Set up window and document for Node.js
    19   */
    20  
    21  var _window = (typeof window !== 'undefined' ? window : this), _document;
    22  if (typeof document === 'undefined') {
    23    _document = require('jsdom').jsdom();
    24  }
    25  else {
    26    _document = document;
    27  }
    28  
    29  /*
    30   * Utilities
    31   */
    32  
    33  function trim(string) {
    34    return string.replace(/^[ \r\n\t]+|[ \r\n\t]+$/g, '');
    35  }
    36  
    37  var blocks = ['address', 'article', 'aside', 'audio', 'blockquote', 'body',
    38    'canvas', 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
    39    'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4','h5', 'h6',
    40    'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
    41    'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
    42    'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
    43  ];
    44  
    45  function isBlock(node) {
    46    return blocks.indexOf(node.nodeName.toLowerCase()) !== -1;
    47  }
    48  
    49  var voids = [
    50    'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
    51    'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
    52  ];
    53  
    54  function isVoid(node) {
    55    return voids.indexOf(node.nodeName.toLowerCase()) !== -1;
    56  }
    57  
    58  /*
    59   * Parsing HTML strings
    60   */
    61  
    62  function canParseHtml() {
    63    var Parser = _window.DOMParser, canParse = false;
    64  
    65    // Adapted from https://gist.github.com/1129031
    66    // Firefox/Opera/IE throw errors on unsupported types
    67    try {
    68      // WebKit returns null on unsupported types
    69      if (new Parser().parseFromString('', 'text/html')) {
    70        canParse = true;
    71      }
    72    } catch (e) {}
    73    return canParse;
    74  }
    75  
    76  function createHtmlParser() {
    77    var Parser = function () {};
    78  
    79    Parser.prototype.parseFromString = function (string) {
    80      var newDoc = _document.implementation.createHTMLDocument('');
    81  
    82      if (string.toLowerCase().indexOf('<!doctype') > -1) {
    83        newDoc.documentElement.innerHTML = string;
    84      }
    85      else {
    86        newDoc.body.innerHTML = string;
    87      }
    88      return newDoc;
    89    };
    90    return Parser;
    91  }
    92  
    93  var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser();
    94  
    95  function htmlToDom(string) {
    96    var tree = new HtmlParser().parseFromString(string, 'text/html');
    97    collapse(tree, isBlock);
    98    return tree;
    99  }
   100  
   101  /*
   102   * Flattens DOM tree into single array
   103   */
   104  
   105  function bfsOrder(node) {
   106    var inqueue = [node],
   107        outqueue = [],
   108        elem, children, i;
   109  
   110    while (inqueue.length > 0) {
   111      elem = inqueue.shift();
   112      outqueue.push(elem);
   113      children = elem.childNodes;
   114      for (i = 0 ; i < children.length; i++) {
   115        if (children[i].nodeType === 1) { inqueue.push(children[i]); }
   116      }
   117    }
   118    outqueue.shift();
   119    return outqueue;
   120  }
   121  
   122  /*
   123   * Constructs a Markdown string of replacement text for a given node
   124   */
   125  
   126  function getContent(node) {
   127    var text = '';
   128    for (var i = 0; i < node.childNodes.length; i++) {
   129      if (node.childNodes[i].nodeType === 1) {
   130        text += node.childNodes[i]._replacement;
   131      }
   132      else if (node.childNodes[i].nodeType === 3) {
   133        text += node.childNodes[i].data;
   134      }
   135      else { continue; }
   136    }
   137    return text;
   138  }
   139  
   140  /*
   141   * Returns the HTML string of an element with its contents converted
   142   */
   143  
   144  function outer(node, content) {
   145    return node.cloneNode(false).outerHTML.replace('><', '>'+ content +'<');
   146  }
   147  
   148  function canConvert(node, filter) {
   149    if (typeof filter === 'string') {
   150      return filter === node.nodeName.toLowerCase();
   151    }
   152    if (Array.isArray(filter)) {
   153      return filter.indexOf(node.nodeName.toLowerCase()) !== -1;
   154    }
   155    else if (typeof filter === 'function') {
   156      return filter.call(toMarkdown, node);
   157    }
   158    else {
   159      throw new TypeError('`filter` needs to be a string, array, or function');
   160    }
   161  }
   162  
   163  function isFlankedByWhitespace(side, node) {
   164    var sibling, regExp, isFlanked;
   165  
   166    if (side === 'left') {
   167      sibling = node.previousSibling;
   168      regExp = / $/;
   169    }
   170    else {
   171      sibling = node.nextSibling;
   172      regExp = /^ /;
   173    }
   174  
   175    if (sibling) {
   176      if (sibling.nodeType === 3) {
   177        isFlanked = regExp.test(sibling.nodeValue);
   178      }
   179      else if(sibling.nodeType === 1 && !isBlock(sibling)) {
   180        isFlanked = regExp.test(sibling.textContent);
   181      }
   182    }
   183    return isFlanked;
   184  }
   185  
   186  function flankingWhitespace(node) {
   187    var leading = '', trailing = '';
   188  
   189    if (!isBlock(node)) {
   190      var hasLeading = /^[ \r\n\t]/.test(node.innerHTML),
   191          hasTrailing = /[ \r\n\t]$/.test(node.innerHTML);
   192  
   193      if (hasLeading && !isFlankedByWhitespace('left', node)) {
   194        leading = ' ';
   195      }
   196      if (hasTrailing && !isFlankedByWhitespace('right', node)) {
   197        trailing = ' ';
   198      }
   199    }
   200  
   201    return { leading: leading, trailing: trailing };
   202  }
   203  
   204  /*
   205   * Finds a Markdown converter, gets the replacement, and sets it on
   206   * `_replacement`
   207   */
   208  
   209  function process(node) {
   210    var replacement, content = getContent(node);
   211  
   212    for (var i = 0; i < converters.length; i++) {
   213      var converter = converters[i];
   214  
   215      if (canConvert(node, converter.filter)) {
   216        if (typeof converter.replacement !== 'function') {
   217          throw new TypeError(
   218            '`replacement` needs to be a function that returns a string'
   219          );
   220        }
   221  
   222        var whitespace = flankingWhitespace(node);
   223  
   224        if (whitespace.leading || whitespace.trailing) {
   225          content = trim(content);
   226        }
   227        replacement = whitespace.leading +
   228                      converter.replacement.call(toMarkdown, content, node) +
   229                      whitespace.trailing;
   230        break;
   231      }
   232    }
   233  
   234    // Remove blank nodes
   235    if (!isVoid(node) && !/A/.test(node.nodeName) && /^\s*$/i.test(content)) {
   236      replacement = '';
   237    }
   238  
   239    node._replacement = replacement;
   240  }
   241  
   242  toMarkdown = function (input, options) {
   243    options = options || {};
   244  
   245    if (typeof input !== 'string') {
   246      throw new TypeError(input + ' is not a string');
   247    }
   248  
   249    // Escape potential ol triggers
   250    input = input.replace(/(\d+)\. /g, '$1\\. ');
   251  
   252    var clone = htmlToDom(input).body,
   253        nodes = bfsOrder(clone),
   254        output;
   255  
   256    converters = mdConverters.slice(0);
   257    if (options.gfm) {
   258      converters = gfmConverters.concat(converters);
   259    }
   260  
   261    if (options.converters) {
   262      converters = options.converters.concat(converters);
   263    }
   264  
   265    // Process through nodes in reverse (so deepest child elements are first).
   266    for (var i = nodes.length - 1; i >= 0; i--) {
   267      process(nodes[i]);
   268    }
   269    output = getContent(clone);
   270  
   271    return output.replace(/^[\t\r\n]+|[\t\r\n\s]+$/g, '')
   272                 .replace(/\n\s+\n/g, '\n\n')
   273                 .replace(/\n{3,}/g, '\n\n');
   274  };
   275  
   276  toMarkdown.isBlock = isBlock;
   277  toMarkdown.isVoid = isVoid;
   278  toMarkdown.trim = trim;
   279  toMarkdown.outer = outer;
   280  
   281  module.exports = toMarkdown;