github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/website/talks/2011-05-07-Camlistore-Sao-Paolo/prettify.js (about)

     1  // Copyright (C) 2006 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  
    16  /**
    17   * @fileoverview
    18   * some functions for browser-side pretty printing of code contained in html.
    19   *
    20   * <p>
    21   * For a fairly comprehensive set of languages see the
    22   * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>
    23   * file that came with this source.  At a minimum, the lexer should work on a
    24   * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
    25   * XML, CSS, Javascript, and Makefiles.  It works passably on Ruby, PHP and Awk
    26   * and a subset of Perl, but, because of commenting conventions, doesn't work on
    27   * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
    28   * <p>
    29   * Usage: <ol>
    30   * <li> include this source file in an html page via
    31   *   {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
    32   * <li> define style rules.  See the example page for examples.
    33   * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
    34   *    {@code class=prettyprint.}
    35   *    You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
    36   *    printer needs to do more substantial DOM manipulations to support that, so
    37   *    some css styles may not be preserved.
    38   * </ol>
    39   * That's it.  I wanted to keep the API as simple as possible, so there's no
    40   * need to specify which language the code is in, but if you wish, you can add
    41   * another class to the {@code <pre>} or {@code <code>} element to specify the
    42   * language, as in {@code <pre class="prettyprint lang-java">}.  Any class that
    43   * starts with "lang-" followed by a file extension, specifies the file type.
    44   * See the "lang-*.js" files in this directory for code that implements
    45   * per-language file handlers.
    46   * <p>
    47   * Change log:<br>
    48   * cbeust, 2006/08/22
    49   * <blockquote>
    50   *   Java annotations (start with "@") are now captured as literals ("lit")
    51   * </blockquote>
    52   * @requires console
    53   */
    54  
    55  // JSLint declarations
    56  /*global console, document, navigator, setTimeout, window */
    57  
    58  /**
    59   * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
    60   * UI events.
    61   * If set to {@code false}, {@code prettyPrint()} is synchronous.
    62   */
    63  window['PR_SHOULD_USE_CONTINUATION'] = true;
    64  
    65  /** the number of characters between tab columns */
    66  window['PR_TAB_WIDTH'] = 8;
    67  
    68  /** Contains functions for creating and registering new language handlers.
    69    * @type {Object}
    70    */
    71  window['PR']
    72  
    73  /** Pretty print a chunk of code.
    74    *
    75    * @param {string} sourceCodeHtml code as html
    76    * @return {string} code as html, but prettier
    77    */
    78    = window['prettyPrintOne']
    79  /** Find all the {@code <pre>} and {@code <code>} tags in the DOM with
    80    * {@code class=prettyprint} and prettify them.
    81    * @param {Function?} opt_whenDone if specified, called when the last entry
    82    *     has been finished.
    83    */
    84    = window['prettyPrint'] = void 0;
    85  
    86  
    87  (function () {
    88    // Keyword lists for various languages.
    89    var FLOW_CONTROL_KEYWORDS =
    90        "break continue do else for if return while ";
    91    var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " +
    92        "double enum extern float goto int long register short signed sizeof " +
    93        "static struct switch typedef union unsigned void volatile ";
    94    var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " +
    95        "new operator private protected public this throw true try typeof ";
    96    var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " +
    97        "concept concept_map const_cast constexpr decltype " +
    98        "dynamic_cast explicit export friend inline late_check " +
    99        "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
   100        "template typeid typename using virtual wchar_t where ";
   101    var JAVA_KEYWORDS = COMMON_KEYWORDS +
   102        "abstract boolean byte extends final finally implements import " +
   103        "instanceof null native package strictfp super synchronized throws " +
   104        "transient ";
   105    var CSHARP_KEYWORDS = JAVA_KEYWORDS +
   106        "as base by checked decimal delegate descending dynamic event " +
   107        "fixed foreach from group implicit in interface internal into is lock " +
   108        "object out override orderby params partial readonly ref sbyte sealed " +
   109        "stackalloc string select uint ulong unchecked unsafe ushort var ";
   110    var COFFEE_KEYWORDS = "all and by catch class else extends false finally " +
   111        "for if in is isnt loop new no not null of off on or return super then " +
   112        "true try unless until when while yes ";
   113    var JSCRIPT_KEYWORDS = COMMON_KEYWORDS +
   114        "debugger eval export function get null set undefined var with " +
   115        "Infinity NaN ";
   116    var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " +
   117        "goto if import last local my next no our print package redo require " +
   118        "sub undef unless until use wantarray while BEGIN END ";
   119    var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " +
   120        "elif except exec finally from global import in is lambda " +
   121        "nonlocal not or pass print raise try with yield " +
   122        "False True None ";
   123    var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" +
   124        " defined elsif end ensure false in module next nil not or redo rescue " +
   125        "retry self super then true undef unless until when yield BEGIN END ";
   126    var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " +
   127        "function in local set then until ";
   128    var ALL_KEYWORDS = (
   129        CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
   130        PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
   131  
   132    // token style names.  correspond to css classes
   133    /** token style for a string literal */
   134    var PR_STRING = 'str';
   135    /** token style for a keyword */
   136    var PR_KEYWORD = 'kwd';
   137    /** token style for a comment */
   138    var PR_COMMENT = 'com';
   139    /** token style for a type */
   140    var PR_TYPE = 'typ';
   141    /** token style for a literal value.  e.g. 1, null, true. */
   142    var PR_LITERAL = 'lit';
   143    /** token style for a punctuation string. */
   144    var PR_PUNCTUATION = 'pun';
   145    /** token style for a punctuation string. */
   146    var PR_PLAIN = 'pln';
   147  
   148    /** token style for an sgml tag. */
   149    var PR_TAG = 'tag';
   150    /** token style for a markup declaration such as a DOCTYPE. */
   151    var PR_DECLARATION = 'dec';
   152    /** token style for embedded source. */
   153    var PR_SOURCE = 'src';
   154    /** token style for an sgml attribute name. */
   155    var PR_ATTRIB_NAME = 'atn';
   156    /** token style for an sgml attribute value. */
   157    var PR_ATTRIB_VALUE = 'atv';
   158  
   159    /**
   160     * A class that indicates a section of markup that is not code, e.g. to allow
   161     * embedding of line numbers within code listings.
   162     */
   163    var PR_NOCODE = 'nocode';
   164  
   165    /** A set of tokens that can precede a regular expression literal in
   166      * javascript.
   167      * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
   168      * list, but I've removed ones that might be problematic when seen in
   169      * languages that don't support regular expression literals.
   170      *
   171      * <p>Specifically, I've removed any keywords that can't precede a regexp
   172      * literal in a syntactically legal javascript program, and I've removed the
   173      * "in" keyword since it's not a keyword in many languages, and might be used
   174      * as a count of inches.
   175      *
   176      * <p>The link a above does not accurately describe EcmaScript rules since
   177      * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
   178      * very well in practice.
   179      *
   180      * @private
   181      */
   182    var REGEXP_PRECEDER_PATTERN = function () {
   183        var preceders = [
   184            "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
   185            "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
   186            "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
   187            "<", "<<", "<<=", "<=", "=", "==", "===", ">",
   188            ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
   189            "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
   190            "||=", "~" /* handles =~ and !~ */,
   191            "break", "case", "continue", "delete",
   192            "do", "else", "finally", "instanceof",
   193            "return", "throw", "try", "typeof"
   194            ];
   195        var pattern = '(?:^^|[+-]';
   196        for (var i = 0; i < preceders.length; ++i) {
   197          pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1');
   198        }
   199        pattern += ')\\s*';  // matches at end, and matches empty string
   200        return pattern;
   201        // CAVEAT: this does not properly handle the case where a regular
   202        // expression immediately follows another since a regular expression may
   203        // have flags for case-sensitivity and the like.  Having regexp tokens
   204        // adjacent is not valid in any language I'm aware of, so I'm punting.
   205        // TODO: maybe style special characters inside a regexp as punctuation.
   206      }();
   207  
   208    
   209    /**
   210     * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
   211     * matches the union of the sets of strings matched by the input RegExp.
   212     * Since it matches globally, if the input strings have a start-of-input
   213     * anchor (/^.../), it is ignored for the purposes of unioning.
   214     * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
   215     * @return {RegExp} a global regex.
   216     */
   217    function combinePrefixPatterns(regexs) {
   218      var capturedGroupIndex = 0;
   219    
   220      var needToFoldCase = false;
   221      var ignoreCase = false;
   222      for (var i = 0, n = regexs.length; i < n; ++i) {
   223        var regex = regexs[i];
   224        if (regex.ignoreCase) {
   225          ignoreCase = true;
   226        } else if (/[a-z]/i.test(regex.source.replace(
   227                       /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
   228          needToFoldCase = true;
   229          ignoreCase = false;
   230          break;
   231        }
   232      }
   233    
   234      function decodeEscape(charsetPart) {
   235        if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
   236        switch (charsetPart.charAt(1)) {
   237          case 'b': return 8;
   238          case 't': return 9;
   239          case 'n': return 0xa;
   240          case 'v': return 0xb;
   241          case 'f': return 0xc;
   242          case 'r': return 0xd;
   243          case 'u': case 'x':
   244            return parseInt(charsetPart.substring(2), 16)
   245                || charsetPart.charCodeAt(1);
   246          case '0': case '1': case '2': case '3': case '4':
   247          case '5': case '6': case '7':
   248            return parseInt(charsetPart.substring(1), 8);
   249          default: return charsetPart.charCodeAt(1);
   250        }
   251      }
   252    
   253      function encodeEscape(charCode) {
   254        if (charCode < 0x20) {
   255          return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
   256        }
   257        var ch = String.fromCharCode(charCode);
   258        if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
   259          ch = '\\' + ch;
   260        }
   261        return ch;
   262      }
   263    
   264      function caseFoldCharset(charSet) {
   265        var charsetParts = charSet.substring(1, charSet.length - 1).match(
   266            new RegExp(
   267                '\\\\u[0-9A-Fa-f]{4}'
   268                + '|\\\\x[0-9A-Fa-f]{2}'
   269                + '|\\\\[0-3][0-7]{0,2}'
   270                + '|\\\\[0-7]{1,2}'
   271                + '|\\\\[\\s\\S]'
   272                + '|-'
   273                + '|[^-\\\\]',
   274                'g'));
   275        var groups = [];
   276        var ranges = [];
   277        var inverse = charsetParts[0] === '^';
   278        for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
   279          var p = charsetParts[i];
   280          switch (p) {
   281            case '\\B': case '\\b':
   282            case '\\D': case '\\d':
   283            case '\\S': case '\\s':
   284            case '\\W': case '\\w':
   285              groups.push(p);
   286              continue;
   287          }
   288          var start = decodeEscape(p);
   289          var end;
   290          if (i + 2 < n && '-' === charsetParts[i + 1]) {
   291            end = decodeEscape(charsetParts[i + 2]);
   292            i += 2;
   293          } else {
   294            end = start;
   295          }
   296          ranges.push([start, end]);
   297          // If the range might intersect letters, then expand it.
   298          if (!(end < 65 || start > 122)) {
   299            if (!(end < 65 || start > 90)) {
   300              ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
   301            }
   302            if (!(end < 97 || start > 122)) {
   303              ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
   304            }
   305          }
   306        }
   307    
   308        // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
   309        // -> [[1, 12], [14, 14], [16, 17]]
   310        ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1]  - a[1]); });
   311        var consolidatedRanges = [];
   312        var lastRange = [NaN, NaN];
   313        for (var i = 0; i < ranges.length; ++i) {
   314          var range = ranges[i];
   315          if (range[0] <= lastRange[1] + 1) {
   316            lastRange[1] = Math.max(lastRange[1], range[1]);
   317          } else {
   318            consolidatedRanges.push(lastRange = range);
   319          }
   320        }
   321    
   322        var out = ['['];
   323        if (inverse) { out.push('^'); }
   324        out.push.apply(out, groups);
   325        for (var i = 0; i < consolidatedRanges.length; ++i) {
   326          var range = consolidatedRanges[i];
   327          out.push(encodeEscape(range[0]));
   328          if (range[1] > range[0]) {
   329            if (range[1] + 1 > range[0]) { out.push('-'); }
   330            out.push(encodeEscape(range[1]));
   331          }
   332        }
   333        out.push(']');
   334        return out.join('');
   335      }
   336    
   337      function allowAnywhereFoldCaseAndRenumberGroups(regex) {
   338        // Split into character sets, escape sequences, punctuation strings
   339        // like ('(', '(?:', ')', '^'), and runs of characters that do not
   340        // include any of the above.
   341        var parts = regex.source.match(
   342            new RegExp(
   343                '(?:'
   344                + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
   345                + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
   346                + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
   347                + '|\\\\[0-9]+'  // a back-reference or octal escape
   348                + '|\\\\[^ux0-9]'  // other escape sequence
   349                + '|\\(\\?[:!=]'  // start of a non-capturing group
   350                + '|[\\(\\)\\^]'  // start/emd of a group, or line start
   351                + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
   352                + ')',
   353                'g'));
   354        var n = parts.length;
   355    
   356        // Maps captured group numbers to the number they will occupy in
   357        // the output or to -1 if that has not been determined, or to
   358        // undefined if they need not be capturing in the output.
   359        var capturedGroups = [];
   360    
   361        // Walk over and identify back references to build the capturedGroups
   362        // mapping.
   363        for (var i = 0, groupIndex = 0; i < n; ++i) {
   364          var p = parts[i];
   365          if (p === '(') {
   366            // groups are 1-indexed, so max group index is count of '('
   367            ++groupIndex;
   368          } else if ('\\' === p.charAt(0)) {
   369            var decimalValue = +p.substring(1);
   370            if (decimalValue && decimalValue <= groupIndex) {
   371              capturedGroups[decimalValue] = -1;
   372            }
   373          }
   374        }
   375    
   376        // Renumber groups and reduce capturing groups to non-capturing groups
   377        // where possible.
   378        for (var i = 1; i < capturedGroups.length; ++i) {
   379          if (-1 === capturedGroups[i]) {
   380            capturedGroups[i] = ++capturedGroupIndex;
   381          }
   382        }
   383        for (var i = 0, groupIndex = 0; i < n; ++i) {
   384          var p = parts[i];
   385          if (p === '(') {
   386            ++groupIndex;
   387            if (capturedGroups[groupIndex] === undefined) {
   388              parts[i] = '(?:';
   389            }
   390          } else if ('\\' === p.charAt(0)) {
   391            var decimalValue = +p.substring(1);
   392            if (decimalValue && decimalValue <= groupIndex) {
   393              parts[i] = '\\' + capturedGroups[groupIndex];
   394            }
   395          }
   396        }
   397    
   398        // Remove any prefix anchors so that the output will match anywhere.
   399        // ^^ really does mean an anchored match though.
   400        for (var i = 0, groupIndex = 0; i < n; ++i) {
   401          if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
   402        }
   403    
   404        // Expand letters to groups to handle mixing of case-sensitive and
   405        // case-insensitive patterns if necessary.
   406        if (regex.ignoreCase && needToFoldCase) {
   407          for (var i = 0; i < n; ++i) {
   408            var p = parts[i];
   409            var ch0 = p.charAt(0);
   410            if (p.length >= 2 && ch0 === '[') {
   411              parts[i] = caseFoldCharset(p);
   412            } else if (ch0 !== '\\') {
   413              // TODO: handle letters in numeric escapes.
   414              parts[i] = p.replace(
   415                  /[a-zA-Z]/g,
   416                  function (ch) {
   417                    var cc = ch.charCodeAt(0);
   418                    return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
   419                  });
   420            }
   421          }
   422        }
   423    
   424        return parts.join('');
   425      }
   426    
   427      var rewritten = [];
   428      for (var i = 0, n = regexs.length; i < n; ++i) {
   429        var regex = regexs[i];
   430        if (regex.global || regex.multiline) { throw new Error('' + regex); }
   431        rewritten.push(
   432            '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
   433      }
   434    
   435      return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
   436    }
   437  
   438  
   439    /**
   440     * Split markup into a string of source code and an array mapping ranges in
   441     * that string to the text nodes in which they appear.
   442     *
   443     * <p>
   444     * The HTML DOM structure:</p>
   445     * <pre>
   446     * (Element   "p"
   447     *   (Element "b"
   448     *     (Text  "print "))       ; #1
   449     *   (Text    "'Hello '")      ; #2
   450     *   (Element "br")            ; #3
   451     *   (Text    "  + 'World';")) ; #4
   452     * </pre>
   453     * <p>
   454     * corresponds to the HTML
   455     * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
   456     *
   457     * <p>
   458     * It will produce the output:</p>
   459     * <pre>
   460     * {
   461     *   source: "print 'Hello '\n  + 'World';",
   462     *   //                 1         2
   463     *   //       012345678901234 5678901234567
   464     *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
   465     * }
   466     * </pre>
   467     * <p>
   468     * where #1 is a reference to the {@code "print "} text node above, and so
   469     * on for the other text nodes.
   470     * </p>
   471     *
   472     * <p>
   473     * The {@code} spans array is an array of pairs.  Even elements are the start
   474     * indices of substrings, and odd elements are the text nodes (or BR elements)
   475     * that contain the text for those substrings.
   476     * Substrings continue until the next index or the end of the source.
   477     * </p>
   478     *
   479     * @param {Node} node an HTML DOM subtree containing source-code.
   480     * @return {Object} source code and the text nodes in which they occur.
   481     */
   482    function extractSourceSpans(node) {
   483      var nocode = /(?:^|\s)nocode(?:\s|$)/;
   484    
   485      var chunks = [];
   486      var length = 0;
   487      var spans = [];
   488      var k = 0;
   489    
   490      var whitespace;
   491      if (node.currentStyle) {
   492        whitespace = node.currentStyle.whiteSpace;
   493      } else if (window.getComputedStyle) {
   494        whitespace = document.defaultView.getComputedStyle(node, null)
   495            .getPropertyValue('white-space');
   496      }
   497      var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);
   498    
   499      function walk(node) {
   500        switch (node.nodeType) {
   501          case 1:  // Element
   502            if (nocode.test(node.className)) { return; }
   503            for (var child = node.firstChild; child; child = child.nextSibling) {
   504              walk(child);
   505            }
   506            var nodeName = node.nodeName;
   507            if ('BR' === nodeName || 'LI' === nodeName) {
   508              chunks[k] = '\n';
   509              spans[k << 1] = length++;
   510              spans[(k++ << 1) | 1] = node;
   511            }
   512            break;
   513          case 3: case 4:  // Text
   514            var text = node.nodeValue;
   515            if (text.length) {
   516              if (!isPreformatted) {
   517                text = text.replace(/[ \t\r\n]+/g, ' ');
   518              } else {
   519                text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
   520              }
   521              // TODO: handle tabs here?
   522              chunks[k] = text;
   523              spans[k << 1] = length;
   524              length += text.length;
   525              spans[(k++ << 1) | 1] = node;
   526            }
   527            break;
   528        }
   529      }
   530    
   531      walk(node);
   532    
   533      return {
   534        source: chunks.join('').replace(/\n$/, ''),
   535        spans: spans
   536      };
   537    }
   538  
   539  
   540    /**
   541     * Apply the given language handler to sourceCode and add the resulting
   542     * decorations to out.
   543     * @param {number} basePos the index of sourceCode within the chunk of source
   544     *    whose decorations are already present on out.
   545     */
   546    function appendDecorations(basePos, sourceCode, langHandler, out) {
   547      if (!sourceCode) { return; }
   548      var job = {
   549        source: sourceCode,
   550        basePos: basePos
   551      };
   552      langHandler(job);
   553      out.push.apply(out, job.decorations);
   554    }
   555  
   556    /** Given triples of [style, pattern, context] returns a lexing function,
   557      * The lexing function interprets the patterns to find token boundaries and
   558      * returns a decoration list of the form
   559      * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
   560      * where index_n is an index into the sourceCode, and style_n is a style
   561      * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
   562      * all characters in sourceCode[index_n-1:index_n].
   563      *
   564      * The stylePatterns is a list whose elements have the form
   565      * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
   566      *
   567      * Style is a style constant like PR_PLAIN, or can be a string of the
   568      * form 'lang-FOO', where FOO is a language extension describing the
   569      * language of the portion of the token in $1 after pattern executes.
   570      * E.g., if style is 'lang-lisp', and group 1 contains the text
   571      * '(hello (world))', then that portion of the token will be passed to the
   572      * registered lisp handler for formatting.
   573      * The text before and after group 1 will be restyled using this decorator
   574      * so decorators should take care that this doesn't result in infinite
   575      * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
   576      * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
   577      * '<script>foo()<\/script>', which would cause the current decorator to
   578      * be called with '<script>' which would not match the same rule since
   579      * group 1 must not be empty, so it would be instead styled as PR_TAG by
   580      * the generic tag rule.  The handler registered for the 'js' extension would
   581      * then be called with 'foo()', and finally, the current decorator would
   582      * be called with '<\/script>' which would not match the original rule and
   583      * so the generic tag rule would identify it as a tag.
   584      *
   585      * Pattern must only match prefixes, and if it matches a prefix, then that
   586      * match is considered a token with the same style.
   587      *
   588      * Context is applied to the last non-whitespace, non-comment token
   589      * recognized.
   590      *
   591      * Shortcut is an optional string of characters, any of which, if the first
   592      * character, guarantee that this pattern and only this pattern matches.
   593      *
   594      * @param {Array} shortcutStylePatterns patterns that always start with
   595      *   a known character.  Must have a shortcut string.
   596      * @param {Array} fallthroughStylePatterns patterns that will be tried in
   597      *   order if the shortcut ones fail.  May have shortcuts.
   598      *
   599      * @return {function (Object)} a
   600      *   function that takes source code and returns a list of decorations.
   601      */
   602    function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
   603      var shortcuts = {};
   604      var tokenizer;
   605      (function () {
   606        var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
   607        var allRegexs = [];
   608        var regexKeys = {};
   609        for (var i = 0, n = allPatterns.length; i < n; ++i) {
   610          var patternParts = allPatterns[i];
   611          var shortcutChars = patternParts[3];
   612          if (shortcutChars) {
   613            for (var c = shortcutChars.length; --c >= 0;) {
   614              shortcuts[shortcutChars.charAt(c)] = patternParts;
   615            }
   616          }
   617          var regex = patternParts[1];
   618          var k = '' + regex;
   619          if (!regexKeys.hasOwnProperty(k)) {
   620            allRegexs.push(regex);
   621            regexKeys[k] = null;
   622          }
   623        }
   624        allRegexs.push(/[\0-\uffff]/);
   625        tokenizer = combinePrefixPatterns(allRegexs);
   626      })();
   627  
   628      var nPatterns = fallthroughStylePatterns.length;
   629      var notWs = /\S/;
   630  
   631      /**
   632       * Lexes job.source and produces an output array job.decorations of style
   633       * classes preceded by the position at which they start in job.source in
   634       * order.
   635       *
   636       * @param {Object} job an object like {@code
   637       *    source: {string} sourceText plain text,
   638       *    basePos: {int} position of job.source in the larger chunk of
   639       *        sourceCode.
   640       * }
   641       */
   642      var decorate = function (job) {
   643        var sourceCode = job.source, basePos = job.basePos;
   644        /** Even entries are positions in source in ascending order.  Odd enties
   645          * are style markers (e.g., PR_COMMENT) that run from that position until
   646          * the end.
   647          * @type {Array.<number|string>}
   648          */
   649        var decorations = [basePos, PR_PLAIN];
   650        var pos = 0;  // index into sourceCode
   651        var tokens = sourceCode.match(tokenizer) || [];
   652        var styleCache = {};
   653  
   654        for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
   655          var token = tokens[ti];
   656          var style = styleCache[token];
   657          var match = void 0;
   658  
   659          var isEmbedded;
   660          if (typeof style === 'string') {
   661            isEmbedded = false;
   662          } else {
   663            var patternParts = shortcuts[token.charAt(0)];
   664            if (patternParts) {
   665              match = token.match(patternParts[1]);
   666              style = patternParts[0];
   667            } else {
   668              for (var i = 0; i < nPatterns; ++i) {
   669                patternParts = fallthroughStylePatterns[i];
   670                match = token.match(patternParts[1]);
   671                if (match) {
   672                  style = patternParts[0];
   673                  break;
   674                }
   675              }
   676  
   677              if (!match) {  // make sure that we make progress
   678                style = PR_PLAIN;
   679              }
   680            }
   681  
   682            isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
   683            if (isEmbedded && !(match && typeof match[1] === 'string')) {
   684              isEmbedded = false;
   685              style = PR_SOURCE;
   686            }
   687  
   688            if (!isEmbedded) { styleCache[token] = style; }
   689          }
   690  
   691          var tokenStart = pos;
   692          pos += token.length;
   693  
   694          if (!isEmbedded) {
   695            decorations.push(basePos + tokenStart, style);
   696          } else {  // Treat group 1 as an embedded block of source code.
   697            var embeddedSource = match[1];
   698            var embeddedSourceStart = token.indexOf(embeddedSource);
   699            var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
   700            if (match[2]) {
   701              // If embeddedSource can be blank, then it would match at the
   702              // beginning which would cause us to infinitely recurse on the
   703              // entire token, so we catch the right context in match[2].
   704              embeddedSourceEnd = token.length - match[2].length;
   705              embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
   706            }
   707            var lang = style.substring(5);
   708            // Decorate the left of the embedded source
   709            appendDecorations(
   710                basePos + tokenStart,
   711                token.substring(0, embeddedSourceStart),
   712                decorate, decorations);
   713            // Decorate the embedded source
   714            appendDecorations(
   715                basePos + tokenStart + embeddedSourceStart,
   716                embeddedSource,
   717                langHandlerForExtension(lang, embeddedSource),
   718                decorations);
   719            // Decorate the right of the embedded section
   720            appendDecorations(
   721                basePos + tokenStart + embeddedSourceEnd,
   722                token.substring(embeddedSourceEnd),
   723                decorate, decorations);
   724          }
   725        }
   726        job.decorations = decorations;
   727      };
   728      return decorate;
   729    }
   730  
   731    /** returns a function that produces a list of decorations from source text.
   732      *
   733      * This code treats ", ', and ` as string delimiters, and \ as a string
   734      * escape.  It does not recognize perl's qq() style strings.
   735      * It has no special handling for double delimiter escapes as in basic, or
   736      * the tripled delimiters used in python, but should work on those regardless
   737      * although in those cases a single string literal may be broken up into
   738      * multiple adjacent string literals.
   739      *
   740      * It recognizes C, C++, and shell style comments.
   741      *
   742      * @param {Object} options a set of optional parameters.
   743      * @return {function (Object)} a function that examines the source code
   744      *     in the input job and builds the decoration list.
   745      */
   746    function sourceDecorator(options) {
   747      var shortcutStylePatterns = [], fallthroughStylePatterns = [];
   748      if (options['tripleQuotedStrings']) {
   749        // '''multi-line-string''', 'single-line-string', and double-quoted
   750        shortcutStylePatterns.push(
   751            [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
   752             null, '\'"']);
   753      } else if (options['multiLineStrings']) {
   754        // 'multi-line-string', "multi-line-string"
   755        shortcutStylePatterns.push(
   756            [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
   757             null, '\'"`']);
   758      } else {
   759        // 'single-line-string', "single-line-string"
   760        shortcutStylePatterns.push(
   761            [PR_STRING,
   762             /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
   763             null, '"\'']);
   764      }
   765      if (options['verbatimStrings']) {
   766        // verbatim-string-literal production from the C# grammar.  See issue 93.
   767        fallthroughStylePatterns.push(
   768            [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
   769      }
   770      var hc = options['hashComments'];
   771      if (hc) {
   772        if (options['cStyleComments']) {
   773          if (hc > 1) {  // multiline hash comments
   774            shortcutStylePatterns.push(
   775                [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
   776          } else {
   777            // Stop C preprocessor declarations at an unclosed open comment
   778            shortcutStylePatterns.push(
   779                [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
   780                 null, '#']);
   781          }
   782          fallthroughStylePatterns.push(
   783              [PR_STRING,
   784               /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
   785               null]);
   786        } else {
   787          shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
   788        }
   789      }
   790      if (options['cStyleComments']) {
   791        fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
   792        fallthroughStylePatterns.push(
   793            [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
   794      }
   795      if (options['regexLiterals']) {
   796        var REGEX_LITERAL = (
   797            // A regular expression literal starts with a slash that is
   798            // not followed by * or / so that it is not confused with
   799            // comments.
   800            '/(?=[^/*])'
   801            // and then contains any number of raw characters,
   802            + '(?:[^/\\x5B\\x5C]'
   803            // escape sequences (\x5C),
   804            +    '|\\x5C[\\s\\S]'
   805            // or non-nesting character sets (\x5B\x5D);
   806            +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
   807            // finally closed by a /.
   808            + '/');
   809        fallthroughStylePatterns.push(
   810            ['lang-regex',
   811             new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
   812             ]);
   813      }
   814  
   815      var keywords = options['keywords'].replace(/^\s+|\s+$/g, '');
   816      if (keywords.length) {
   817        fallthroughStylePatterns.push(
   818            [PR_KEYWORD,
   819             new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
   820      }
   821  
   822      shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
   823      fallthroughStylePatterns.push(
   824          // TODO(mikesamuel): recognize non-latin letters and numerals in idents
   825          [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
   826          [PR_TYPE,        /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
   827          [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
   828          [PR_LITERAL,
   829           new RegExp(
   830               '^(?:'
   831               // A hex number
   832               + '0x[a-f0-9]+'
   833               // or an octal or decimal number,
   834               + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)'
   835               // possibly in scientific notation
   836               + '(?:e[+\\-]?\\d+)?'
   837               + ')'
   838               // with an optional modifier like UL for unsigned long
   839               + '[a-z]*', 'i'),
   840           null, '0123456789'],
   841          // Don't treat escaped quotes in bash as starting strings.  See issue 144.
   842          [PR_PLAIN,       /^\\[\s\S]?/, null],
   843          [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);
   844  
   845      return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
   846    }
   847  
   848    var decorateSource = sourceDecorator({
   849          'keywords': ALL_KEYWORDS,
   850          'hashComments': true,
   851          'cStyleComments': true,
   852          'multiLineStrings': true,
   853          'regexLiterals': true
   854        });
   855  
   856    /**
   857     * Given a DOM subtree, wraps it in a list, and puts each line into its own
   858     * list item.
   859     *
   860     * @param {Node} node modified in place.  Its content is pulled into an
   861     *     HTMLOListElement, and each line is moved into a separate list item.
   862     *     This requires cloning elements, so the input might not have unique
   863     *     IDs after numbering.
   864     */
   865    function numberLines(node, opt_startLineNum) {
   866      var nocode = /(?:^|\s)nocode(?:\s|$)/;
   867      var lineBreak = /\r\n?|\n/;
   868    
   869      var document = node.ownerDocument;
   870    
   871      var whitespace;
   872      if (node.currentStyle) {
   873        whitespace = node.currentStyle.whiteSpace;
   874      } else if (window.getComputedStyle) {
   875        whitespace = document.defaultView.getComputedStyle(node, null)
   876            .getPropertyValue('white-space');
   877      }
   878      // If it's preformatted, then we need to split lines on line breaks
   879      // in addition to <BR>s.
   880      var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);
   881    
   882      var li = document.createElement('LI');
   883      while (node.firstChild) {
   884        li.appendChild(node.firstChild);
   885      }
   886      // An array of lines.  We split below, so this is initialized to one
   887      // un-split line.
   888      var listItems = [li];
   889    
   890      function walk(node) {
   891        switch (node.nodeType) {
   892          case 1:  // Element
   893            if (nocode.test(node.className)) { break; }
   894            if ('BR' === node.nodeName) {
   895              breakAfter(node);
   896              // Discard the <BR> since it is now flush against a </LI>.
   897              if (node.parentNode) {
   898                node.parentNode.removeChild(node);
   899              }
   900            } else {
   901              for (var child = node.firstChild; child; child = child.nextSibling) {
   902                walk(child);
   903              }
   904            }
   905            break;
   906          case 3: case 4:  // Text
   907            if (isPreformatted) {
   908              var text = node.nodeValue;
   909              var match = text.match(lineBreak);
   910              if (match) {
   911                var firstLine = text.substring(0, match.index);
   912                node.nodeValue = firstLine;
   913                var tail = text.substring(match.index + match[0].length);
   914                if (tail) {
   915                  var parent = node.parentNode;
   916                  parent.insertBefore(
   917                      document.createTextNode(tail), node.nextSibling);
   918                }
   919                breakAfter(node);
   920                if (!firstLine) {
   921                  // Don't leave blank text nodes in the DOM.
   922                  node.parentNode.removeChild(node);
   923                }
   924              }
   925            }
   926            break;
   927        }
   928      }
   929    
   930      // Split a line after the given node.
   931      function breakAfter(lineEndNode) {
   932        // If there's nothing to the right, then we can skip ending the line
   933        // here, and move root-wards since splitting just before an end-tag
   934        // would require us to create a bunch of empty copies.
   935        while (!lineEndNode.nextSibling) {
   936          lineEndNode = lineEndNode.parentNode;
   937          if (!lineEndNode) { return; }
   938        }
   939    
   940        function breakLeftOf(limit, copy) {
   941          // Clone shallowly if this node needs to be on both sides of the break.
   942          var rightSide = copy ? limit.cloneNode(false) : limit;
   943          var parent = limit.parentNode;
   944          if (parent) {
   945            // We clone the parent chain.
   946            // This helps us resurrect important styling elements that cross lines.
   947            // E.g. in <i>Foo<br>Bar</i>
   948            // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
   949            var parentClone = breakLeftOf(parent, 1);
   950            // Move the clone and everything to the right of the original
   951            // onto the cloned parent.
   952            var next = limit.nextSibling;
   953            parentClone.appendChild(rightSide);
   954            for (var sibling = next; sibling; sibling = next) {
   955              next = sibling.nextSibling;
   956              parentClone.appendChild(sibling);
   957            }
   958          }
   959          return rightSide;
   960        }
   961    
   962        var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);
   963    
   964        // Walk the parent chain until we reach an unattached LI.
   965        for (var parent;
   966             // Check nodeType since IE invents document fragments.
   967             (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
   968          copiedListItem = parent;
   969        }
   970        // Put it on the list of lines for later processing.
   971        listItems.push(copiedListItem);
   972      }
   973    
   974      // Split lines while there are lines left to split.
   975      for (var i = 0;  // Number of lines that have been split so far.
   976           i < listItems.length;  // length updated by breakAfter calls.
   977           ++i) {
   978        walk(listItems[i]);
   979      }
   980    
   981      // Make sure numeric indices show correctly.
   982      if (opt_startLineNum === (opt_startLineNum|0)) {
   983        listItems[0].setAttribute('value', opt_startLineNum);
   984      }
   985    
   986      var ol = document.createElement('OL');
   987      ol.className = 'linenums';
   988      var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
   989      for (var i = 0, n = listItems.length; i < n; ++i) {
   990        li = listItems[i];
   991        // Stick a class on the LIs so that stylesheets can
   992        // color odd/even rows, or any other row pattern that
   993        // is co-prime with 10.
   994        li.className = 'L' + ((i + offset) % 10);
   995        if (!li.firstChild) {
   996          li.appendChild(document.createTextNode('\xA0'));
   997        }
   998        ol.appendChild(li);
   999      }
  1000    
  1001      node.appendChild(ol);
  1002    }
  1003  
  /**
   * Wraps the text nodes of {@code job.sourceNode} in styled SPANs, splitting
   * them wherever a boundary in {@code job.decorations} falls inside a node.
   * @param {Object} job like <pre>{
   *    source: {string} source as plain text,
   *    spans: {Array.<number|Node>} alternating span start indices into source
   *       and the text node or element (e.g. {@code <BR>}) corresponding to that
   *       span.
   *    decorations: {Array.<number|string} an array of style classes preceded
   *       by the position at which they start in job.source in order.  The
   *       array is compacted in place.
   * }</pre>
   * @private
   */
  function recombineTagsAndDecorations(job) {
    var isIE = /\bMSIE\b/.test(navigator.userAgent);
    var lineFeeds = /\n/g;

    var text = job.source;
    var textLength = text.length;
    var spans = job.spans;
    var spanCount = spans.length;
    var decorations = job.decorations;
    var decorationCount = decorations.length;

    // Compact the decoration list in place.
    var writeAt = 0;
    var readAt = 0;
    while (readAt < decorationCount) {
      // Of several entries starting at one position, only the last counts.
      var position = decorations[readAt];
      var last = readAt;
      while (last + 2 < decorationCount
             && decorations[last + 2] === position) {
        last += 2;
      }
      // Swallow following entries that repeat the style or are zero-length.
      var style = decorations[last + 1];
      var next = last + 2;
      while (next + 2 <= decorationCount
             && (decorations[next + 1] === style
                 || decorations[next] === decorations[next + 2])) {
        next += 2;
      }
      decorations[writeAt++] = position;
      decorations[writeAt++] = style;
      readAt = next;
    }

    // Strip any zero-length decoration at the end.
    if (writeAt && decorations[writeAt - 2] === textLength) { writeAt -= 2; }
    decorations.length = writeAt;

    // Walk spans and decorations in lock step.
    var cursor = 0;           // index into text after the last emitted chunk
    var spanAt = 0;           // current position/node pair in spans
    var decorationAt = 0;     // current position/style pair in decorations
    while (spanAt < spanCount) {
      var spanEnd = spans[spanAt + 2] || textLength;
      var decEnd = decorations[decorationAt + 2] || textLength;
      var chunkEnd = Math.min(spanEnd, decEnd);

      var textNode = spans[spanAt + 1];
      if (textNode.nodeType !== 1) {  // Don't muck with <BR>s or <LI>s
        var chunk = text.substring(cursor, chunkEnd);
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE) { chunk = chunk.replace(lineFeeds, '\r'); }
        textNode.nodeValue = chunk;

        var doc = textNode.ownerDocument;
        var wrapper = doc.createElement('SPAN');
        wrapper.className = decorations[decorationAt + 1];
        var container = textNode.parentNode;
        container.replaceChild(wrapper, textNode);
        wrapper.appendChild(textNode);

        if (cursor < spanEnd) {  // Split off a text node.
          // TODO: Possibly optimize by using '' if there's no flicker.
          textNode = doc.createTextNode(text.substring(chunkEnd, spanEnd));
          spans[spanAt + 1] = textNode;
          container.insertBefore(textNode, wrapper.nextSibling);
        }
      }

      cursor = chunkEnd;

      if (cursor >= spanEnd) { spanAt += 2; }
      if (cursor >= decEnd) { decorationAt += 2; }
    }
  }
  1106  
  1107  
  /** Maps language-specific file extensions to handlers. */
  var langHandlerRegistry = {};
  /** Register a language handler for the given file extensions.
    * An extension that already has a handler keeps it; a warning is written to
    * the console, when there is one, and the new handler is ignored.
    * @param {function (Object)} handler a function from source code to a list
    *      of decorations.  Takes a single argument job which describes the
    *      state of the computation.   The single parameter has the form
    *      {@code {
    *        source: {string} as plain text.
    *        decorations: {Array.<number|string>} an array of style classes
    *                     preceded by the position at which they start in
    *                     job.source in order.
    *                     The language handler should assign this field.
    *        basePos: {int} the position of source in the larger source chunk.
    *                 All positions in the output decorations array are relative
    *                 to the larger source chunk.
    *      } }
    * @param {Array.<string>} fileExtensions
    */
  function registerLangHandler(handler, fileExtensions) {
    for (var i = fileExtensions.length; --i >= 0;) {
      var ext = fileExtensions[i];
      // Borrow hasOwnProperty from Object.prototype: an extension that happens
      // to be called "hasOwnProperty" would otherwise shadow the method on the
      // registry and break every later lookup.
      if (!Object.prototype.hasOwnProperty.call(langHandlerRegistry, ext)) {
        langHandlerRegistry[ext] = handler;
      } else if ('console' in window) {
        console['warn']('cannot override language handler %s', ext);
      }
    }
  }
  /**
   * Looks up the handler registered for an extension.
   * @param {string?} extension a registered extension, or a falsy/unknown value
   *     to have the language guessed from the source.
   * @param {string} source the text to be decorated; only consulted when the
   *     extension is not registered.
   * @return {function (Object)|undefined} the handler, or undefined when not
   *     even the default handler has been registered.
   */
  function langHandlerForExtension(extension, source) {
    if (!(extension
          && Object.prototype.hasOwnProperty.call(
              langHandlerRegistry, extension))) {
      // Treat it as markup if the first non-whitespace character is a <.
      extension = /^\s*</.test(source)
          ? 'default-markup'
          : 'default-code';
    }
    return langHandlerRegistry[extension];
  }
  // Built-in language handlers.  A style of the form 'lang-<ext>' marks capture
  // group 1 of its pattern as embedded source to be decorated by the handler
  // for <ext>; a bare 'lang-' leaves the choice to langHandlerForExtension,
  // which then guesses from the embedded text.

  // Fallback for code in an unspecified language.
  registerLangHandler(decorateSource, ['default-code']);
  // Markup: splits the text into declarations, comments, embedded sections and
  // tags; the inside of each tag is handed to the 'in.tag' handler.
  registerLangHandler(
      createSimpleLexer(
          [],
          [
           [PR_PLAIN,       /^[^<?]+/],
           [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
           [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
           // Unescaped content in an unknown language
           ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
           ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
           [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
           ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
           // Unescaped content in javascript.  (Or possibly vbscript).
           ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
           // Contains unescaped stylesheet content
           ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
           ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
          ]),
      ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
  // The inside of a single tag: tag name, attribute names and values, with
  // on* and style attribute values treated as embedded js and css.
  registerLangHandler(
      createSimpleLexer(
          [
           [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
           [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
           ],
          [
           [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
           [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
           ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
           [PR_PUNCTUATION,  /^[=<>\/]+/],
           ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
           ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
           ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
           ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
           ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
           ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
           ]),
      ['in.tag']);
  // An unquoted attribute value is styled as one attribute value.
  registerLangHandler(
      createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
  registerLangHandler(sourceDecorator({
          'keywords': CPP_KEYWORDS,
          'hashComments': true,
          'cStyleComments': true
        }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
  registerLangHandler(sourceDecorator({
          'keywords': 'null true false'
        }), ['json']);
  registerLangHandler(sourceDecorator({
          'keywords': CSHARP_KEYWORDS,
          'hashComments': true,
          'cStyleComments': true,
          'verbatimStrings': true
        }), ['cs']);
  registerLangHandler(sourceDecorator({
          'keywords': JAVA_KEYWORDS,
          'cStyleComments': true
        }), ['java']);
  registerLangHandler(sourceDecorator({
          'keywords': SH_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true
        }), ['bsh', 'csh', 'sh']);
  registerLangHandler(sourceDecorator({
          'keywords': PYTHON_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'tripleQuotedStrings': true
        }), ['cv', 'py']);
  registerLangHandler(sourceDecorator({
          'keywords': PERL_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'regexLiterals': true
        }), ['perl', 'pl', 'pm']);
  registerLangHandler(sourceDecorator({
          'keywords': RUBY_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'regexLiterals': true
        }), ['rb']);
  registerLangHandler(sourceDecorator({
          'keywords': JSCRIPT_KEYWORDS,
          'cStyleComments': true,
          'regexLiterals': true
        }), ['js']);
  registerLangHandler(sourceDecorator({
          'keywords': COFFEE_KEYWORDS,
          'hashComments': 3,  // ### style block comments
          'cStyleComments': true,
          // Spelled like the option sourceDecorator reads.  The triple-quoted
          // option below is checked first, so the string patterns are the same
          // either way.
          'multiLineStrings': true,
          'tripleQuotedStrings': true,
          'regexLiterals': true
        }), ['coffee']);
  // A regex literal is styled as a single string.
  registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
  1242  
  /**
   * Runs one pretty printing job: extracts plain text from the job's source
   * node, lets the language handler compute decorations, and writes the styled
   * result back into the node.  Errors raised along the way are logged to the
   * console, when there is one, rather than propagated.
   * @param {Object} job carries {@code sourceNode} and optionally
   *     {@code langExtension}; {@code source}, {@code spans} and
   *     {@code basePos} are filled in here.
   */
  function applyDecorator(job) {
    var requestedLanguage = job.langExtension;

    try {
      // Extract tags, and convert the source code to plain text.
      var extracted = extractSourceSpans(job.sourceNode);
      /** Plain text. @type {string} */
      var plainText = extracted.source;
      job.source = plainText;
      job.spans = extracted.spans;
      job.basePos = 0;

      // Let the language handler compute the decorations...
      var handler = langHandlerForExtension(requestedLanguage, plainText);
      handler(job);

      // ...then merge decorations and tags back into the sourceNode in place.
      recombineTagsAndDecorations(job);
    } catch (e) {
      if (!('console' in window)) { return; }
      console['log'](e && e['stack'] ? e['stack'] : e);
    }
  }
  1267  
  /**
   * Pretty prints a chunk of HTML and returns the decorated HTML.
   * @param sourceCodeHtml {string} The HTML to pretty print.
   * @param opt_langExtension {string} The language name to use.
   *     Typically, a filename extension like 'cpp' or 'java'.
   * @param opt_numberLines {number|boolean} True to number lines,
   *     or the 1-indexed number of the first line in sourceCodeHtml.
   * @return {string} the inner HTML of the scratch PRE element after
   *     decoration.
   */
  function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
    var scratch = document.createElement('PRE');
    // Parsing the markup could cause images to load and onload listeners to
    // fire, e.g. <img onerror="alert(1337)" src="nosuchimage.png">.
    // We assume that the inner HTML is from a trusted source.
    scratch.innerHTML = sourceCodeHtml;

    if (opt_numberLines) {
      numberLines(scratch, opt_numberLines);
    }

    applyDecorator({
      langExtension: opt_langExtension,
      numberLines: opt_numberLines,
      sourceNode: scratch
    });

    return scratch.innerHTML;
  }
  1293  
  /**
   * Pretty prints every PRE, CODE and XMP element in the document whose class
   * contains "prettyprint", skipping elements nested inside another such
   * element.
   *
   * When {@code window.PR_SHOULD_USE_CONTINUATION} is truthy, work is done in
   * slices of roughly 250ms separated by timeouts so that a large page does not
   * make the browser unresponsive.
   *
   * @param {function ()=} opt_whenDone called once all elements have been
   *     processed.
   */
  function prettyPrint(opt_whenDone) {
    function byTagName(tn) { return document.getElementsByTagName(tn); }
    // fetch a list of nodes to rewrite
    var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
    var elements = [];
    for (var i = 0; i < codeSegments.length; ++i) {
      for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
        elements.push(codeSegments[i][j]);
      }
    }
    codeSegments = null;

    var clock = Date;
    if (!clock['now']) {
      clock = { 'now': function () { return (new Date).getTime(); } };
    }

    // tagName is upper case in HTML documents and keeps its source case in
    // XML ones, so the container check has to ignore case.
    var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

    // The loop is broken into a series of continuations to make sure that we
    // don't make the browser unresponsive when rewriting a large page.
    var k = 0;
    var prettyPrintingJob;

    function doWork() {
      var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                     clock.now() + 250 /* ms */ :
                     Infinity);
      for (; k < elements.length && clock.now() < endTime; k++) {
        var cs = elements[k];
        if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
          // If the classes includes a language extensions, use it.
          // Language extensions can be specified like
          //     <pre class="prettyprint lang-cpp">
          // the language extension "cpp" is used to find a language handler as
          // passed to PR.registerLangHandler.
          var langExtension = cs.className.match(/\blang-(\w+)\b/);
          if (langExtension) { langExtension = langExtension[1]; }

          // make sure this is not nested in an already prettified element
          var nested = false;
          for (var p = cs.parentNode; p; p = p.parentNode) {
            if (preCodeXmpRe.test(p.tagName) &&
                p.className && p.className.indexOf('prettyprint') >= 0) {
              nested = true;
              break;
            }
          }
          if (!nested) {
            // Look for a class like linenums or linenums:<n> where <n> is the
            // 1-indexed number of the first line.
            var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
            lineNums = lineNums
                  ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                  : false;
            if (lineNums) { numberLines(cs, lineNums); }

            // do the pretty printing
            prettyPrintingJob = {
              langExtension: langExtension,
              sourceNode: cs,
              numberLines: lineNums
            };
            applyDecorator(prettyPrintingJob);
          }
        }
      }
      if (k < elements.length) {
        // finish up in a continuation
        setTimeout(doWork, 250);
      } else if (opt_whenDone) {
        opt_whenDone();
      }
    }

    doWork();
  }
  1370  
  // Public API.  Names are quoted so that they survive property renaming by a
  // minifier.
  var prNamespace = {
        'createSimpleLexer': createSimpleLexer,
        'registerLangHandler': registerLangHandler,
        'sourceDecorator': sourceDecorator,
        'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
        'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
        'PR_COMMENT': PR_COMMENT,
        'PR_DECLARATION': PR_DECLARATION,
        'PR_KEYWORD': PR_KEYWORD,
        'PR_LITERAL': PR_LITERAL,
        'PR_NOCODE': PR_NOCODE,
        'PR_PLAIN': PR_PLAIN,
        'PR_PUNCTUATION': PR_PUNCTUATION,
        'PR_SOURCE': PR_SOURCE,
        'PR_STRING': PR_STRING,
        'PR_TAG': PR_TAG,
        'PR_TYPE': PR_TYPE
      };
  // Entry points for page authors.
  window['prettyPrintOne'] = prettyPrintOne;
  window['prettyPrint'] = prettyPrint;
  // Building blocks and style class names for language extension authors.
  window['PR'] = prNamespace;
  1391  })();