github.com/elliott5/community@v0.14.1-0.20160709191136-823126fb026a/documize/api/convert/html/html_test.go (about) 1 // Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved. 2 // 3 // This software (Documize Community Edition) is licensed under 4 // GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html 5 // 6 // You can operate outside the AGPL restrictions by purchasing 7 // Documize Enterprise Edition and obtaining a commercial license 8 // by contacting <sales@documize.com>. 9 // 10 // https://documize.com 11 12 package html_test 13 14 import ( 15 "strings" 16 "testing" 17 ) 18 import "github.com/documize/community/wordsmith/api" 19 import "github.com/documize/community/documize/api/convert/html" 20 21 const b string = ` 22 <h1>Markdown: Basics</h1> 23 24 <ul id="ProjectSubmenu"> 25 <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li> 26 <li><a class="selected" title="Markdown Basics">Basics</a></li> 27 <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li> 28 <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li> 29 <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li> 30 </ul> 31 32 <h2>Getting the Gist of Markdown's Formatting Syntax</h2> 33 34 <p>This page offers a brief overview of what it's like to use Markdown. 35 The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for 36 every feature, but Markdown should be very easy to pick up simply by 37 looking at a few examples of it in action. The examples on this page 38 are written in a before/after style, showing example syntax and the 39 HTML output produced by Markdown.</p> 40 41 <p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a 42 web application that allows you type your own Markdown-formatted text 43 and translate it to XHTML.</p> 44 45 <p><strong>Note:</strong> This document is itself written using Markdown; you 46 can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p> 47 48 <h2>Paragraphs, Headers, Blockquotes</h2> 49 50 <p>A paragraph is simply one or more consecutive lines of text, separated 51 by one or more blank lines. (A blank line is any line that looks like a 52 blank line -- a line containing nothing spaces or tabs is considered 53 blank.) Normal paragraphs should not be intended with spaces or tabs.</p> 54 55 <p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>. 56 Setext-style headers for <code><h1></code> and <code><h2></code> are created by 57 "underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively. 58 To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the 59 beginning of the line -- the number of hashes equals the resulting 60 HTML header level.</p> 61 62 <p>Blockquotes are indicated using email-style '<code>></code>' angle brackets.</p> 63 64 <p>Markdown:</p> 65 66 <pre><code>A First Level Header 67 ==================== 68 69 A Second Level Header 70 --------------------- 71 72 Now is the time for all good men to come to 73 the aid of their country. This is just a 74 regular paragraph. 75 76 The quick brown fox jumped over the lazy 77 dog's back. 78 79 ### Header 3 80 81 > This is a blockquote. 82 > 83 > This is the second paragraph in the blockquote. 84 > 85 > ## This is an H2 in a blockquote 86 </code></pre> 87 88 <p>Output:</p> 89 90 <pre><code><h1>A First Level Header</h1> 91 92 <h2>A Second Level Header</h2> 93 94 <p>Now is the time for all good men to come to 95 the aid of their country. This is just a 96 regular paragraph.</p> 97 98 <p>The quick brown fox jumped over the lazy 99 dog's back.</p> 100 101 <h3>Header 3</h3> 102 103 <blockquote> 104 <p>This is a blockquote.</p> 105 106 <p>This is the second paragraph in the blockquote.</p> 107 108 <h2>This is an H2 in a blockquote</h2> 109 </blockquote> 110 </code></pre> 111 112 <h3>Phrase Emphasis</h3> 113 114 <p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p> 115 116 <p>Markdown:</p> 117 118 <pre><code>Some of these words *are emphasized*. 119 Some of these words _are emphasized also_. 120 121 Use two asterisks for **strong emphasis**. 122 Or, if you prefer, __use two underscores instead__. 123 </code></pre> 124 125 <p>Output:</p> 126 127 <pre><code><p>Some of these words <em>are emphasized</em>. 128 Some of these words <em>are emphasized also</em>.</p> 129 130 <p>Use two asterisks for <strong>strong emphasis</strong>. 131 Or, if you prefer, <strong>use two underscores instead</strong>.</p> 132 </code></pre> 133 134 <h2>Lists</h2> 135 136 <p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>, 137 <code>+</code>, and <code>-</code>) as list markers. These three markers are 138 interchangable; this:</p> 139 140 <pre><code>* Candy. 141 * Gum. 142 * Booze. 143 </code></pre> 144 145 <p>this:</p> 146 147 <pre><code>+ Candy. 148 + Gum. 149 + Booze. 150 </code></pre> 151 152 <p>and this:</p> 153 154 <pre><code>- Candy. 155 - Gum. 156 - Booze. 157 </code></pre> 158 159 <p>all produce the same output:</p> 160 161 <pre><code><ul> 162 <li>Candy.</li> 163 <li>Gum.</li> 164 <li>Booze.</li> 165 </ul> 166 </code></pre> 167 168 <p>Ordered (numbered) lists use regular numbers, followed by periods, as 169 list markers:</p> 170 171 <pre><code>1. Red 172 2. Green 173 3. Blue 174 </code></pre> 175 176 <p>Output:</p> 177 178 <pre><code><ol> 179 <li>Red</li> 180 <li>Green</li> 181 <li>Blue</li> 182 </ol> 183 </code></pre> 184 185 <p>If you put blank lines between items, you'll get <code><p></code> tags for the 186 list item text. You can create multi-paragraph list items by indenting 187 the paragraphs by 4 spaces or 1 tab:</p> 188 189 <pre><code>* A list item. 190 191 With multiple paragraphs. 192 193 * Another item in the list. 194 </code></pre> 195 196 <p>Output:</p> 197 198 <pre><code><ul> 199 <li><p>A list item.</p> 200 <p>With multiple paragraphs.</p></li> 201 <li><p>Another item in the list.</p></li> 202 </ul> 203 </code></pre> 204 205 <h3>Links</h3> 206 207 <p>Markdown supports two styles for creating links: <em>inline</em> and 208 <em>reference</em>. With both styles, you use square brackets to delimit the 209 text you want to turn into a link.</p> 210 211 <p>Inline-style links use parentheses immediately after the link text. 212 For example:</p> 213 214 <pre><code>This is an [example link](http://example.com/). 215 </code></pre> 216 217 <p>Output:</p> 218 219 <pre><code><p>This is an <a href="http://example.com/"> 220 example link</a>.</p> 221 </code></pre> 222 223 <p>Optionally, you may include a title attribute in the parentheses:</p> 224 225 <pre><code>This is an [example link](http://example.com/ "With a Title"). 226 </code></pre> 227 228 <p>Output:</p> 229 230 <pre><code><p>This is an <a href="http://example.com/" title="With a Title"> 231 example link</a>.</p> 232 </code></pre> 233 234 <p>Reference-style links allow you to refer to your links by names, which 235 you define elsewhere in your document:</p> 236 237 <pre><code>I get 10 times more traffic from [Google][1] than from 238 [Yahoo][2] or [MSN][3]. 239 240 [1]: http://google.com/ "Google" 241 [2]: http://search.yahoo.com/ "Yahoo Search" 242 [3]: http://search.msn.com/ "MSN Search" 243 </code></pre> 244 245 <p>Output:</p> 246 247 <pre><code><p>I get 10 times more traffic from <a href="http://google.com/" 248 title="Google">Google</a> than from <a href="http://search.yahoo.com/" 249 title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/" 250 title="MSN Search">MSN</a>.</p> 251 </code></pre> 252 253 <p>The title attribute is optional. Link names may contain letters, 254 numbers and spaces, but are <em>not</em> case sensitive:</p> 255 256 <pre><code>I start my morning with a cup of coffee and 257 [The New York Times][NY Times]. 258 259 [ny times]: http://www.nytimes.com/ 260 </code></pre> 261 262 <p>Output:</p> 263 264 <pre><code><p>I start my morning with a cup of coffee and 265 <a href="http://www.nytimes.com/">The New York Times</a>.</p> 266 </code></pre> 267 268 <h3>Images</h3> 269 270 <p>Image syntax is very much like link syntax.</p> 271 272 <p>Inline (titles are optional):</p> 273 274 <pre><code> 275 </code></pre> 276 277 <p>Reference-style:</p> 278 279 <pre><code>![alt text][id] 280 281 [id]: /path/to/img.jpg "Title" 282 </code></pre> 283 284 <p>Both of the above examples produce the same output:</p> 285 286 <pre><code><img src="/path/to/img.jpg" alt="alt text" title="Title" /> 287 </code></pre> 288 289 <h3>Code</h3> 290 291 <p>In a regular paragraph, you can create code span by wrapping text in 292 backtick quotes. Any ampersands (<code>&</code>) and angle brackets (<code><</code> or 293 <code>></code>) will automatically be translated into HTML entities. This makes 294 it easy to use Markdown to write about HTML example code:</p> 295 296 <pre><code>I strongly recommend against using any "<blink>" tags. 297 298 I wish SmartyPants used named entities like "&mdash;"" 299 instead of decimal-encoded entites like "&#8212;". 300 </code></pre> 301 302 <p>Output:</p> 303 304 <pre><code><p>I strongly recommend against using any 305 <code>&lt;blink&gt;</code> tags.</p> 306 307 <p>I wish SmartyPants used named entities like 308 <code>&amp;mdash;</code> instead of decimal-encoded 309 entites like <code>&amp;#8212;</code>.</p> 310 </code></pre> 311 312 <p>To specify an entire block of pre-formatted code, indent every line of 313 the block by 4 spaces or 1 tab. Just like with code spans, <code>&</code>, <code><</code>, 314 and <code>></code> characters will be escaped automatically.</p> 315 316 <p>Markdown:</p> 317 318 <pre><code>If you want your page to validate under XHTML 1.0 Strict, 319 you've got to put paragraph tags in your blockquotes: 320 321 <blockquote> 322 <p>For example.</p> 323 </blockquote> 324 </code></pre> 325 326 <p>Output:</p> 327 328 <pre><code><p>If you want your page to validate under XHTML 1.0 Strict, 329 you've got to put paragraph tags in your blockquotes:</p> 330 331 <pre><code>&lt;blockquote&gt; 332 &lt;p&gt;For example.&lt;/p&gt; 333 &lt;/blockquote&gt; 334 </code></pre> 335 </code></pre> 336 337 <h4>Header4</h4> 338 <div><div><div><div><div><div> 339 <h5>Header5</h5>Body 555. 340 </div></div></div></div></div></div> 341 <h6>Header6</h6> 342 343 ` 344 345 func TestHTML(t *testing.T) { 346 347 req := &api.DocumentConversionRequest{} 348 res := &api.DocumentConversionResponse{} 349 350 err := html.SplitIfHTML(req, res) 351 if err != nil || len(res.PagesHTML) != 0 || len(res.Pages) != 0 || len(res.EmbeddedFiles) != 0 { 352 t.Error(err) 353 return 354 } 355 356 titleTooBig := []byte("<h1>") 357 for i := 0; i < 2048; i++ { 358 titleTooBig = append(titleTooBig, []byte("title too long ")...) 359 } 360 titleTooBig = append(titleTooBig, []byte("</h1>")...) 361 req = &api.DocumentConversionRequest{} 362 res = &api.DocumentConversionResponse{PagesHTML: titleTooBig} 363 err = html.SplitIfHTML(req, res) 364 if err != nil || len(res.Pages[0].Title) > 2000 { 365 t.Error(err) 366 return 367 } 368 369 req = &api.DocumentConversionRequest{} 370 res = &api.DocumentConversionResponse{PagesHTML: []byte(b)} 371 err = html.SplitIfHTML(req, res) 372 if err != nil { 373 t.Error(err) 374 return 375 } 376 //for p, pg := range res.Pages { 377 // t.Logf("%d %d %d %s", p, pg.Level, len(pg.Body), pg.Title) 378 //} 379 if !strings.HasPrefix(res.Pages[10].Title, "Header5") || 380 !strings.HasPrefix(string(res.Pages[10].Body), "Body 555.") { 381 t.Errorf("wrong page ten title: `%s` body: `%s`", res.Pages[10].Title, string(res.Pages[10].Body)) 382 } 383 384 }