Text to HTML: Difference between revisions

Text to HTML (view source)

Revision as of 11:51, 14 February 2024

34,889 bytes added, 4 months ago

m

→‎{{header|Wren}}: Minor tidy including usage now of raw string.

PureFox

9,490

edits

Revision as of 10:58, 5 January 2012 (view source) rosettacode>EMBee m (→‎{{header\|Pike}}: capitalize Pike) ← Older edit		Revision as of 11:51, 14 February 2024 (view source) PureFox (talk \| contribs) m (→‎{{header\|Wren}}: Minor tidy including usage now of raw string.) Newer edit →
(17 intermediate revisions by 10 users not shown)
Line 1: {{Draft task\|Text processing}} When developing a Website it is occasionally necessary to handle text that is received without formatting, and present it in a pleasing manner. To achieve this, the text needs to be converted to HTML. Line 14 ⟶ 13: The only requirement is to ensure that the result is valid xhtml. =={{header\|Go}}== This isn't very sophisticated but does a few things in a simple-minded way. <syntaxhighlight lang="go">package main import ( "fmt" "html" "regexp" "strings" ) var t = ` Sample Text This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists. * This is a bulleted list with a less than sign (<) * And this is its second line with a greater than sign (>) A 'normal' paragraph between the lists. 1. This is a numbered list with an ampersand (&) 2. "Second line" in double quotes 3. 'Third line' in single quotes That's all folks.` func main() { p := regexp.MustCompile(`\n\s(\n\s)+`) ul := regexp.MustCompile(`^\`) ol := regexp.MustCompile(`^\d\.`) t = html.EscapeString(t) // escape <, >, &, " and ' paras := p.Split(t, -1) // Assume if first character of first paragraph is white-space // then it's probably a document title. 
firstChar := paras[0][0] title := "Untitled" k := 0 if firstChar == ' ' \|\| firstChar == '\t' { title = strings.TrimSpace(paras[0]) k = 1 } fmt.Println("<html>") fmt.Printf("<head><title>%s</title></head>\n", title) fmt.Println("<body>") blist := false nlist := false for _, para := range paras[k:] { para2 := strings.TrimSpace(para) if ul.MatchString(para2) { if !blist { blist = true fmt.Println("<ul>") } para2 = strings.TrimSpace(para2[1:]) fmt.Printf(" <li>%s</li>\n", para2) continue } else if blist { blist = false fmt.Println("</ul>") } if ol.MatchString(para2) { if !nlist { nlist = true fmt.Println("<ol>") } para2 = strings.TrimSpace(para2[2:]) fmt.Printf(" <li>%s</li>\n", para2) continue } else if nlist { nlist = false fmt.Println("</ol>") } if !blist && !nlist { fmt.Printf("<p>%s</p>\n", para2) } } if blist { fmt.Println("</ul>") } if nlist { fmt.Println("</ol>") } fmt.Println("</body>") fmt.Println("</html>") }</syntaxhighlight> {{out}} <syntaxhighlight lang="html"><html> <head><title>Sample Text</title></head> <body> <p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p> <ul> <li>This is a bulleted list with a less than sign (<)</li> <li>And this is its second line with a greater than sign (>)</li> </ul> <p>A 'normal' paragraph between the lists.</p> <ol> <li>This is a numbered list with an ampersand (&)</li> <li>"Second line" in double quotes</li> <li>'Third line' in single quotes</li> </ol> <p>That's all folks.</p> </body> </html></syntaxhighlight> =={{header\|Julia}}== {{trans\|Go}} <syntaxhighlight lang="julia">using HttpCommon, Printf const exampletxt = """ Sample Text This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists. 
This is a bulleted list with a less than sign (<) * And this is its second line with a greater than sign (>) A 'normal' paragraph between the lists. 1. This is a numbered list with an ampersand (&) 2. "Second line" in double quotes 3. 'Third line' in single quotes That's all folks.""" function txt_to_html(t = exampletxt) p = r"\n\s(\n\s)+" ul = r"^\" ol = r"^\d\." paras = map(p -> escapeHTML(string(p)), split(t, r"[\r\n]+")) # Assume if first character of first paragraph is white-space # then it's probably a document title. firstchar = first(first(paras)) title = "Untitled" k = 1 if firstchar == ' ' \|\| firstchar == '\t' title = strip(paras[1]) k = 2 end println("<html>") @printf("<head><title>%s</title></head>\n", title) println("<body>") blist, nlist = false, false for para in @view paras[k:end] para2 = strip(para) if occursin(ul, para2) if !blist blist = true println("<ul>") end para2 = strip(para2[2:end]) @printf(" <li>%s</li>\n", para2) continue elseif blist blist = false println("</ul>") end if occursin(ol, para2) if !nlist nlist = true println("<ol>") end para2 = strip(para2[3:end]) @printf(" <li>%s</li>\n", para2) continue elseif nlist nlist = false println("</ol>") end if !blist && !nlist @printf("<p>%s</p>\n", para2) end end if blist println("</ul>") end if nlist println("</ol>") end println("</body>") println("</html>") end txt_to_html() </syntaxhighlight>{{out}} <pre> <html> <head><title>Sample Text</title></head> <body> <p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p> <ul> <li>This is a bulleted list with a less than sign (<)</li> <li>And this is its second line with a greater than sign (>)</li> </ul> <p>A 'normal' paragraph between the lists.</p> <ol> <li>This is a numbered list with an ampersand (&)</li> <li>"Second line" in double quotes</li> <li>'Third line' in single quotes</li> </ol> <p>That's all folks.</p> </body> </html> </pre> 
=={{header\|Nim}}== {{trans\|Go}} <syntaxhighlight lang="nim">import re, strutils, xmltree const Text = """ Sample Text This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists. This is a bulleted list with a less than sign (<) * And this is its second line with a greater than sign (>) A 'normal' paragraph between the lists. 1. This is a numbered list with an ampersand (&) 2. "Second line" in double quotes 3. 'Third line' in single quotes That's all folks.""" let p = re"\n\s(\n\s)+" let ul = re"^\" let ol = re"^\d\." let text = xmltree.escape(Text) let paras = text.split(p) # Assume if first character of first paragraph is white-space # then it's probably a document title. let firstChar = paras[0][0] var titleString = "untitled" var start = 0 if firstChar.isSpaceAscii: titleString = paras[0].strip() start = 1 echo "<html>" echo "<head><title>", titleString, "</title></body>" echo "<body>" var blist, nlist = false for ipara in start..paras.high: var para = paras[ipara].strip() if para.find(ul) >= 0: if not blist: blist = true echo "<ul>" echo " <li>", para[1..^1].strip(), "</li>" continue elif blist: blist = false echo "</ul>" if para.find(ol) >= 0: if not nlist: nlist = true echo "<ol>" echo " <li>", para[2..^1].strip(), "</li>" continue elif nlist: nlist = false echo "</ol>" if not (blist or nlist): echo "<p>", para, "</p>" if blist: echo "</ul>" if nlist: echo "</ol>" echo "</body>" echo "</html>"</syntaxhighlight> {{out}} <pre><html> <head><title>Sample Text</title></body> <body> <p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p> <ul> <li>This is a bulleted list with a less than sign (<)</li> <li>And this is its second line with a greater than sign (>)</li> </ul> <p>A 'normal' paragraph between the lists.</p> <ol> <li>This is a numbered list with an 
ampersand (&)</li> <li>"Second line" in double quotes</li> <li>'Third line' in single quotes</li> </ol> <p>That's all folks.</p> </body> </html></pre> =={{header\|Perl}}== {{trans\|Raku}} <syntaxhighlight lang="perl"># 20201023 added Perl programming solution use strict; use warnings; use Pod::Simple::HTML; # POD example taken from https://juerd.nl/site.plp/perlpodtut my $pod = <<'POD'; =head1 NAME My::Module - An example module =head1 SYNOPSIS use My::Module; my $object = My::Module->new(); print $object->as_string; =head1 DESCRIPTION This module does not really exist, it was made for the sole purpose of demonstrating how POD works. =head2 Methods =over 12 =item C<new> Returns a new My::Module object. =item C<as_string> Returns a stringified representation of the object. This is mainly for debugging purposes. =back =head1 LICENSE This is released under the Artistic License. See L<perlartistic>. =head1 AUTHOR Juerd - L<http://juerd.nl/> =head1 SEE ALSO L<perlpod>, L<perlpodspec> =cut POD my $parser = Pod::Simple::HTML->new(); $parser->output_fh(STDOUT); $parser->parse_string_document($pod)</syntaxhighlight> =={{header\|Phix}}== The best thing to do here is to keep it utterly trivial. 
<!--<syntaxhighlight lang="phix">(phixonline)--> <span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> <span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">"&"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&amp;"</span><span style="color: #0000FF;">},</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"<"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&lt;"</span><span style="color: #0000FF;">},</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">">"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&gt;"</span><span style="color: #0000FF;">},</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"\""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&dquo;"</span><span style="color: #0000FF;">},</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"\'"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&squo;"</span><span style="color: #0000FF;">}})</span> <span style="color: #008080;">constant</span> <span style="color: #000000;">fmt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""" <html> <head><title>%s</title></head> <body> <pre> %s </pre> </body> </html> """</span> <span style="color: #008080;">function</span> <span style="color: #000000;">text_to_html_page</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">,</span> 
<span style="color: #000000;">text</span><span style="color: #0000FF;">)</span> <span style="color: #000000;">title</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">)</span> <span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">})</span> <span style="color: #000080;font-style:italic;">-- return substitute_all(sprintf(fmt,{title,text}),hchars,hsubs)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span> <span style="color: #008080;">constant</span> <span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""" This is a paragraph a block of code * A one-bullet list > With quoted text > > and code """</span> <span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text_to_html_page</span><span style="color: #0000FF;">(</span><span 
style="color: #008000;">"my title"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">))</span> <!--</syntaxhighlight>--> {{out}} The last line of text_to_html() (as commented out) was used to generate the sanitised version of the output, as needed for inclusion on this page. <pre> <html> <head><title>my title</title></head> <body> <pre> This is a paragraph a block of code * A one-bullet list &gt; With quoted text &gt; &gt; and code </pre> </body> </html> </pre> =={{header\|Pike}}== Line 33 ⟶ 474: this implementation is still incomplete. <~~lang~~syntaxhighlight ~~Pike~~lang="pike">// function to calculate the average line length (not used yet below) int linelength(array lines) { Line 150 ⟶ 591: } return root; }</~~lang~~syntaxhighlight> =={{header\|Racket}}== This task seems like it's very under-defined, but the discussion seems to be headed towards a simple markdown specification. I therefore do this with a small interface to [https://github.com/jgm/cmark cmark] to render [http://commonmark.org/ commonmark] text. (Note that this is not some cooked code, it's coming from code that I'm using to render class notes, and hopefully it will be useful to have such an example here. It certainly seems to me as a useful thing compared to some half-baked not-really-markdown-or-anything implementation.) <syntaxhighlight lang="racket"> #lang at-exp racket (require ffi/unsafe ffi/unsafe/define) (define-ffi-definer defcmark (ffi-lib "libcmark")) (define _cmark_opts (_bitmask '(sourcepos = 1 hardbreaks = 2 normalize = 4 smart = 8))) (define-cpointer-type _node) (defcmark cmark_markdown_to_html (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r)))) (define (cmark-markdown-to-html #:options [opts '(normalize smart)] . 
text) (cmark_markdown_to_html (string->bytes/utf-8 (string-append* text)) opts)) (display @cmark-markdown-to-html{ This is a paragraph a block of code * A one-bullet list > With quoted text > > and code }) </syntaxhighlight> {{out}} <pre> <p>This is a paragraph</p> <pre><code>a block of code </code></pre> <ul> <li>A one-bullet list <blockquote> <p>With quoted text</p> <pre><code>and code </code></pre> </blockquote> </li> </ul> </pre> =={{header\|Raku}}== (formerly Perl 6) {{works with\|Rakudo\|2019.11}} The task specs are essentially non-existent. "Make a best guess at how to render mark-up free text"? Anything that could be trusted at all would either be extremely trivial or insanely complex. And it shows by the task example writers staying away in droves. Five examples after seven years!? Rather than waste time on that noise, I'll demonstrate POD6 to HTML conversion. POD6 is a simple, text-only mark-up used for Raku documentation. (It's Plain Old Documentation for Raku) It uses pretty simple textual markup and has multiple tools to convert the POD6 document into many other formats, HTML among them. It is '''not''' markup free, but it is actually usable in production. <syntaxhighlight lang="raku" line>use Pod::To::HTML; use HTML::Escape; my $pod6 = q:to/POD6/; =begin pod A very simple Pod6 document. This is a very high-level, hand-wavey overview. There are I<lots> of other options available. =head1 Section headings =head1 A top level heading =head2 A second level heading =head3 A third level heading =head4 A fourth level heading =head1 Text Ordinary paragraphs do not require an explicit marker or delimiters. Alternatively, there is also an explicit =para marker that can be used to explicitly mark a paragraph. =para This is an ordinary paragraph. Its text will be squeezed and short lines filled. 
=head1 Code Enclose code in a =code block (or V<C< >> markup for short, inline samples) =begin code my $name = 'Rakudo'; say $name; =end code =head1 Lists =head3 Unordered lists =item Grumpy =item Dopey =item Doc =item Happy =item Bashful =item Sneezy =item Sleepy =head3 Multi-level lists =item1 Animal =item2 Vertebrate =item2 Invertebrate =item1 Phase =item2 Solid =item2 Liquid =item2 Gas =head1 Formatting codes Formatting codes provide a way to add inline mark-up to a piece of text. All Pod6 formatting codes consist of a single capital letter followed immediately by a set of single or double angle brackets; Unicode double angle brackets may be used. Formatting codes may nest other formatting codes. There are many formatting codes available, some of the more common ones: =item1 V<B< >> Bold =item1 V<I< >> Italic =item1 V<U< >> Underline =item1 V<C< >> Code =item1 V<L< >> Hyperlink =item1 V<V< >> Verbatim (Don't interpret anything inside as POD markup) =head1 Tables There is quite extensive markup to allow rendering tables. 
A simple example: =begin table :caption<Mystery Men> The Shoveller Eddie Stevens King Arthur's singing shovel Blue Raja Geoffrey Smith Master of cutlery Mr Furious Roy Orson Ticking time bomb of fury The Bowler Carol Pinnsler Haunted bowling ball =end table =end pod POD6 # for display on Rosettacode say escape-html render($pod6); # normally #say render($pod6);</syntaxhighlight> {{out\|Returns something like}} <pre><!doctype html> <html lang="en"> <head> <title></title> <meta charset="UTF-8" /> <style> kbd { font-family: "Droid Sans Mono", "Luxi Mono", "Inconsolata", monospace } samp { font-family: "Terminus", "Courier", "Lucida Console", monospace } u { text-decoration: none } .nested { margin-left: 3em; } aside, u { opacity: 0.7 } a[id^="fn-"]:target { background: #ff0 } </style> <link rel="stylesheet" href="//design.raku.org/perl.css"> </head> <body class="pod"> <div id="___top"></div> <nav class="indexgroup"> <table id="TOC"> <caption><h2 id="TOC_Title">Table of Contents</h2></caption> <tr class="toc-level-1"><td class="toc-number">1</td><td class="toc-text"><a href="#Section_headings">Section headings</a></td></tr> <tr class="toc-level-1"><td class="toc-number">2</td><td class="toc-text"><a href="#A_top_level_heading">A top level heading</a></td></tr> <tr class="toc-level-2"><td class="toc-number">2.1</td><td class="toc-text"><a href="#A_second_level_heading">A second level heading</a></td></tr> <tr class="toc-level-3"><td class="toc-number">2.1.1</td><td class="toc-text"><a href="#A_third_level_heading">A third level heading</a></td></tr> <tr class="toc-level-4"><td class="toc-number">2.1.1.1</td><td class="toc-text"><a href="#A_fourth_level_heading">A fourth level heading</a></td></tr> <tr class="toc-level-1"><td class="toc-number">3</td><td class="toc-text"><a href="#Text">Text</a></td></tr> <tr class="toc-level-1"><td class="toc-number">4</td><td class="toc-text"><a href="#Code">Code</a></td></tr> <tr class="toc-level-1"><td class="toc-number">5</td><td 
class="toc-text"><a href="#Lists">Lists</a></td></tr> <tr class="toc-level-3"><td class="toc-number">5.0.1</td><td class="toc-text"><a href="#Unordered_lists">Unordered lists</a></td></tr> <tr class="toc-level-3"><td class="toc-number">5.0.2</td><td class="toc-text"><a href="#Multi-level_lists">Multi-level lists</a></td></tr> <tr class="toc-level-1"><td class="toc-number">6</td><td class="toc-text"><a href="#Formatting_codes">Formatting codes</a></td></tr> <tr class="toc-level-1"><td class="toc-number">7</td><td class="toc-text"><a href="#Tables">Tables</a></td></tr> </table> </nav> <div class="pod-body"> <p>A very simple Pod6 document.</p> <p>This is a very high-level, hand-wavey overview. There are <em>lots</em> of other options available.</p> <h1 id="Section_headings"><a class="u" href="#___top" title="go to top of document">Section headings</a></h1> <h1 id="A_top_level_heading"><a class="u" href="#___top" title="go to top of document">A top level heading</a></h1> <h2 id="A_second_level_heading"><a class="u" href="#___top" title="go to top of document">A second level heading</a></h2> <h3 id="A_third_level_heading"><a class="u" href="#___top" title="go to top of document">A third level heading</a></h3> <h4 id="A_fourth_level_heading"><a class="u" href="#___top" title="go to top of document">A fourth level heading</a></h4> <h1 id="Text"><a class="u" href="#___top" title="go to top of document">Text</a></h1> <p>Ordinary paragraphs do not require an explicit marker or delimiters.</p> <p>Alternatively, there is also an explicit =para marker that can be used to explicitly mark a paragraph.</p> <p>This is an ordinary paragraph. 
Its text will be squeezed and short lines filled.</p> <h1 id="Code"><a class="u" href="#___top" title="go to top of document">Code</a></h1> <p>Enclose code in a =code block (or C&lt; &gt; markup for short, inline samples)</p> <pre class="pod-block-code"> my $name = &#39;Rakudo&#39;; say $name; </pre> <h1 id="Lists"><a class="u" href="#___top" title="go to top of document">Lists</a></h1> <h3 id="Unordered_lists"><a class="u" href="#___top" title="go to top of document">Unordered lists</a></h3> <ul><li><p>Grumpy</p> </li> <li><p>Dopey</p> </li> <li><p>Doc</p> </li> <li><p>Happy</p> </li> <li><p>Bashful</p> </li> <li><p>Sneezy</p> </li> <li><p>Sleepy</p> </li> </ul> <h3 id="Multi-level_lists"><a class="u" href="#___top" title="go to top of document">Multi-level lists</a></h3> <ul><li><p>Animal</p> </li> <ul><li><p>Vertebrate</p> </li> <li><p>Invertebrate</p> </li> </ul> <li><p>Phase</p> </li> <ul><li><p>Solid</p> </li> <li><p>Liquid</p> </li> <li><p>Gas</p> </li> </ul> </ul> <h1 id="Formatting_codes"><a class="u" href="#___top" title="go to top of document">Formatting codes</a></h1> <p>Formatting codes provide a way to add inline mark-up to a piece of text.</p> <p>All Pod6 formatting codes consist of a single capital letter followed immediately by a set of single or double angle brackets; Unicode double angle brackets may be used.</p> <p>Formatting codes may nest other formatting codes.</p> <p>There are many formatting codes available, some of the more common ones:</p> <ul><li><p>B&lt; &gt; Bold</p> </li> <li><p>I&lt; &gt; Italic</p> </li> <li><p>U&lt; &gt; Underline</p> </li> <li><p>C&lt; &gt; Code</p> </li> <li><p>L&lt; &gt; Hyperlink</p> </li> <li><p>V&lt; &gt; Verbatim (Don&#39;t interpret anything inside as POD markup)</p> </li> </ul> <h1 id="Tables"><a class="u" href="#___top" title="go to top of document">Tables</a></h1> <p>There is quite extensive markup to allow rendering tables.</p> <p>A simple example:</p> <table class="pod-table"> <caption>Mystery 
Men</caption> <tbody> <tr> <td>The Shoveller</td> <td>Eddie Stevens</td> <td>King Arthur&#39;s singing shovel</td> </tr> <tr> <td>Blue Raja</td> <td>Geoffrey Smith</td> <td>Master of cutlery</td> </tr> <tr> <td>Mr Furious</td> <td>Roy Orson</td> <td>Ticking time bomb of fury</td> </tr> <tr> <td>The Bowler</td> <td>Carol Pinnsler</td> <td>Haunted bowling ball</td> </tr> </tbody> </table> </div> </body> </html></pre> =={{header\|Tcl}}== This renderer doesn't do all that much. Indeed, it deliberately avoids doing all the complexity that is possible; instead it seeks to just provide the minimum that could possibly be useful to someone who is doing very simple text pages. <syntaxhighlight lang="tcl">package require Tcl 8.5 proc splitParagraphs {text} { split [regsub -all {\n\s(\n\s)+} [string trim $text] \u0000] "\u0000" } proc determineParagraph {para} { set para [regsub -all {\s\n\s} $para " "] switch -regexp -- $para { {^\s\+\s} { return [list ul [string trimleft $para " \t"]] } {^\s\d+\.\s} { set para [string trimleft $para " \t\n0123456789"] set para [string range $para 1 end] return [list ol [string trimleft $para " \t"]] } {^#+\s} { return [list heading [string trimleft $para " \t#"]] } } return [list normal $para] } proc markupParagraphContent {para} { set para [string map {& & < < > >} $para] regsub -all {_([\w&;]+)_} $para {<i>\1</i>} para regsub -all {\([\w&;]+)\} $para {<b>\1</b>} para regsub -all {`([\w&;]+)`} $para {<tt>\1</tt>} para return $para } proc markupText {title text} { set title [string map {& & < < > >} $title] set result "<html>" append result "<head><title>" $title "</title>\n</head>" append result "<body>" "<h1>$title</h1>\n" set state normal foreach para [splitParagraphs $text] { lassign [determineParagraph $para] type para set para [markupParagraphContent $para] switch $state,$type { normal,normal {append result "<p>" $para "</p>\n"} normal,heading { append result "<h2>" $para "</h2>\n" set type normal } normal,ol {append result "<ol>" 
"<li>" $para "</li>\n"} normal,ul {append result "<ul>" "<li>" $para "</li>\n"} ul,normal {append result "</ul>" "<p>" $para "</p>\n"} ul,heading { append result "</ul>" "<h2>" $para "</h2>\n" set type normal } ul,ol {append result "</ul>" "<ol>" "<li>" $para "</li>\n"} ul,ul {append result "<li>" $para "</li>\n"} ol,normal {append result "</ol>" "<p>" $para "</p>\n"} ol,heading { append result "</ol>" "<h2>" $para "</h2>\n" set type normal } ol,ol {append result "<li>" $para "</li>\n"} ol,ul {append result "</ol>" "<ul>" "<li>" $para "</li>\n"} } set state $type } if {$state ne "normal"} { append result "</$state>" } return [append result "</body></html>"] }</syntaxhighlight> Here's an example of how it would be used. <syntaxhighlight lang="tcl">set sample " This is an example of how a pseudo-markdown-ish formatting scheme could work. It's really much simpler than markdown, but does support a few things. # Block paragraph types * This is a bulleted list * And this is the second item in it 1. Here's a numbered list 2. Second item 3. Third item # Inline formatting types The formatter can render text with _italics_, bold and in a `typewriter` font. It also does the right thing with <angle brackets> and &ersands, but relies on the encoding of the characters to be conveyed separately." puts [markupText "Sample" $sample]</syntaxhighlight> {{out}} <syntaxhighlight lang="html"><html><head><title>Sample</title> </head><body><h1>Sample</h1> <p>This is an example of how a pseudo-markdown-ish formatting scheme could work. It's really much simpler than markdown, but does support a few things.</p> <h2>Block paragraph types</h2> <ul><li>This is a bulleted list</li> <li>And this is the second item in it</li> </ul><ol><li>Here's a numbered list</li> <li>Second item</li> <li>Third item</li> </ol><h2>Inline formatting types</h2> <p>The formatter can render text with <i>italics</i>, <b>bold</b> and in a <tt>typewriter</tt> font. 
It also does the right thing with <angle brackets> and &amp;ersands, but relies on the encoding of the characters to be conveyed separately.</p> </body></html></syntaxhighlight> =={{header\|Wren}}== {{trans\|Go}} {{libheader\|Wren-pattern}} <syntaxhighlight lang="wren">import "./pattern" for Pattern var t = """ Sample Text This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists. * This is a bulleted list with a less than sign (<) * And this is its second line with a greater than sign (>) A 'normal' paragraph between the lists. 1. This is a numbered list with an ampersand (&) 2. "Second line" in double quotes 3. 'Third line' in single quotes That's all folks.""" // prefer the standard " for escaping a double-quote character rather than Go's " var escapes = [ ["&", "&"], ["<", "<"], [">", ">"], ["\"", """], ["'", "'"] ] for (esc in escapes) t = t.replace(esc[0], esc[1]) var paras = t.split("\n\n") var ol = Pattern.new("/d.", Pattern.start) // Assume if first character of first paragraph is white-space // then it's probably a document title. 
var firstChar = paras[0][0] var title = "Untitled" var k = 0 if (firstChar == " " \|\| firstChar == "\t") { title = paras[0].trim() k = 1 } System.print("<html>") System.print("<head><title>%(title)</title></head>") System.print("<body>") var blist = false var nlist = false for (para in paras.skip(k)) { var para2 = para.trim() var cont = false if (para2.startsWith("*")) { if (!blist) { blist = true System.print("<ul>") } para2 = para2[1..-1].trim() System.print(" <li>%(para2)</li>") cont = true } else if (blist) { blist = false System.print("</ul>") } if (!cont) { if (ol.isMatch(para2)) { if (!nlist) { nlist = true System.print("<ol>") } para2 = para2[2..-1].trim() System.print(" <li>%(para2)</li>") cont = true } else if (nlist) { nlist = false System.print("</ol>") } if (!cont && !blist && !nlist) System.print("<p>%(para2)</p>") } } if (blist) System.print("</ul>") if (nlist) System.prin("</ol>") System.print("</body>") System.print("</html>") </syntaxhighlight> {{out}} <pre> <html> <head><title>Sample Text</title></head> <body> <p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p> <ul> <li>This is a bulleted list with a less than sign (<)</li> <li>And this is its second line with a greater than sign (>)</li> </ul> <p>A 'normal' paragraph between the lists.</p> <ol> <li>This is a numbered list with an ampersand (&)</li> <li>"Second line" in double quotes</li> <li>'Third line' in single quotes</li> </ol> <p>That's all folks.</p> </body> </html> </pre>