Text to HTML: Difference between revisions

Content deleted Content added
Wherrera (talk | contribs)
Kennypete (talk | contribs)
Added Vim Script solution
 
(5 intermediate revisions by 4 users not shown)
Line 16:
=={{header|Go}}==
This isn't very sophisticated but does a few things in a simple-minded way.
<syntaxhighlight lang="go">package main
 
import (
Line 106:
fmt.Println("</body>")
fmt.Println("</html>")
}</syntaxhighlight>
 
{{out}}
<syntaxhighlight lang="html"><html>
<head><title>Sample Text</title></head>
<body>
Line 125:
<p>That&#39;s all folks.</p>
</body>
</html></syntaxhighlight>
 
=={{header|Julia}}==
{{trans|Go}}
<syntaxhighlight lang="julia">using HttpCommon, Printf
 
const exampletxt = """ Sample Text
Line 212:
 
txt_to_html()
</syntaxhighlight>{{out}}
<pre>
<html>
Line 233:
</pre>
 
=={{header|Nim}}==
{{trans|Go}}
<syntaxhighlight lang="nim">import re, strutils, xmltree
 
const Text = """ Sample Text

This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.

* This is a bulleted list with a less than sign (<)

* And this is its second line with a greater than sign (>)

A 'normal' paragraph between the lists.

1. This is a numbered list with an ampersand (&)

2. "Second line" in double quotes

3. 'Third line' in single quotes

That's all folks."""


# Paragraphs are separated by one or more blank (or white-space only) lines.
let p = re"\n\s*(\n\s*)+"
# A bulleted item starts with '*'; a numbered item with one digit and a '.'.
let ul = re"^\*"
let ol = re"^\d\."
# Escape the special characters once, up front, so that every paragraph
# echoed below can be embedded in the markup unchanged.
let text = xmltree.escape(Text)
let paras = text.split(p)

# Assume if first character of first paragraph is white-space
# then it's probably a document title.
var titleString = "untitled"
var start = 0
# The length checks keep an empty text (or an empty first paragraph) from
# raising an index error; such input simply has no title.
if paras.len > 0 and paras[0].len > 0 and paras[0][0].isSpaceAscii:
  titleString = paras[0].strip()
  start = 1
echo "<html>"
echo "<head><title>", titleString, "</title></head>"
echo "<body>"

# blist/nlist record whether a <ul>/<ol> element is currently open.
var blist, nlist = false
for ipara in start..paras.high:
  let para = paras[ipara].strip()

  if para.find(ul) >= 0:
    if nlist:
      # A bulleted item directly after a numbered one: close the numbered
      # list first so that <ul> is never emitted inside an open <ol>.
      nlist = false
      echo "</ol>"
    if not blist:
      blist = true
      echo "<ul>"
    # Drop the leading '*' before emitting the item.
    echo " <li>", para[1..^1].strip(), "</li>"
    continue
  elif blist:
    blist = false
    echo "</ul>"

  if para.find(ol) >= 0:
    if not nlist:
      nlist = true
      echo "<ol>"
    # Drop the leading digit and '.' before emitting the item.
    echo " <li>", para[2..^1].strip(), "</li>"
    continue
  elif nlist:
    nlist = false
    echo "</ol>"

  # Neither kind of list item (and no list is open any more):
  # an ordinary paragraph.
  echo "<p>", para, "</p>"

# Close a list that runs up to the end of the text.
if blist: echo "</ul>"
if nlist: echo "</ol>"

echo "</body>"
echo "</html>"</syntaxhighlight>

{{out}}
<pre><html>
<head><title>Sample Text</title></head>
<body>
<p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p>
<ul>
<li>This is a bulleted list with a less than sign (&lt;)</li>
<li>And this is its second line with a greater than sign (&gt;)</li>
</ul>
<p>A &apos;normal&apos; paragraph between the lists.</p>
<ol>
<li>This is a numbered list with an ampersand (&amp;)</li>
<li>&quot;Second line&quot; in double quotes</li>
<li>&apos;Third line&apos; in single quotes</li>
</ol>
<p>That&apos;s all folks.</p>
</body>
</html></pre>
 
=={{header|Perl}}==
{{trans|Raku}}
<syntaxhighlight lang="perl"># 20201023 added Perl programming solution
 
use strict;
Line 295 ⟶ 387:
my $parser = Pod::Simple::HTML->new();
$parser->output_fh(*STDOUT);
$parser->parse_string_document($pod)</syntaxhighlight>
 
=={{header|Phix}}==
The best thing to do here is to keep it utterly trivial.
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>constant {hchars,hsubs} = columnize({{"&","&amp;"},
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
{"<","&lt;"},
<span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">"&"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&amp;amp;"</span><span style="color: #0000FF;">},</span>
{">","&gt;"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&lt;"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&amp;lt;"</span><span style="color: #0000FF;">},</span>
{"\"","&quot;"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"&gt;"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&amp;gt;"</span><span style="color: #0000FF;">},</span>
{"\'","&apos;"}})
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&dquo;"</span><span style="color: #0000FF;">},</span>
 
<span style="color: #0000FF;">{</span><span style="color: #008000;">"\'"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"&squo;"</span><span style="color: #0000FF;">}})</span>
constant fmt = """
<html>
<span style="color: #008080;">constant</span> <span style="color: #000000;">fmt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"""
<head><title>%s</title></head>
&lt;html&gt;
<body>
&lt;head&gt;&lt;title&gt;%s&lt;/title&gt;&lt;/head&gt;
<pre>
&lt;body&gt;
%s
</ &lt;pre>&gt;
%s
</body>
&lt;/pre&gt;
</html>
&lt;/body&gt;
"""
&lt;/html&gt;
 
"""</span>
function text_to_html_page(string title, text)
title = substitute_all(title,hchars,hsubs)
<span style="color: #008080;">function</span> <span style="color: #000000;">text_to_html_page</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">title</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
text = substitute_all(text,hchars,hsubs)
<span style="color: #000000;">title</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">)</span>
return sprintf(fmt,{title,text})
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hchars</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hsubs</span><span style="color: #0000FF;">)</span>
-- return substitute_all(sprintf(fmt,{title,text}),hchars,hsubs)
<span style="color: #008080;">return</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">title</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">})</span>
end function
<span style="color: #000080;font-style:italic;">-- return substitute_all(sprintf(fmt,{title,text}),hchars,hsubs)</span>
 
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
constant text = """
This is
<span style="color: #008080;">constant</span> <span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"""
a paragraph
This is
a block ofparagraph
code
a block of
* A one-bullet list code
> With quoted text
* A one-bullet list
>
> &gt; With quoted and codetext
&gt;
"""
&gt; and code
 
"""</span>
puts(1,text_to_html_page("my title",text))</lang>
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text_to_html_page</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"my title"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">))</span>
<!--</syntaxhighlight>-->
{{out}}
The last line of text_to_html() (as commented out) was used to generate the
Line 379 ⟶ 474:
 
this implementation is still incomplete.
<syntaxhighlight lang="pike">// function to calculate the average line length (not used yet below)
int linelength(array lines)
{
Line 496 ⟶ 591:
}
return root;
}</syntaxhighlight>
 
=={{header|Racket}}==
Line 506 ⟶ 601:
It certainly seems to me as a useful thing compared to some half-baked not-really-markdown-or-anything implementation.)
 
<syntaxhighlight lang="racket">
#lang at-exp racket
 
Line 535 ⟶ 630:
> and code
})
</syntaxhighlight>
 
{{out}}
Line 563 ⟶ 658:
 
It is '''not''' markup free, but it is actually usable in production.
<syntaxhighlight lang="raku" line>use Pod::To::HTML;
use HTML::Escape;
 
Line 667 ⟶ 762:
 
# normally
#say render($pod6);</syntaxhighlight>
 
{{out|Returns something like}}
Line 796 ⟶ 891:
=={{header|Tcl}}==
This renderer doesn't do all that much. Indeed, it deliberately avoids doing all the complexity that is possible; instead it seeks to just provide the minimum that could possibly be useful to someone who is doing very simple text pages.
<syntaxhighlight lang="tcl">package require Tcl 8.5
 
proc splitParagraphs {text} {
Line 866 ⟶ 961:
}
return [append result "</body></html>"]
}</syntaxhighlight>
Here's an example of how it would be used.
<syntaxhighlight lang="tcl">set sample "
This is an example of how a pseudo-markdown-ish formatting scheme could
work. It's really much simpler than markdown, but does support a few things.
Line 890 ⟶ 985:
but relies on the encoding of the characters to be conveyed separately."
 
puts [markupText "Sample" $sample]</syntaxhighlight>
{{out}}
<syntaxhighlight lang="html"><html><head><title>Sample</title>
</head><body><h1>Sample</h1>
<p>This is an example of how a pseudo-markdown-ish formatting scheme could work. It's really much simpler than markdown, but does support a few things.</p>
Line 903 ⟶ 998:
</ol><h2>Inline formatting types</h2>
<p>The formatter can render text with <i>italics</i>, <b>bold</b> and in a <tt>typewriter</tt> font. It also does the right thing with &lt;angle brackets&gt; and &amp;amp;ersands, but relies on the encoding of the characters to be conveyed separately.</p>
</body></html></syntaxhighlight>
 
=={{header|Vim Script}}==
The problem description is quite open-ended, so this example considers the following as criteria for this Vim Script solution:
* The initial line has the title, which will also be treated as heading level 1 and centred.
* Centred lines (i.e., preceded by more than one space) will be treated as heading level 2 and also centred.
* There is no '''markup''' (as you would see in Markdown, Asciidoc, or other light markup languages). However, this excludes...
* Bulleted and numbered lists, which are determined by lines starting with asterisk-space and numeral-period-space respectively (as you would expect in "plain text").
* Tables in the input are identified by text delimited by tab characters (in contiguous lines), with the first line treated as the table's header.
* Since the output is XHTML, (a) The XML declaration, DOCTYPE, and XML namespace should be as per XHTML 1.0 Strict, and (b) XML predefined entities should be used where appropriate, i.e., &amp;amp;, &amp;apos;, &amp;gt;, &amp;lt;, and &amp;quot; but if character references are in the text file already those should be left as-is.
* Hypertext external links will be handled, and their content should be replicated in the main text.
 
'''Input file'''
<pre>
Text to HTML using Vim Script
 
Introduction
 
This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.
 
Lists
 
A 'normal' paragraph before a list.
 
* This is a bulleted list with a less than sign (<)
 
* And this is its second line with a greater than sign (>)
 
A 'normal' paragraph between the lists.
 
1. This is a numbered list with an ampersand (&), but DO NOT substitute the ampersands within character references like &amp;#x1F606; (😆)
 
2. "Second line" in double quotes, with “smart” quotes
 
3. 'Third line' in single quotes, with ‘smart’ ones too, and
 
4. This, https://rosettacode.org/wiki/Text_to_HTML, is a URI.
 
Tables
 
A normal paragraph before a table, which has been formulated with U+0009 tab characters:
 
Head cell 1 Head cell 2 Head cell 3
Row 2 col 1 Row 2 col 2 Row 2 col 3
Row 3 col 1 Row 3 col 2 Row 3 col 3
Row 4 col 1 Row 4 col 2 Row 4 col 3
 
The HTML output may be checked against https://validator.w3.org/check to validate that it is valid XHTML.
 
Conclusion
 
That's all folks.
</pre>
 
<small>
NB: &amp;#x1F606; in the input file needed to have &amp;amp; added to it to display correctly.</small>
 
'''Vim Script (and running it)'''
The following Vim Script has been written to be run from the command line with:
<pre>vim -c "source Text_to_HTML.vim" Text_to_HTML.xhtml</pre>
where:
* ''Text_to_HTML.xhtml'' is the input file (a copy of the .txt file to convert), above, which will be overwritten by
* ''Text_to_HTML.vim'', the Vim Script, reproduced below.
 
<syntaxhighlight lang="Vim Script">
" Substitute the XML predefined character entities.
" '&' is handled first so that the ampersands introduced by the other four
" substitutions are not escaped a second time.  An ampersand followed by an
" ASCII letter or '#' is taken to start an existing character reference and
" is left as-is.  The letter range is spelled A-Za-z because [A-z] would also
" cover [ \ ] ^ _ and `, and the \|$ branch makes sure an ampersand at the
" very end of a line is escaped as well.
%s/&\ze\([^A-Za-z#]\|$\)/\&amp;/g
%s/>/\&gt;/g
%s/</\&lt;/g
%s/"/\&quot;/g
%s/'/\&apos;/g
" Substitute URIs: presumes ! $ & ' ( ) * + , ; = : will be %xx escaped
" The g flag links every URI on a line, not only the first one.
%s/http[s]\?:\/\/[A-Za-z0-9._~:/-]\+\ze[^.:]/<a href="\0">\0<\/a>/g
" Substitute simple tables, which use tabs (U+0009)
" Contiguous tab-delimited lines are wrapped in <table>; the first of them
" becomes the <thead> row and the remaining ones the <tbody> rows.
%s/\([^\t]\+\t.\+\n\n\?\)\+/<table>\r\0<\/table>\r/
%s/\([^\t]\+\t.\+\n\n\?\)\+/<thead>\0<\/tbody>/
%s/\(<thead>\)\(.\+\)/\1\r<tr>\2<\/tr>\r<\/thead>\r<tbody>/
%s/^\([^<][^\t]\+\t.\+\)\n\n\?\(<\/tbody>\)/<tr>\1<\/tr>\r\2\r/
%s/^\([^<][^\t]\+\t.\+\)\n\n\?/<tr>\1<\/tr>\r/
" Turn the tabs inside each row into cell boundaries, then add the opening
" and closing cell tags at the row ends.
%s/<tr>\zs.*\ze<\/tr>/\=substitute(submatch(0), '\t', '<\/td><td>', 'g')/g
%s/<tr>/&<td>/
%s/<\/tr>/<\/td>&/
" Substitute the unordered list items, and temporarily precede them with <!--ulx-->
%s/* \(.\+\)\n\n*/<!--ulx--><li>\1<\/li>\r/
" Substitute the ordered list items, and temporarily precede them with <!--olx-->
%s/\d[.] \(.\+\)\n\n*/<!--olx--><li>\1<\/li>\r/
" Clean up <!--olx--> contiguous lines, wrapping them in <ol>
%s/\(<!--olx--><li>.\+\n\)\+/<ol>\r&<\/ol>\r/
" Clean up <!--ulx--> contiguous lines, wrapping them in <ul>
%s/\(<!--ulx--><li>.\+\n\)\+/<ul>\r&<\/ul>\r/
" Clean up <!--?lx--> - remove the placeholder comment
%s/<!--.lx-->//g
" Add the XML declaration, XHTML strict DOCTYPE, <head> and <title> block (with a <style> block of CSS for the headings and tables), putting the text of the first line within <title>...</title> and <h1>...</h1>
1s/\s\+\(.\+\)\n\n\?/<\?xml version="1.0" encoding="UTF-8"\?>\r<!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1.0 Strict\/\/EN" "http:\/\/www.w3.org\/TR\/xhtml1\/DTD\/xhtml1-strict.dtd">\r<html xmlns="http:\/\/www.w3.org\/1999\/xhtml" xml:lang="en" lang="en">\r<head><title>\1<\/title>\r<style type="text\/css">\rh1, h2 { font-weight: bold; text-align: center; }\rtable, th, td { border: 1px solid black; }\r<\/style>\r<\/head>\r<body>\r<h1>\1<\/h1>\r/
" Substitute paragraphs starting with space+ A-Z and wrap within a <h2>...</h2>
%s/^\s\+\([A-Z].\+\)\n/<h2>\1<\/h2>\r/
" Substitute paragraphs starting with A-Z and wrap within a <p>...</p>
%s/^\([A-Z].\+\)\n/<p>\1<\/p>\r/
" Add the </body> and </html> to the end of the buffer
$s/\n/&<\/body>\r<\/html>/
" Substitute double returns with single returns
%s/\n\n/\r/
" Write the file and quit Vim
wq!
</syntaxhighlight>
 
{{out}}
<pre>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Text to HTML using Vim Script</title>
<style type="text/css">
h1, h2 { font-weight: bold; text-align: center; }
table, th, td { border: 1px solid black; }
</style>
</head>
<body>
<h1>Text to HTML using Vim Script</h1>
<h2>Introduction</h2>
<p>This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.</p>
<h2>Lists</h2>
<p>A &apos;normal&apos; paragraph before a list.</p>
<ul>
<li>This is a bulleted list with a less than sign (&lt;)</li>
<li>And this is its second line with a greater than sign (&gt;)</li>
</ul>
<p>A &apos;normal&apos; paragraph between the lists.</p>
<ol>
<li>This is a numbered list with an ampersand (&amp;), but DO NOT substitute the ampersands within character references like &amp;#x1F606; (😆)</li>
<li>&quot;Second line&quot; in double quotes, with “smart” quotes</li>
<li>&apos;Third line&apos; in single quotes, with ‘smart’ ones too, and</li>
<li>This, <a href="https://rosettacode.org/wiki/Text_to_HTML">https://rosettacode.org/wiki/Text_to_HTML</a>, is a URI.</li>
</ol>
<h2>Tables</h2>
<p>A normal paragraph before a table, which has been formulated with U+0009 tab characters:</p>
<table>
<thead>
<tr><td>Head cell 1</td><td>Head cell 2</td><td>Head cell 3</td></tr>
</thead>
<tbody>
<tr><td>Row 2 col 1</td><td>Row 2 col 2</td><td>Row 2 col 3</td></tr>
<tr><td>Row 3 col 1</td><td>Row 3 col 2</td><td>Row 3 col 3</td></tr>
<tr><td>Row 4 col 1</td><td>Row 4 col 2</td><td>Row 4 col 3</td></tr>
</tbody>
</table>
<p>The HTML output may be checked against <a href="https://validator.w3.org/check">https://validator.w3.org/check</a> to validate that it is valid XHTML.</p>
<h2>Conclusion</h2>
<p>That&apos;s all folks.</p>
</body>
</html>
</pre>
 
<small>NB: Again, &amp;#x1F606; in the output file needed to have &amp;amp; added to it to display correctly.</small>
 
This output validates (checked, as noted in the penultimate paragraph of the output, at https://validator.w3.org/check).
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-pattern}}
<syntaxhighlight lang="wren">import "./pattern" for Pattern
Note that Wren doesn't support any form of raw string so we need to construct the sample text by concatenating strings for each paragraph.
<lang ecmascript>import "/pattern" for Pattern
 
var t = """ Sample Text
 
" Sample Text\n\n" +
"This is an example of converting plain text to HTML which demonstrates extracting a title and escaping certain characters within bulleted and numbered lists.\n\n" +
 
"* This is a bulleted list with a less than sign (<)\n\n" +
"* And thisThis is itsa secondbulleted linelist with a greaterless than sign (><)\n\n" +
 
"A 'normal' paragraph between the lists.\n\n" +
* And this is its second line with a greater than sign (>)
"1. This is a numbered list with an ampersand (&)\n\n" +
 
"2. \"Second line\" in double quotes\n\n" +
A 'normal' paragraph between the lists.
"3. 'Third line' in single quotes\n\n" +
 
"That's all folks."
1. This is a numbered list with an ampersand (&)
 
2. "Second line" in double quotes
 
3. 'Third line' in single quotes
 
That's all folks."""
 
// prefer the standard &quot; for escaping a double-quote character rather than Go's &#34;
Line 980 ⟶ 1,236:
System.print("</body>")
System.print("</html>")
</syntaxhighlight>
</lang>
 
{{out}}