User:EMBee/Scrap: Difference between revisions

From Rosetta Code
Content added Content deleted
(potential algorithm for a text to html conversion)
(→‎Text to HTML: some code to convert text to html (incomplete))
Line 22: Line 22:
* create an xml node
* create an xml node
* add elements to node
* add elements to node
* add lines to element if multiline like paragraph
* add lines to element if multiline paragraph

<lang Pike>// function to calculate the average line length:
//! Estimate the typical length of a full-width line of text.
//!
//! Empty lines are ignored. The remaining line lengths are sorted in
//! ascending order; only the larger half is considered, and the longest
//! 5% of all non-empty lines are dropped as outliers.
//!
//! @param lines
//!   The text, one string per line.
//!
//! @returns
//!   The integer average of the selected line lengths, or @expr{0@} when
//!   @[lines] contains no non-empty line.
int linelength(array lines)
{
    // Length of every line, with empty lines (length 0) removed.
    array sizes = sizeof(lines[*]) - ({ 0 });
    int count = sizeof(sizes);

    // Nothing to average; returning early also avoids a division by zero.
    if (!count)
        return 0;

    // sort() orders the array in place (ascending).
    sort(sizes);

    // Range bounds are inclusive, so the last index to keep after dropping
    // the top count/20 entries is count - 1 - count/20.
    array larger = sizes[count/2 .. count - 1 - count/20];

    return `+(@larger) / sizeof(larger);
}

//! Classify every line of a plain text document.
//!
//! The first pass labels each line on its own merits:
//! @ul
//!   @item "empty"   - nothing but spaces and tabs
//!   @item "heading" - a non-blank line with a blank line on both sides
//!   @item "bullet"  - first visible character is "*"
//!   @item "number"  - first visible character is a decimal digit
//!   @item "indent"  - preceded by leading whitespace
//!   @item "regular" - anything else
//! @endul
//! The start and the end of the document count as blank lines, so the
//! neighbour tests never index outside @[lines].
//!
//! The second pass refines interior entries (never the first or last)
//! using the labels of their neighbours: a "regular" line with no
//! "regular" neighbour becomes a "heading", and "empty" lines next to
//! "regular" lines become "new paragraph", "end paragraph" or
//! "begin paragraph".
//!
//! @param lines
//!   The text, one string per line.
//!
//! @returns
//!   One entry per input line: @expr{({ "empty" })@} for blank lines,
//!   otherwise @expr{({ label, stripped_line, pos })@} where @expr{pos@}
//!   is the index of the first visible character in the original line.
array mark_up(array lines)
{
    array markup = ({});
    int last = sizeof(lines) - 1;

    // Pass 1: label each line from its own content and its raw neighbours.
    foreach(lines; int index; string line)
    {
        string strippedline = String.trim_whites(line);
        if (!sizeof(strippedline))
        {
            markup += ({ ({ "empty" }) });
            continue;
        }

        string firstchar = strippedline[0..0];
        int pos = search(line, firstchar);

        // Guard both neighbours: index-1 would wrap around to the last
        // line at index 0, and index+1 is out of range on the last line.
        int prev_blank = (index == 0) || ((lines[index-1] - " " - "\t") == "");
        int next_blank = (index == last) || ((lines[index+1] - " " - "\t") == "");

        string kind;
        if (prev_blank && next_blank)
            kind = "heading";
        else if (firstchar == "*")
            kind = "bullet";
        else if ( (<"0","1","2","3","4","5","6","7","8","9">)[firstchar] )
            kind = "number";
        else if (pos > 0)
            kind = "indent";
        else
            kind = "regular";

        markup += ({ ({ kind, strippedline, pos }) });
    }

    // Pass 2: refine interior entries from their neighbours' labels.
    // Entries are arrays (reference types), so assigning to line[0]
    // updates markup in place; later iterations therefore see labels
    // already rewritten for earlier entries.
    foreach(markup; int index; array line)
    {
        if (index > 0 && index < sizeof(markup)-1 )
        {
            int prev_regular = markup[index-1][0] == "regular";
            int next_regular = markup[index+1][0] == "regular";

            if (line[0] == "regular" && !prev_regular && !next_regular)
                line[0] = "heading";
            else if (line[0] == "empty" && prev_regular && next_regular)
                line[0] = "new paragraph";
            else if (line[0] == "empty" && prev_regular && !next_regular)
                line[0] = "end paragraph";
            else if (line[0] == "empty" && !prev_regular && next_regular)
                line[0] = "begin paragraph";
        }
    }
    return markup;
}
</lang>

Revision as of 07:16, 5 January 2012

  • add "suggest task" and "task by category" links to tasks page.
  • suggest task for binary math.
  • draft task for math with boolean lists like in early lisp as per http://paste.lisp.org/display/126287

Text to HTML

Convert plain text to HTML. The plain text has no formatting information. It may have centered headlines, numbered sections, paragraphs, lists, and URIs. It could even have tables.

potential algorithm:

  • split by line
  • find average line length to identify centered lines
  • find isolated lines to identify section headings
  • find URIs
  • identify section numbering
  • identify bullet and numbered lists
  • identify paragraphs
  • identify indented lines
  • if possible identify tables

to ensure valid html/xhtml create a nested structure:

  • create an xml node
  • add elements to node
  • add lines to element if multiline paragraph

<lang Pike>// function to calculate the average line length:
//
// Empty lines are ignored. The remaining line lengths are sorted in
// ascending order; only the larger half is considered, and the longest
// 5% of all non-empty lines are dropped as outliers.
//
// lines:   the text, one string per line.
// returns: the integer average of the selected line lengths, or 0 when
//          lines contains no non-empty line.
int linelength(array lines)
{
    // Length of every line, with empty lines (length 0) removed.
    array sizes = sizeof(lines[*]) - ({ 0 });
    int count = sizeof(sizes);

    // Nothing to average; returning early also avoids a division by zero.
    if (!count)
        return 0;

    // sort() orders the array in place (ascending).
    sort(sizes);

    // Range bounds are inclusive, so the last index to keep after dropping
    // the top count/20 entries is count - 1 - count/20.
    array larger = sizes[count/2 .. count - 1 - count/20];

    return `+(@larger) / sizeof(larger);
}

// Classify every line of a plain text document.
//
// Pass 1 labels each line on its own merits: "empty" (only spaces and
// tabs), "heading" (non-blank with a blank line on both sides), "bullet"
// (starts with "*"), "number" (starts with a decimal digit), "indent"
// (has leading whitespace) or "regular". The start and the end of the
// document count as blank lines, so the neighbour tests never index
// outside the array.
//
// Pass 2 refines interior entries (never the first or last) using the
// labels of their neighbours: a "regular" line with no "regular"
// neighbour becomes a "heading", and "empty" lines next to "regular"
// lines become "new paragraph", "end paragraph" or "begin paragraph".
//
// lines:   the text, one string per line.
// returns: one entry per input line: ({ "empty" }) for blank lines,
//          otherwise ({ label, stripped_line, pos }) where pos is the
//          index of the first visible character in the original line.
array mark_up(array lines)
{
    array markup = ({});
    int last = sizeof(lines) - 1;

    // Pass 1: label each line from its own content and its raw neighbours.
    foreach(lines; int index; string line)
    {
        string strippedline = String.trim_whites(line);
        if (!sizeof(strippedline))
        {
            markup += ({ ({ "empty" }) });
            continue;
        }

        string firstchar = strippedline[0..0];
        int pos = search(line, firstchar);

        // Guard both neighbours: index-1 would wrap around to the last
        // line at index 0, and index+1 is out of range on the last line.
        int prev_blank = (index == 0) || ((lines[index-1] - " " - "\t") == "");
        int next_blank = (index == last) || ((lines[index+1] - " " - "\t") == "");

        string kind;
        if (prev_blank && next_blank)
            kind = "heading";
        else if (firstchar == "*")
            kind = "bullet";
        else if ( (<"0","1","2","3","4","5","6","7","8","9">)[firstchar] )
            kind = "number";
        else if (pos > 0)
            kind = "indent";
        else
            kind = "regular";

        markup += ({ ({ kind, strippedline, pos }) });
    }

    // Pass 2: refine interior entries from their neighbours' labels.
    // Entries are arrays (reference types), so assigning to line[0]
    // updates markup in place; later iterations therefore see labels
    // already rewritten for earlier entries.
    foreach(markup; int index; array line)
    {
        if (index > 0 && index < sizeof(markup)-1 )
        {
            int prev_regular = markup[index-1][0] == "regular";
            int next_regular = markup[index+1][0] == "regular";

            if (line[0] == "regular" && !prev_regular && !next_regular)
                line[0] = "heading";
            else if (line[0] == "empty" && prev_regular && next_regular)
                line[0] = "new paragraph";
            else if (line[0] == "empty" && prev_regular && !next_regular)
                line[0] = "end paragraph";
            else if (line[0] == "empty" && !prev_regular && next_regular)
                line[0] = "begin paragraph";
        }
    }
    return markup;
}
</lang>