User:EMBee/Scrap: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎Text to HTML: some code to convert text to html (incomplete))
Line 37: Line 37:
}
}


array mark_up(array lines)
object mark_up(array lines)
{
{
array markup = ({});
array markup = ({});
Line 70: Line 70:
if (line[0] == "regular" && markup[index-1][0] != "regular" && markup[index+1][0] != "regular")
if (line[0] == "regular" && markup[index-1][0] != "regular" && markup[index+1][0] != "regular")
line[0] = "heading";
line[0] = "heading";
}
else if (line[0] == "empty" && markup[index-1][0] == "regular" && markup[index+1][0] == "regular")
}

foreach(markup; int index; array line)
{
if (index > 0 && index < sizeof(markup)-1 )
{
if (line[0] == "empty" && markup[index-1][0] == "regular" && markup[index+1][0] == "regular")
line[0] = "new paragraph";
line[0] = "new paragraph";
else if (line[0] == "empty" && markup[index-1][0] == "regular" && markup[index+1][0] != "regular")
else if (line[0] == "empty" && markup[index-1][0] == "regular" && markup[index+1][0] != "regular")
Line 78: Line 85:
}
}
}
}

return markup;
object root = Parser.XML.Tree.SimpleRootNode();
array current = ({ Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_ELEMENT, "div", ([]), "") });
root->add_child(current[-1]);

foreach (markup; int index; array line)
{
switch(line[0])
{
case "heading":
object h = Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_ELEMENT, "h3", ([]), "");
h->add_child(Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_TEXT, "", ([]), line[1]));
current[-1]->add_child(h);
break;
case "bullet":
case "number":
object li = Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_ELEMENT, "li", ([]), "");
li->add_child(Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_TEXT, "", ([]), line[1]));
current[-1]->add_child(li);
current = Array.push(current, li);
break;
case "indent":
if (markup[index-1][0] != "bullet" && markup[index-1][0] != "number")
current = Array.pop(current)[1];
current[-1]->add_child(Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_TEXT, "", ([]), line[1]));
break;
case "new paragraph":
current = Array.pop(current)[1];
case "begin paragraph":
object p = Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_ELEMENT, "p", ([]), "");
current[-1]->add_child(p);
current = Array.push(current, p);
break;
case "end paragraph":
current = Array.pop(current)[1];
break;
case "regular":
current[-1]->add_child(Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_TEXT, "", ([]), line[1]));
case "empty":
break;
}
}
return root;
}
}


</lang>
</lang>

Revision as of 10:17, 5 January 2012

  • add "suggest task" and "task by category" links to tasks page.
  • suggest task for binary math.
  • draft task for math with boolean lists like in early lisp as per http://paste.lisp.org/display/126287

Text to HTML

Convert plain text to HTML. The plain text has no formatting information. It may have centered headlines, numbered sections, paragraphs, lists, and URIs. It could even have tables.

potential algorithm:

  • split by line
  • find average line length to identify centered lines
  • find isolated lines to identify section headings
  • find URIs
  • identify section numbering
  • identify bullet and numbered lists
  • identify paragraphs
  • identify indented lines
  • if possible identify tables

To ensure valid HTML/XHTML, create a nested structure:

  • create an xml node
  • add elements to node
  • add lines to element if multiline paragraph

<lang Pike>// function to calculate the average line length:
// Empty lines are ignored. The average is taken over the larger half of
// the remaining line lengths with the longest 5% left out, so a few very
// long lines do not skew the result.
// Returns 0 when there are no non-empty lines.
int linelength(array lines)
{
    // automap sizeof over every line, then drop the zero lengths
    array sizes = sizeof(lines[*]) - ({ 0 });
    if (!sizeof(sizes))
        return 0;   // nothing to average; avoids a division by zero below

    sizes = sort(sizes);
    // only consider the larger half of lines minus the top 5%
    // (range bounds are inclusive, hence the -1 on the upper bound)
    int first = sizeof(sizes) / 2;
    int last = sizeof(sizes) - sizeof(sizes) / 20 - 1;
    array larger = sizes[first..last];
    return `+(@larger) / sizeof(larger);
}

// Classifies every line of a plain text document and builds an XML tree
// from the result.
//
// lines: the document, one string per line.
// Returns a Parser.XML.Tree.SimpleRootNode holding a single <div> with the
// generated <h3>, <li>, <p> and text nodes.
//
// Lines before the first and after the last line are treated as blank, so
// the first and last line can be classified without indexing outside the
// array.
object mark_up(array lines) {

    array markup = ({});
    // find special lines
    foreach(lines; int index; string line)
    {
        string strippedline = String.trim_whites(line);
        if (!sizeof(strippedline))
        {
            markup += ({ ({ "empty" }) });
            continue;
        }

        string firstchar = strippedline[0..0];
        // column of the first non-whitespace character
        int pos = search(line, firstchar);
        string kind;
        if (blank_line_at(lines, index-1) && blank_line_at(lines, index+1))
            kind = "heading";
        else if (firstchar == "*")
            kind = "bullet";
        else if ( (<"0","1","2","3","4","5","6","7","8","9">)[firstchar] )
            kind = "number";
        else if (pos > 0)
            kind = "indent";
        else
            kind = "regular";
        markup += ({ ({ kind, strippedline, pos }) });
    }

    // a regular line with no regular neighbours is a heading as well.
    // The inner arrays are shared with markup, so assigning line[0]
    // updates markup in place.
    foreach(markup; int index; array line)
    {
        if (index > 0 && index < sizeof(markup)-1 )
        {
            if (line[0] == "regular" && markup[index-1][0] != "regular" && markup[index+1][0] != "regular")
                line[0] = "heading";
        }
    }

    // empty lines next to regular text become paragraph boundaries
    foreach(markup; int index; array line)
    {
        if (index > 0 && index < sizeof(markup)-1 && line[0] == "empty")
        {
            int before = markup[index-1][0] == "regular";
            int after = markup[index+1][0] == "regular";
            if (before && after)
                line[0] = "new paragraph";
            else if (before)
                line[0] = "end paragraph";
            else if (after)
                line[0] = "begin paragraph";
        }
    }

    object root = Parser.XML.Tree.SimpleRootNode();
    // stack of open elements; current[-1] receives new children
    array current = ({ make_element("div") });
    root->add_child(current[-1]);

    foreach (markup; int index; array line)
    {
        switch(line[0])
        {
            case "heading":
                current[-1]->add_child(make_element("h3", line[1]));
                break;
            case "bullet":
            case "number":
                object li = make_element("li", line[1]);
                current[-1]->add_child(li);
                current = Array.push(current, li);
                break;
            case "indent":
                // index 0 has no previous line; do not wrap around to the
                // last entry of markup
                if (index == 0 || (markup[index-1][0] != "bullet" && markup[index-1][0] != "number"))
                    current = close_element(current);
                current[-1]->add_child(make_text(line[1]));
                break;
            case "new paragraph":
                current = close_element(current);
                // fall through: a new paragraph is opened right away
            case "begin paragraph":
                object p = make_element("p");
                current[-1]->add_child(p);
                current = Array.push(current, p);
                break;
            case "end paragraph":
                current = close_element(current);
                break;
            case "regular":
                current[-1]->add_child(make_text(line[1]));
                break;
            case "empty":
                break;
        }
    }
    return root;

}

// Returns 1 when index is outside of lines or the line at index consists
// of spaces and tabs only.
private int blank_line_at(array lines, int index)
{
    if (index < 0 || index >= sizeof(lines))
        return 1;
    return lines[index] - " " - "\t" == "";
}

// Removes the innermost open element from the stack. The outermost
// element is never removed, so open[-1] stays valid for the caller.
private array close_element(array open)
{
    if (sizeof(open) > 1)
        return Array.pop(open)[1];
    return open;
}

// Creates a text node holding text.
private object make_text(string text)
{
    return Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_TEXT, "", ([]), text);
}

// Creates an element node named tag; if text is given it is added as the
// element's only child.
private object make_element(string tag, void|string text)
{
    object node = Parser.XML.Tree.SimpleNode(Parser.XML.Tree.XML_ELEMENT, tag, ([]), "");
    if (text)
        node->add_child(make_text(text));
    return node;
}


</lang>