Syntax highlighting using Mediawiki formatting
Introduction
When formatting a page for display, Mediawiki allows the page to include bold and italic text by placing the bold/italic text within paired repeated-single quote characters - 3 single quotes for bold and 2 for italic, 5 for bold italic.
E.g.:
'''bold-word''' and ''italic-word'' appears as bold-word and italic-word.
This could be used to provide simple syntax-highlighting without the use of the relatively more expensive <syntaxhighlight> tags or for languages not currently supported by Pygments.
A few languages on Rosetta Code are currently using schemes like this.
- Task
The task is to write a syntax highlighter that given a source in your language will output a wiki formatted version of the source with the keywords/reserved words in bold and the comments in italics.
Note that each source line (including blank lines) should be output with a leading space, to ensure the source is treated as a single block.
Additionally, translate the following characters:
- single-quote (') to &apos;
- ampersand (&) to &amp;
- less-than (<) to &lt;
- greater-than (>) to &gt;
or enclose them in <nowiki> tags.
If your language doesn't have keywords/reserved words or comments, use your judgement on what to highlight in bold or italic : )
Presenting your source
Instead of showing your source within syntaxhighlight tags and having a separate output block, just show the source as the output from your program when given its own source to process.
I.e., don't use syntaxhighlight tags.
See also
https://www.mediawiki.org/wiki/Help:Formatting
ALGOL 68
Handles upper-stropping Algol 68 sources (as used by ALGOL 68G and most other compilers).
# Convert an upper-stropped Algol 68 source to "wiki" format                 #
#    each line is preceded by a space,                                       #
#    bold words are enclosed in ''' and ''' and comments in '' and ''        #
#    ', &, < and > are converted to &apos; &amp; &lt; and &gt;               #
#    everything else is output as is                                         #
# the source is read from stand in and written to stand out                  #
# the last line in the file must end with a newline                          #
# { and } are assumed to be alternatives for ( and ), if { } should be       #
#    treated as comments ( as in ALGOL68RS/algol68toc )                      #
#    change rs style brief comments to TRUE                                  #
BEGIN

    # TRUE if {} delimits a nestable brief comment, as in ALGOL 68RS and     #
    #      algol68toc, FALSE if {} are alternatives to () as in ALGOL 68G    #
    BOOL rs style brief comments = FALSE;

    # TRUE if EOF has been reached, FALSE otherwise                          #
    BOOL at eof := FALSE;
    # set the EOF handler for stand in                                       #
    on logical file end( stand in
                       , ( REF FILE f )BOOL:
                             # note that we reached EOF on the latest read   #
                             # and return TRUE so processing can continue    #
                             at eof := TRUE
                       );

    CHAR   nl           = REPR 10;       # newline character                 #
    INT    error count := 0;             # number of errors reported         #
    STRING line        := nl;            # current source line               #
    INT    pos         := LWB line;      # current position in line          #
    CHAR   c           := " ";           # current source character          #

    # reports an error: counts it and writes the message to stand out        #
    PROC error = ( STRING message )VOID:
         BEGIN
            error count +:= 1;
            print( ( newline, newline, "**** ", message, newline ) )
         END # error # ;

    # reports an unterminated construct ( e.g. string, comment )             #
    PROC unterminated = ( STRING construct )VOID:
         error( "Unterminated " + construct );

    # gets the next source character, stores it in c                         #
    # when the current line is exhausted the next line is read; at eof is    #
    # cleared before the read so that only the EOF handler can set it        #
    PROC next char = VOID:
         IF pos <= UPB line THEN
             # not past the end of the source line                           #
             c    := line[ pos ];
             pos +:= 1
         ELIF
             # past the end of the current source line - get the next        #
             at eof := FALSE;
             read( ( line, newline ) );
             NOT at eof
         THEN
             # have another line                                             #
             line +:= nl;
             pos   := LWB line;
             c     := line[ pos ];
             pos  +:= 1
         ELSE
             # reached eof                                                   #
             line := "";
             c    := REPR 0
         FI # next char # ;

    # converts and outputs ch                                                #
    # a newline is followed by a space so that every output line has the     #
    # leading space required for a single wiki block; the characters that    #
    # are significant to the wiki are written as character entities          #
    PROC out char = ( CHAR ch )VOID:
         IF   ch = nl  THEN print( ( newline, " " ) )
         ELIF ch = "<" THEN print( ( "&lt;"       ) )
         ELIF ch = ">" THEN print( ( "&gt;"       ) )
         ELIF ch = "&" THEN print( ( "&amp;"      ) )
         ELIF ch = "'" THEN print( ( "&apos;"     ) )
         ELSE               print( ch )
         FI # out char # ;

    # outputs a wiki start/end italic delimiter                              #
    PROC italic delimiter = VOID: print( ( "''" ) );

    # outputs a wiki start/end bold delimiter                                #
    PROC bold delimiter = VOID: print( ( "'''" ) );

    # returns TRUE if the current character is a string delimiter            #
    PROC have string delimiter = BOOL: c = """";

    # returns TRUE if the current character can start a bold word            #
    PROC have bold = BOOL: c >= "A" AND c <= "Z";

    # outputs a brief comment to stand out                                   #
    #     end char is the closing delimiter,                                 #
    #     nested char is the opening delimiter for nestable brief comments   #
    #     if nested char is blank, the brief comment does not nest           #
    # this handles ALGOL 68RS and algol68toc style {} comments               #
    # on entry c is the opening delimiter, on exit c is the character        #
    # following the closing delimiter                                        #
    PROC copy brief comment = ( CHAR end char, CHAR nested char )VOID:
         BEGIN
            out char( c );
            next char;
            WHILE NOT at eof AND c /= end char DO
                IF c = nested char AND nested char /= " " THEN
                    # nested brief comment - the nested call leaves c on     #
                    # the character after its closing delimiter, so we must  #
                    # not advance again here or that character would be lost #
                    copy brief comment( end char, nested char )
                ELSE
                    # normal comment char                                    #
                    out char( c );
                    next char
                FI
            OD;
            IF at eof THEN
                # unterminated comment                                       #
                unterminated( """" + end char + """ comment" );
                c := end char
            FI;
            out char( c );
            next char
         END # copy brief comment # ;

    # outputs a string denotation from the source                            #
    # within a string denotation, "" denotes the " character, so the outer   #
    # loop continues while another delimiter immediately follows             #
    PROC copy string = VOID:
         WHILE have string delimiter DO
             WHILE out char( c );
                   next char;
                   NOT at eof AND NOT have string delimiter
             DO SKIP OD;
             IF NOT have string delimiter THEN
                 unterminated( "string" );
                 c := """"
             FI;
             out char( c );
             next char
         OD # copy string # ;

    # gets a bold word from the source                                       #
    PROC get bold word = STRING:
         BEGIN
            STRING result := "";
            WHILE have bold OR c = "_" DO
                result +:= c;
                next char
            OD;
            result
         END # get bold word # ;

    # copies the source to the output until a bold word is encountered       #
    # returns the bold word, or "" if the end of the source was reached      #
    PROC copy to bold = STRING:
         IF at eof
         THEN ""
         ELSE
             STRING result := "";
             WHILE out char( c );
                   next char;
                   NOT at eof AND NOT have bold
             DO SKIP OD;
             IF NOT at eof THEN
                 result := get bold word
             FI;
             result
         FI # copy to bold # ;

    # handles a bold COMMENT or other bold word                              #
    PROC bold word or comment = VOID:
         IF STRING bold word := get bold word;
            bold word = "CO" OR bold word = "COMMENT"
         THEN
             # have a bold comment - copy up to the matching bold word       #
             italic delimiter;
             STRING delimiter = bold word;
             WHILE print( ( bold word ) );
                   bold word := copy to bold;
                   NOT at eof AND bold word /= delimiter
             DO SKIP OD;
             IF at eof THEN
                 unterminated( """" + delimiter + """ comment" )
             FI;
             print( ( delimiter ) );
             italic delimiter
         ELSE
             # some other bold word                                          #
             bold delimiter;
             print( ( bold word ) );
             bold delimiter
         FI # bold word or comment # ;

    # copy the source to stand out, converting to wiki format                #
    next char;
    WHILE NOT at eof DO
        IF c = "#" THEN
            # brief comment                                                  #
            italic delimiter;
            copy brief comment( "#", " " );
            italic delimiter
        ELIF c = "{" AND rs style brief comments THEN
            # nestable brief comment ( ALGOL 68RS and algol68toc )           #
            italic delimiter;
            copy brief comment( "}", "{" );
            italic delimiter
        ELIF have string delimiter THEN
            # STRING or CHAR denotation                                      #
            copy string
        ELIF have bold THEN
            # have a bold word                                               #
            bold word or comment
        ELSE
            # anything else                                                  #
            out char( c );
            next char
        FI
    OD;
    IF error count > 0 THEN
        # had errors processing the source                                   #
        print( ( "**** ", whole( error count, 0 ), " errors", newline ) )
    FI
END
AWK
# convert an AWK source to wiki format
#    each line is preceded by a space,
#    reserved words are enclosed in ''' and ''' and comments in '' and ''
#    ', &, < and > are converted to &apos; &amp; &lt; and &gt;
#    everything else is output as is

# the wiki source is written to stdout

BEGIN \
{

    # reserved word list as in gawk and treating getline as reserved
    kw = "BEGIN/BEGINFILE/END/ENDFILE/"                       \
         "break/case/continue/default/delete/do/while/else/"  \
         "exit/for/in/function/func/if/next/nextfile/switch/" \
         "getline";
    n = split( kw, reservedWords, "/" );
    # index the words by name so membership can be tested with "in"
    for( w = 1; w <= n; w ++ )
    {
        reserved[ reservedWords[ w ] ] = w;
    }

} # BEGIN

# main rule: highlights one source line
{

    printf( " " );
    line = $0;
    # escape the characters that are significant to the wiki
    #    & must be done first so the & of the other entities is left alone
    #    "\\&" in a gsub replacement is a literal &, not the matched text
    gsub( /&/, "\\&amp;",  line );
    gsub( /</, "\\&lt;",   line );
    gsub( />/, "\\&gt;",   line );
    gsub( /'/, "\\&apos;", line );
    if( line != "" )
    {
        c = "";
        nextChar();
        do
        {
            if( c == "#" )
            {
                # comment - the rest of the line is output in italics
                printf( "''#%s''", line );
                c = "";
            }
            else if( c == "\"" )
            {
                # string literal
                do
                {
                    if( c == "\\" )
                    {
                        # escape - also copy the character that follows
                        printf( "%s", c );
                        nextChar();
                    }
                    printf( "%s", c );
                    nextChar();
                }
                while( c != "\"" && c != "" );
                if( c != "\"" )
                {
                    printf( "**** Unterminated string\n" );
                }
                else
                {
                    nextChar();
                }
                printf( "\"" );
            }
            else if( c == "/" && lastC !~ /[A-Za-z0-9_.]/ )
            {
                # pattern - a / that does not follow an operand
                bracketDepth = 0;
                printf( "%s", c );
                nextChar();
                while( c != "" && ( c != "/" || bracketDepth > 0 ) )
                {
                    if( c == "\\" || c == "[" )
                    {
                        # escape or bracket expression start
                        #    also copy the character that follows
                        if( c == "[" )
                        {
                            bracketDepth ++;
                        }
                        printf( "%s", c );
                        nextChar();
                    }
                    else if( c == "]" )
                    {
                        bracketDepth --;
                    }
                    printf( "%s", c );
                    nextChar();
                }
                if( c != "/" )
                {
                    printf( "**** Unterminated pattern\n" );
                }
                else
                {
                    nextChar();
                }
                printf( "/" );
            }
            else if( c ~ /[A-Za-z_]/ )
            {
                # have a reserved word or identifier
                #    identifiers can start with _, so it is accepted here,
                #    otherwise the tail of e.g. _next would be shown in bold
                word = "";
                do
                {
                    word = word c;
                    nextChar();
                }
                while( c ~ /[A-Za-z0-9_]/ );
                if( word in reserved )
                {
                    word = "'''" word "'''";
                }
                printf( "%s", word );
            }
            else
            {
                # something else
                printf( "%s", c );
                nextChar();
            }
        }
        while( c != "" );
    }
    printf( "\n" );

}

# moves the first character of line to c, c is set to "" at end of line
#    c, line and lastC are all global
function nextChar()
{
    if( c != " " )
    {
        # the last character wasn't a space, save it so we can recognise patterns
        lastC = c;
    }
    if( line == "" )
    {
        # at end of line
        c = "";
    }
    else
    {
        # not end of line
        c    = substr( line, 1, 1 );
        line = substr( line, 2 );
    }
} # nextChar