Strip block comments
You are encouraged to solve this task according to the task description, using any language you may know.
A block comment begins with a beginning delimiter and ends with an ending delimiter; the delimiters themselves are part of the comment.
Task: Strip block comments from program text (source code). Your demos should at least handle simple, non-nested and multi-line block comment delimiters.
Sample text for stripping:
/** * Some comments * longer comments here that we can parse. * * Rahoo */ function subroutine() { a = /* inline comment */ b + c ; } /*/ <-- triky comments */ /** * Another comment. */ function something() { }
D
<lang d>import std.stdio ; import std.regexp, std.algorithm ;
// Separates `s` into non-comment text and comment text.
//
// Params:
//   s    = the text to scan.
//   cpat = exactly two regular-expression patterns: cpat[0] matches a
//          comment opener, cpat[1] matches a comment closer.
// Returns: a two-element array. Element 0 is `s` with the comments removed;
//          element 1 is the concatenation of the removed comments,
//          delimiters included.
// Throws: Exception("Mismatched Comment") when scanning stops while still
//         looking for a closer.
string[] sepComment(string s, string cpat[] ...) {
    assert(cpat.length == 2, "sepComment : 2 pattern arguments for comment begin & end") ;
    string[] res = new string[](2) ;
    int p = 0, q = 0 /* cursors */, ic = 0 ; // ic: 0 = outside a comment, 1 = inside one
    int[] plen = new int[](2) ;              // longest opener / closer match seen so far;
                                             // needed to recognise the overlapping /*/ case

    // Moves q to the end of the next slice, searching from p for the pattern
    // selected by ic. Returns false when that pattern no longer occurs.
    bool advCursor() {
        auto m = std.regexp.search(s[p..$], cpat[ic]) ;
        if(m is null)
            return false ;
        plen[ic] = max(0, plen[ic], m[0].length) ;
        q = p + m.pre.length ;                    // outside: stop right before the opener
        if(ic) { q += m[0].length ; }             // inside: stop right after the closer
        if(std.regexp.find(m[0], "\n|\r") != -1)  // match contains a line break:
            q-- ;                                 // back up by one character
        return true ;
    }

    while(true) {
        if(!advCursor())
            break ;
        res[ic] ~= s[p..q] ;                      // save slice of result
        if( ic && (q - p < plen[0] + plen[1])) {
            // The comment is shorter than opener + closer together, so the
            // closer overlapped the opener (as in /*/): look for the real closer.
            p = q ;
            if(!advCursor())
                break ;
            res[ic] ~= s[p..q] ;                  // save result again
        }
        p = q ;                                   // advance cursor
        ic = 1 - ic ;                             // toggle search type
    }
    if(ic)
        throw new Exception("Mismatched Comment") ;
    res[ic] ~= s[p..$] ;                          // save rest (non-comment)
    return res ;
}
// Demo driver: runs sepComment on two samples and prints, for each one, the
// original text, the text with comments removed (t[0]) and the removed
// comments (t[1]).
void main() {
string s = ` /** * Some comments * longer comments here that we can parse. * * Rahoo */ function subroutine() { a = /* inline comment */ b + c ; } /*/ <-- triky comments */
/** * Another comment. */ function something() { }` ;
// First sample: block comments, with a literal "/*" as opener pattern and a literal "*/" as closer pattern.
writefln("==original:\n%s", s) ; auto t = sepComment(s, `/\*`, `\*/`) ; writefln("==comment stripped:\n%s\n==stripped comment:\n%s", t[0], t[1]) ;
s = "apples, pears # and bananas
apples, pears ; and bananas " ; // test for line comment
// Second sample: line comments, opened by '#' or ';' and closed by a line break or the end of the text.
writefln("==original:\n%s", s) ; t = sepComment(s, `#|;`, `[\n\r]|$`) ; writefln("==comment stripped:\n%s\n==stripped comment:\n%s", t[0], t[1]) ;
}</lang> part of output:
==comment stripped: function subroutine() { a = b + c ; } function something() { } ==stripped comment: /** * Some comments * longer comments here that we can parse. * * Rahoo *//* inline comment *//*/ <-- triky comments *//** * Another comment. */
Perl 6
<lang perl6># Split the sample text on every '/*' ... '*/' block and print the remaining pieces.
# The frugal quantifier is `.*?` rather than `.+?` so that an empty comment
# `/**/` is matched as a complete block instead of running on to a later '*/'.
sample().split(/ '/*' .*? '*/' /).print;
# Returns the sample source text (with its block comments) as one single-quoted string literal.
sub sample { ' /**
* Some comments * longer comments here that we can parse. * * Rahoo */ function subroutine() { a = /* inline comment */ b + c ; } /*/ <-- triky comments */
/** * Another comment. */ function something() { }
'}</lang>
Output:
function subroutine() { a = b + c ; } function something() { }
PicoLisp
<lang PicoLisp>(in "sample.txt"
# With "sample.txt" as the input channel, alternate between two phases:
# (echo "/*") copies input to the current output until "/*" is found,
# then (echo "*/") runs with output redirected to "/dev/null", which discards
# the comment up to "*/". The loop ends when the first echo returns NIL.
(while (echo "/*") (out "/dev/null" (echo "*/")) ) )</lang>
Output:
function subroutine() { a = b + c ; } function something() { }
PureBasic
Solution using regular expressions. A procedure to stripBlocks() procedure is defined that will strip comments between any two delimeters. <lang PureBasic>Procedure.s escapeChars(text.s)
  ; Returns a copy of text in which every regular-expression metacharacter is
  ; preceded by a backslash, so the result matches text literally when used
  ; inside a pattern.
  ;
  ; "{" is included because an unescaped "{n}" / "{n,m}" sequence would be read
  ; as a quantifier; "]" and "}" are escaped as well for symmetry (an escaped
  ; non-alphanumeric character always stands for itself).
  Static specialChars.s = "[]\^$.|?*+(){}"
  Protected output.s, nextChar.s, i, countChar = Len(text)

  For i = 1 To countChar
    nextChar = Mid(text, i, 1)
    If FindString(specialChars, nextChar, 1)
      output + "\" + nextChar   ; metacharacter: prefix it with a backslash
    Else
      output + nextChar         ; ordinary character: copy unchanged
    EndIf
  Next

  ProcedureReturn output
EndProcedure
Procedure.s stripBlocks(text.s, first.s, last.s)
  ; Returns text with every block that starts with the literal delimiter first
  ; and ends with the literal delimiter last removed (delimiters included).
  ; The match is non-greedy and, because of #PB_RegularExpression_DotAll,
  ; may span several lines.
  ; If the regular expression cannot be created, text is returned unchanged.
  Protected delimter_1.s = escapeChars(first), delimter_2.s = escapeChars(last)

  ; Show the escaped delimiters in the debugger output.
  Debug delimter_1
  Debug delimter_2

  Protected expNum = CreateRegularExpression(#PB_Any, delimter_1 + ".*?" + delimter_2, #PB_RegularExpression_DotAll)
  If expNum = 0
    ; Creation failed: do not hand an invalid handle to the functions below.
    ProcedureReturn text
  EndIf

  Protected output.s = ReplaceRegularExpression(expNum, text, "")
  FreeRegularExpression(expNum)
  ProcedureReturn output
EndProcedure
; Demo: build the sample text line by line, then print it unchanged, with
; "/*" ... "*/" blocks removed, and with "*" ... "*" blocks removed.
Define source.s
source.s = " /**" + #CRLF$
source.s + " * Some comments" + #CRLF$
source.s + " * longer comments here that we can parse." + #CRLF$
source.s + " *" + #CRLF$
source.s + " * Rahoo " + #CRLF$
source.s + " */" + #CRLF$
source.s + " function subroutine() {" + #CRLF$
source.s + " a = /* inline comment */ b + c ;" + #CRLF$
source.s + " }" + #CRLF$
source.s + " /*/ <-- triky comments */" + #CRLF$
source.s + "" + #CRLF$
source.s + " /**" + #CRLF$
source.s + " * Another comment." + #CRLF$
source.s + " */" + #CRLF$
source.s + " function something() {" + #CRLF$
source.s + " }" + #CRLF$

If OpenConsole()
  PrintN("--- source ---")
  PrintN(source)
  PrintN("--- source with block comments between '/*' and '*/' removed ---")
  PrintN(stripBlocks(source, "/*", "*/"))
  PrintN("--- source with block comments between '*' and '*' removed ---")
  PrintN(stripBlocks(source, "*", "*"))
  Print(#CRLF$ + #CRLF$ + "Press ENTER to exit"): Input()
  CloseConsole()
EndIf</lang> Sample output:
--- source --- /** * Some comments * longer comments here that we can parse. * * Rahoo */ function subroutine() { a = /* inline comment */ b + c ; } /*/ <-- triky comments */ /** * Another comment. */ function something() { } --- source with block comments between '/*' and '*/' removed --- function subroutine() { a = b + c ; } function something() { } --- source with block comments between '*' and '*' removed --- / longer comments here that we can parse. Rahoo inline comment / <-- triky comments Another comment. */ function something() { }
Tcl
<lang tcl>proc stripBlockComment {string {openDelimiter "/*"} {closeDelimiter "*/"}} {
    # Return $string with every openDelimiter ... closeDelimiter block removed
    # (delimiters included). Both delimiters are treated as literal text.

    # Convert the delimiters to REs by backslashing all non-word characters,
    # so that characters such as * are matched literally.
    set openAsRE [regsub -all {\W} $openDelimiter {\\&}]
    set closeAsRE [regsub -all {\W} $closeDelimiter {\\&}]

    # Now remove the blocks using a dynamic non-greedy regular expression.
    # The edited string produced by regsub is the result of the procedure.
    regsub -all "$openAsRE.*?$closeAsRE" $string ""
}</lang> Demonstration code: <lang tcl>puts [stripBlockComment " /**
* Some comments * longer comments here that we can parse. * * Rahoo */ function subroutine() { a = /* inline comment */ b + c ; } /*/ <-- triky comments */
/** * Another comment. */ function something() { }
"]</lang> Output:
function subroutine() { a = b + c ; } function something() { }