Strip control codes and extended characters from a string: Difference between revisions

Add Seed7 example
(Add Seed7 example)
Line 484:
 
print stripped("\ba\x00b\n\rc\fd\xc3")</lang>Output:<lang>abcd</lang>
 
=={{header|Seed7}}==
Seed7 [http://seed7.sourceforge.net/manual/types.htm#string strings] are UTF-32 encoded,
therefore no distinction between byte and Unicode strings is necessary.
The example below uses [http://seed7.sourceforge.net/libraries/utf8.htm#STD_UTF8_OUT STD_UTF8_OUT]
from the library [http://seed7.sourceforge.net/libraries/utf8.htm utf8.s7i], to write
Unicode characters with UTF-8 encoding to the console.
 
<lang seed7>$ include "seed7_05.s7i";
include "utf8.s7i";
 
(**
 *  Remove control codes from a string.
 *  Every character below space (' ') and the DEL character (code 127)
 *  is dropped; all other characters, including those above 127, are
 *  kept in their original order.
 *  @param stri The string to be filtered.
 *  @return a copy of 'stri' without control codes.
 *)
const func string: stripControl (in string: stri) is func
  result
    var string: stripped is "";
  local
    var char: current is ' ';
  begin
    for current range stri do
      # Keep everything that is neither a C0 control code nor DEL.
      if current >= ' ' and current <> '\127\' then
        stripped &:= current;
      end if;
    end for;
  end func;
 
(**
 *  Remove control codes and extended characters from a string.
 *  Only characters from space (' ') up to, but not including, DEL
 *  (code 127) are kept; everything below space and everything from
 *  code 127 upwards is dropped. The order of the kept characters
 *  is unchanged.
 *  @param stri The string to be filtered.
 *  @return a copy of 'stri' reduced to the characters ' ' .. '~'.
 *)
const func string: stripControlAndExtended (in string: stri) is func
  result
    var string: stripped is "";
  local
    var char: current is ' ';
  begin
    for current range stri do
      # Keep only the printable range below DEL.
      if current >= ' ' and current < '\127\' then
        stripped &:= current;
      end if;
    end for;
  end func;
 
# Sample input for the demonstration. It consists of three parts joined by
# string line continuations: text with accented letters, a line of boundary
# code points (0, 31, space, 33, 126, 127, 128 and 255) enclosed in newlines,
# and text that contains Unicode combining characters.
const string: src is "déjà vu\ # Unicode
\\n\0\\31\ \33\\126\\127\\128\\255\\n\ # Various boundary cases
\as⃝df̅"; # Unicode combining characters
# Program entry point: switches the standard output file to STD_UTF8_OUT
# and then writes the source text followed by the results of stripControl
# and stripControlAndExtended, each preceded by a heading line.
const proc: main is func
begin
OUT := STD_UTF8_OUT;
writeln("source text:");
writeln(src);
writeln("Stripped of control codes:");
writeln(stripControl(src));
writeln("Stripped of control codes and extended characters:");
writeln(stripControlAndExtended(src));
end func;</lang>
 
Output:
<pre>
source text:
déjà vu
� !~€ÿ
as⃝df̅
Stripped of control codes:
déjà vu !~€ÿas⃝df̅
Stripped of control codes and extended characters:
dj vu !~asdf
</pre>
 
=={{header|Tcl}}==