Strip control codes and extended characters from a string: Difference between revisions

m
→‎{{header|Phix}}: syntax coloured, made p2js compatible-ish, plus notes
(Added solution for Action!)
m (→‎{{header|Phix}}: syntax coloured, made p2js compatible-ish, plus notes)
Line 1,620:
While you can delete a character from a string using, say, s[i..i] = "", the fastest and easiest way is always just
to build a new one character-by-character.<br>
I've credited Ada solely for the sensible fromch / toch / abovech idea.
<!--<lang Phix>(phixonline)-->
<lang Phix>function filter(string s, integer fromch=' ', toch=#7E, abovech=#7F)
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
string res = ""
<span style="color: #7060A8;">requires</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"1.0.2"</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (param default fixes in pwa/p2js)</span>
for i=1 to length(s) do
<span style="color: #008080;">function</span> <span style="color: #000000;">filter_it</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">fromch</span><span style="color: #0000FF;">=</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">toch</span><span style="color: #0000FF;">=</span><span style="color: #000000;">#7E</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">abovech</span><span style="color: #0000FF;">=</span><span style="color: #000000;">#7F</span><span style="color: #0000FF;">)</span>
integer ch = s[i]
<span style="color: #004080;">string</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
if ch>=fromch and (ch<=toch or ch>abovech) then
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
res &= ch
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
end if
<span style="color: #008080;">if</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">fromch</span> <span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">toch</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">></span><span style="color: #000000;">abovech</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
end for
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">ch</span>
return res
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">put_line</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s \"%s\", Length:%d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">full</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"\u0000 abc\u00E9def\u007F"</span>
<span style="color: #000000;">put_line</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"The full string:"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">full</span><span style="color: #0000FF;">)</span>
procedure put_line(string text, s)
<span style="color: #000000;">put_line</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"No Control Chars:"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">filter_it</span><span style="color: #0000FF;">(</span><span style="color: #000000;">full</span><span style="color: #0000FF;">))</span> <span style="color: #000080;font-style:italic;">-- default values for fromch, toch, and abovech</span>
printf(1,"%s \"%s\", Length:%d\n",{text,s,length(s)})
<span style="color: #000000;">put_line</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"\" and no Extended:"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">filter_it</span><span style="color: #0000FF;">(</span><span style="color: #000000;">full</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">abovech</span><span style="color: #0000FF;">:=</span><span style="color: #000000;">#FF</span><span style="color: #0000FF;">))</span> <span style="color: #000080;font-style:italic;">-- defaults for fromch and toch</span>
end procedure
<!--</lang>-->
string full = "\u0000 abc\u00E9def\u007F"
 
put_line("The full string:", full)
put_line("No Control Chars:", filter(full)) -- default values for fromch, toch, and abovech
put_line("\" and no Extended:", filter(full, abovech:=#FF)) -- defaults for fromch and toch</lang>
{{out}}
(desktop/Phix, in a grubby Windows console)
<pre>
The full string: " abc+®def�", Length:11
Line 1,647 ⟶ 1,652:
" and no Extended: " abcdef", Length:7
</pre>
(pwa/p2js)
<pre>
The full string: " abcédef", Length:10
No Control Chars: " abcédef", Length:8
" and no Extended: " abcdef", Length:7
</pre>
The two outputs differ because JavaScript handles Unicode slightly differently. On desktop/Phix, \u0000 is treated as the single ''byte'' #00, and
likewise \u007F as the single byte #7F, but \u00E9 is converted to the UTF-8 sequence #C3,#A9 - hence the disagreement over the initial and middle lengths. For proper compatibility you would have to start playing with utf8_to_utf32() and friends. Also, as you can see, the grubby Windows console does not display UTF-8 nicely, so we get an ugly "+®" for what should be a single \u00E9 character. You should get slightly better results on a Linux console, and if you have more fonts installed on Windows than I do, a different one might look better; of course the text is far more likely to display correctly in a GUI, but I fear that is a bit beyond the remit of this simple task.
 
=={{header|PicoLisp}}==
7,820

edits