XXXX redacted: Difference between revisions

m
→‎{{header|Phix}}: added syntax colouring and online link, with \t handling removed for that.
(Added AutoHotkey)
m (→‎{{header|Phix}}: added syntax colouring and online link, with \t handling removed for that.)
Line 1,021:
Written on the assumption that overkill implies partial (see talk page).<br>
utf32_length() is fashioned after [[Reverse_a_string#Phix]], with added zero-width joiner (ZWJ) handling - I do not expect it to be entirely complete.
<!--<lang Phix>enum WHOLE,PARTIAL,OVERKILL,INSENSITIVE(phixonline)-->
<span style="color: #008080;">enum</span> <span style="color: #000000;">WHOLE</span><span style="color: #0000FF;">,</span><span style="color: #000000;">PARTIAL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">OVERKILL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">INSENSITIVE</span>
constant spunc = " \r\n\t.?\"" -- spaces and punctuation
<span style="color: #008080;">constant</span> <span style="color: #000000;">spunc</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">" \r\n.?\""</span> <span style="color: #000080;font-style:italic;">-- spaces and punctuation</span>
 
function utf32_length(sequence utf32)
<span style="color: #008080;">function</span> <span style="color: #000000;">utf32_length</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">utf32</span><span style="color: #0000FF;">)</span>
integer l = length(utf32)
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">utf32</span><span style="color: #0000FF;">)</span>
for i=1 to l do
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">l</span> <span style="color: #008080;">do</span>
integer ch = utf32[i]
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
if (ch>=0x300 and ch<=0x36f)
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">0x300</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">0x36f</span><span style="color: #0000FF;">)</span>
or (ch>=0x1dc0 and ch<=0x1dff)
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">0x1dc0</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">0x1dff</span><span style="color: #0000FF;">)</span>
or (ch>=0x20d0 and ch<=0x20ff)
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">0x20d0</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">0x20ff</span><span style="color: #0000FF;">)</span>
or (ch>=0xfe20 and ch<=0xfe2f) then
<span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">ch</span><span style="color: #0000FF;">>=</span><span style="color: #000000;">0xfe20</span> <span style="color: #008080;">and</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">0xfe2f</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
l -= 1
<span style="color: #000000;">l</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
elsif ch=0x200D then -- ZERO WIDTH JOINER
<span style="color: #008080;">elsif</span> <span style="color: #000000;">ch</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0x200D</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- ZERO WIDTH JOINER</span>
l -= 2
<span style="color: #000000;">l</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">2</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
return l
<span style="color: #008080;">return</span> <span style="color: #000000;">l</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
function redact(string text, word, integer options)
<span style="color: #008080;">function</span> <span style="color: #000000;">redact</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">word</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">)</span>
sequence t_utf32 = utf8_to_utf32(text),
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t_utf32</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">utf8_to_utf32</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">),</span>
l_utf32 = t_utf32,
<span style="color: #000000;">l_utf32</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">,</span>
w_utf32 = utf8_to_utf32(word)
<span style="color: #000000;">w_utf32</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">utf8_to_utf32</span><span style="color: #0000FF;">(</span><span style="color: #000000;">word</span><span style="color: #0000FF;">)</span>
string opt = "[?|s]"
<span style="color: #004080;">string</span> <span style="color: #000000;">opt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"[?|s]"</span>
if options>INSENSITIVE then
<span style="color: #008080;">if</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">></span><span style="color: #000000;">INSENSITIVE</span> <span style="color: #008080;">then</span>
options -= INSENSITIVE
<span style="color: #000000;">options</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">INSENSITIVE</span>
opt[4] = 'i'
<span style="color: #000000;">opt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">4</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'i'</span>
l_utf32 = lower(t_utf32)
<span style="color: #000000;">l_utf32</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">)</span>
w_utf32 = lower(w_utf32)
<span style="color: #000000;">w_utf32</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #000000;">w_utf32</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
opt[2] = "wpo"[options]
<span style="color: #000000;">opt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"wpo"</span><span style="color: #0000FF;">[</span><span style="color: #000000;">options</span><span style="color: #0000FF;">]</span>
integer idx = 1
<span style="color: #004080;">integer</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
while true do
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
idx = match(w_utf32,l_utf32,idx)
<span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">w_utf32</span><span style="color: #0000FF;">,</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
if idx=0 then exit end if
<span style="color: #008080;">if</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
integer edx = idx+length(w_utf32)-1
<span style="color: #004080;">integer</span> <span style="color: #000000;">edx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">w_utf32</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span>
if options=WHOLE then
<span style="color: #008080;">if</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">=</span><span style="color: #000000;">WHOLE</span> <span style="color: #008080;">then</span>
if (idx=1 or find(l_utf32[idx-1],spunc))
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">or</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">spunc</span><span style="color: #0000FF;">))</span>
and (edx=length(l_utf32) or find(l_utf32[edx+1],spunc)) then
<span style="color: #008080;">and</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">or</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">spunc</span><span style="color: #0000FF;">))</span> <span style="color: #008080;">then</span>
t_utf32[idx..edx] = repeat('X',utf32_length(t_utf32[idx..edx]))
<span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'X'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">utf32_length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">]))</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
elsif options=PARTIAL
<span style="color: #008080;">elsif</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">=</span><span style="color: #000000;">PARTIAL</span>
or options=OVERKILL then
<span style="color: #008080;">or</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">=</span><span style="color: #000000;">OVERKILL</span> <span style="color: #008080;">then</span>
if options=OVERKILL then
<span style="color: #008080;">if</span> <span style="color: #000000;">options</span><span style="color: #0000FF;">=</span><span style="color: #000000;">OVERKILL</span> <span style="color: #008080;">then</span>
while idx>1 and not find(l_utf32[idx-1],spunc) do idx -= 1 end while
<span style="color: #008080;">while</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">and</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">spunc</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
while edx<length(l_utf32) and not find(l_utf32[edx+1],spunc) do edx += 1 end while
<span style="color: #008080;">while</span> <span style="color: #000000;">edx</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">l_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">spunc</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000000;">edx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
t_utf32[idx..edx] = repeat('X',utf32_length(t_utf32[idx..edx]))
<span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'X'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">utf32_length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">[</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">..</span><span style="color: #000000;">edx</span><span style="color: #0000FF;">]))</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
idx = edx+1
<span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">edx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
end while
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
text = utf32_to_utf8(t_utf32)
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">utf32_to_utf8</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t_utf32</span><span style="color: #0000FF;">)</span>
return {opt,text}
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">opt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">text</span><span style="color: #0000FF;">}</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
constant test = `
<span style="color: #008080;">constant</span> <span style="color: #000000;">test</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">`
Tom? Toms bottom tomato is in his stomach while playing the "Tom-tom" brand tom-toms. That's so tom.`,
Tom? Toms bottom tomato is in his stomach while playing the "Tom-tom" brand tom-toms. That's so tom.`</span><span style="color: #0000FF;">,</span>
tests = {"Tom","tom","t"}
<span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"Tom"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"tom"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"t"</span><span style="color: #0000FF;">}</span>
for t=1 to length(tests) do
<span style="color: #008080;">for</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
printf(1,"Redact %s:\n",{tests[t]})
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Redact %s:\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">t</span><span style="color: #0000FF;">]})</span>
for o=WHOLE to OVERKILL do
<span style="color: #008080;">for</span> <span style="color: #000000;">o</span><span style="color: #0000FF;">=</span><span style="color: #000000;">WHOLE</span> <span style="color: #008080;">to</span> <span style="color: #000000;">OVERKILL</span> <span style="color: #008080;">do</span>
printf(1,"%s:%s\n",redact(test,tests[t],o))
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s:%s\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">redact</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">t</span><span style="color: #0000FF;">],</span><span style="color: #000000;">o</span><span style="color: #0000FF;">))</span>
printf(1,"%s:%s\n",redact(test,tests[t],o+INSENSITIVE))
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s:%s\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">redact</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">t</span><span style="color: #0000FF;">],</span><span style="color: #000000;">o</span><span style="color: #0000FF;">+</span><span style="color: #000000;">INSENSITIVE</span><span style="color: #0000FF;">))</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
constant ut = "🧑 👨 🧔 👨‍👩‍👦",
<span style="color: #008080;">constant</span> <span style="color: #000000;">ut</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"🧑 👨 🧔 👨‍👩‍👦"</span><span style="color: #0000FF;">,</span>
fmt = """
<span style="color: #000000;">fmt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"""
 
%s
%s
Redact 👨 %s %s
Redact 👨‍👩‍👦👨 %s %s
Redact 👨‍👩‍👦 %s %s
"""
"""</span>
printf(1,fmt,{ut}&redact(ut,"👨",WHOLE)&redact(ut,"👨‍👩‍👦",WHOLE))</lang>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ut</span><span style="color: #0000FF;">}&</span><span style="color: #000000;">redact</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ut</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"👨"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">WHOLE</span><span style="color: #0000FF;">)&</span><span style="color: #000000;">redact</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ut</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"👨‍👩‍👦"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">WHOLE</span><span style="color: #0000FF;">))</span>
<!--</lang>-->
{{out}}
The Windows console makes a complete mockery of those Unicode characters, though it should look better on Linux...
<pre>
Redact Tom:
Line 1,118 ⟶ 1,119:
[o|s]:Tom? Toms XXXXXX XXXXXX is in his XXXXXXX while playing XXX "XXXXXXX" brand XXXXXXXX. XXXXXX so XXX.
[o|i]:XXX? XXXX XXXXXX XXXXXX is in his XXXXXXX while playing XXX "XXXXXXX" brand XXXXXXXX. XXXXXX so XXX.
 
               🧑 👨 🧔 👨‍👩‍👦
­ƒºæ ­ƒæ¿ ­ƒºö ­ƒæ¿ÔÇì­ƒæ®ÔÇì­ƒæª
Redact 👨 [w|s] 🧑 XX 🧔 👨‍👩‍👦
Redact ­ƒæ¿ [w|s] ­ƒºæ X ­ƒºö ­ƒæ¿ÔÇì­ƒæ®ÔÇì­ƒæª
Redact 👨‍👩‍👦 [w|s] 🧑 👨 🧔 XXXX
Redact ­ƒæ¿ÔÇì­ƒæ®ÔÇì­ƒæª [w|s] ­ƒºæ ­ƒæ¿ ­ƒºö X
</pre>
You can run this online [http://phix.x10.mx/p2js/redact.htm here]. Note that the Windows console makes a complete mockery of those Unicode characters.
 
=={{header|Raku}}==
7,806

edits