Idiomatically determine all the characters that can be used for symbols: Difference between revisions
Content added Content deleted
(→{{header|Phix}}: enhanced the compiler!) |
|||
Line 400: | Line 400: | ||
{ng2,length(ok2),ok2}} |
{ng2,length(ok2),ok2}} |
||
end function |
end function |
||
sequence r = check(0,127) |
sequence r = check(0,127) |
||
printf(1,"ansi characters:\n===============\n") |
printf(1,"ansi characters:\n===============\n") |
||
printf(1,"1st character: %d |
printf(1,"1st character: %d bad, %d OK %s\n",r[1]) |
||
printf(1,"2nd..nth char: %d |
printf(1,"2nd..nth char: %d bad, %d OK %s\n\n",r[2]) |
||
r = check(128,255) |
r = check(128,255) |
||
integer ok8 = 0, ng8 = 0 |
integer ok8 = 0, ng8 = 0 |
||
sequence good = "" |
|||
for i=#80 to #10FFFF do |
for i=#80 to #10FFFF do |
||
if i<#D800 or i>#DFFF then |
if i<#D800 or i>#DFFF then |
||
Line 424: | Line 424: | ||
if ok then |
if ok then |
||
ok8 += 1 |
ok8 += 1 |
||
good &= utf8&", " |
|||
else |
else |
||
ng8 += 1 |
ng8 += 1 |
||
Line 431: | Line 430: | ||
end for |
end for |
||
printf(1,"utf8 characters: \n===============\n") |
printf(1,"utf8 characters: \n===============\n") |
||
printf(1," |
printf(1,"bad:%,d, good:%,d\n",{ng8,ok8})</lang> |
||
if platform()=LINUX then |
|||
-- (comes out gibberish on a windows console...) |
|||
printf(1,"%s\n",{good}) |
|||
end if</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
ansi characters: |
ansi characters: |
||
=============== |
=============== |
||
1st character: 75 |
1st character: 75 bad, 53 OK ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz |
||
2nd..nth char: 65 |
2nd..nth char: 65 bad, 63 OK 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz |
||
utf8 characters: |
utf8 characters: |
||
=============== |
=============== |
||
bad:0, good:1,111,936 |
|||
good:48, bad:1111888 |
|||
, Έ, Δ, Κ, Σ, λ, π, ψ, ϔ, Ϛ, ϣ, ϻ, , , —, ‚, ‣, ※, ∀, ∈, ∔, √, ∣, ∻, ─, ┈, └, ┚, ┣, ┻, ⚀, ⚈, ⚔, ⚚, ⚣, ⚻, ⣀, ⣈, ⣔, ⣚, ⣣, ⣻, ⻀, ⻈, ⻔, ⻚, ⻣, , |
|||
</pre> |
</pre> |
||
Note that |
Note that versions prior to 0.8.1 permit a mere 48 utf8 characters. |
||
<lang Phix>charset[#80] = LETTER -- more unicode |
|||
charset[#88] = LETTER -- more unicode |
|||
charset[#94] = LETTER -- for rosettacode/unicode (as ptok.e is not stored in utf8) |
|||
charset[#9A] = LETTER -- for rosettacode/unicode |
|||
charset[#A3] = LETTER -- for rosettacode/unicode |
|||
charset[#BB] = LETTER -- for rosettacode/unicode |
|||
charset[#CE] = LETTER -- for rosettacode/unicode |
|||
charset[#CF] = LETTER |
|||
charset[#E2] = LETTER</lang> |
|||
If that list of charset entries is extended (with more utf-8 handling) then the output will obviously change.<br> |
|||
I am a little surprised at just how few ad-hoc utf8 characters have been supported so far. |
|||
=={{header|Python}}== |
=={{header|Python}}== |