Idiomatically determine all the characters that can be used for symbols: Difference between revisions

Content added Content deleted

Inline

@@ Line 400: / Line 400: @@
             {ng2,length(ok2),ok2}}
 end function
 sequence r = check(0,127)
 printf(1,"ansi characters:\n===============\n")
-printf(1,"1st character: %d no good, %d OK %s\n",r[1])
+printf(1,"1st character: %d bad, %d OK %s\n",r[1])
-printf(1,"2nd..nth char: %d no good, %d OK %s\n\n",r[2])
+printf(1,"2nd..nth char: %d bad, %d OK %s\n\n",r[2])
 r = check(128,255)
 integer ok8 = 0, ng8 = 0
-sequence good = ""
 for i=#80 to #10FFFF do
     if i<#D800 or i>#DFFF then
@@ Line 424: / Line 424: @@
         if ok then
             ok8 += 1
-            good &= utf8&", "
         else
             ng8 += 1
@@ Line 431: / Line 430: @@
 end for
 printf(1,"utf8 characters:   \n===============\n")
-printf(1,"good:%d, bad:%d\n",{ok8,ng8})
+printf(1,"bad:%,d, good:%,d\n",{ng8,ok8})</lang>
-if platform()=LINUX then
-    -- (comes out gibberish on a windows console...)
-    printf(1,"%s\n",{good})
-end if</lang>
 {{out}}
 <pre>
 ansi characters:
 ===============
-1st character: 75 no good, 53 OK ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
+1st character: 75 bad, 53 OK ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
-2nd..nth char: 65 no good, 63 OK 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
+2nd..nth char: 65 bad, 63 OK 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
 utf8 characters:
 ===============
+bad:0, good:1,111,936
-good:48, bad:1111888
-΀, Έ, Δ, Κ, Σ, λ, π, ψ, ϔ, Ϛ, ϣ, ϻ,  ,  , —, ‚, ‣, ※, ∀, ∈, ∔, √, ∣, ∻, ─, ┈, └, ┚, ┣, ┻, ⚀, ⚈, ⚔, ⚚, ⚣, ⚻, ⣀, ⣈, ⣔, ⣚, ⣣, ⣻, ⻀, ⻈, ⻔, ⻚, ⻣, ⻻,
 </pre>
-Note that ptok.e (part of the compiler) currently contains the following:
+Note that versions prior to 0.8.1 permit a mere 48 utf8 characters.
-<lang Phix>charset[#80] = LETTER   -- more unicode
-charset[#88] = LETTER   -- more unicode
-charset[#94] = LETTER   -- for rosettacode/unicode (as ptok.e is not stored in utf8)
-charset[#9A] = LETTER   -- for rosettacode/unicode
-charset[#A3] = LETTER   -- for rosettacode/unicode
-charset[#BB] = LETTER   -- for rosettacode/unicode
-charset[#CE] = LETTER   -- for rosettacode/unicode
-charset[#CF] = LETTER
-charset[#E2] = LETTER</lang>
-If that is extended (with more utf-8 handling) then obviously the output will change.<br>
-I am a little surprised at just how few ad-hoc utf8 characters have been supported so far.
 =={{header|Python}}==