Strip control codes and extended characters from a string: Difference between revisions
Content added Content deleted
(→{{header|PicoLisp}}: add perl 6) |
(Go solution) |
||
Line 76: | Line 76: | ||
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ |
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ |
||
!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~</lang> |
!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~</lang> |
||
=={{header|Go}}== |
|||
Go works for ASCII and non-ASCII systems. The first pair of functions below interprets strings as byte strings, which is presumably useful for strings consisting of ASCII and 8-bit extended ASCII data. The second pair of functions interprets strings as UTF-8. |
|||
<lang go>package main |
|||
import ( |
|||
"fmt" |
|||
"strings" |
|||
) |
|||
// Two byte-oriented functions, identical except for the operator comparing c to 127. |
|||
// stripCtlFromBytes treats str as a sequence of bytes and returns a copy with
// the ASCII control codes removed: every byte below 32 and the DEL byte (127)
// is dropped. Bytes of 128 and above are kept unchanged, so 8-bit extended
// characters (and the individual bytes of UTF-8 sequences) pass through.
func stripCtlFromBytes(str string) string {
	// The result can never be longer than the input, so reserve that much once.
	b := make([]byte, 0, len(str))
	for i := 0; i < len(str); i++ {
		if c := str[i]; c >= 32 && c != 127 {
			b = append(b, c)
		}
	}
	return string(b)
}
|||
// stripCtlAndExtFromBytes treats str as a sequence of bytes and returns a copy
// containing only the printable ASCII bytes 32 through 126. Control codes
// (bytes below 32 and DEL, 127) and every extended byte (128 and above) are
// dropped, which also removes all bytes of multi-byte UTF-8 sequences.
func stripCtlAndExtFromBytes(str string) string {
	// The result can never be longer than the input, so reserve that much once.
	b := make([]byte, 0, len(str))
	for i := 0; i < len(str); i++ {
		if c := str[i]; c >= 32 && c < 127 {
			b = append(b, c)
		}
	}
	return string(b)
}
|||
// Two UTF-8 functions, identical except for the operator comparing the rune to 127. |
|||
// stripCtlFromUTF8 interprets str as UTF-8 and returns a copy with the ASCII
// control codes removed: code points below 32 and DEL (127) are dropped, and
// every other code point, including all non-ASCII characters, is kept.
//
// Decoding is done by strings.Map, so bytes that are not valid UTF-8 are seen
// by the callback as U+FFFD; that value passes the filter and is kept.
func stripCtlFromUTF8(str string) string {
	// The callback must be func(rune) rune; returning a negative value tells
	// strings.Map to drop the character from the result.
	return strings.Map(func(r rune) rune {
		if r >= 32 && r != 127 {
			return r
		}
		return -1
	}, str)
}
|||
// stripCtlAndExtFromUTF8 interprets str as UTF-8 and returns a copy containing
// only the printable ASCII code points 32 through 126. Control codes (below 32
// and DEL, 127) and every code point of 128 and above are dropped, so accented
// letters and combining characters are removed as whole characters.
func stripCtlAndExtFromUTF8(str string) string {
	// The callback must be func(rune) rune; returning a negative value tells
	// strings.Map to drop the character from the result.
	return strings.Map(func(r rune) rune {
		if r >= 32 && r < 127 {
			return r
		}
		return -1
	}, str)
}
|||
// src is the demonstration input. It is assembled from three pieces:
// precomposed accented letters, a run of bytes sitting on the boundaries the
// strip functions test (0, 31, 32, 33, 126, 127, 128, 255) framed by
// newlines, and letters followed by Unicode combining characters.
const src = "déjà vu" + // precomposed Unicode letters
	"\n\x00\x1f \x21\x7e\x7f\x80\xff\n" + // boundary cases: NUL, US, space, '!', '~', DEL, 0x80, 0xFF
	"as⃝df̅" // Unicode combining characters
|||
func main() { |
|||
fmt.Println("source text:") |
|||
fmt.Println(src, "\n") |
|||
fmt.Println("as bytes, stripped of control codes:") |
|||
fmt.Println(stripCtlFromBytes(src), "\n") |
|||
fmt.Println("as bytes, stripped of control codes and extended characters:") |
|||
fmt.Println(stripCtlAndExtFromBytes(src), "\n") |
|||
fmt.Println("as UTF-8, stripped of control codes:") |
|||
fmt.Println(stripCtlFromUTF8(src), "\n") |
|||
fmt.Println("as UTF-8, stripped of control codes and extended characters:") |
|||
fmt.Println(stripCtlAndExtFromUTF8(src)) |
|||
} |
|||
</lang> |
|||
Output: (varies with display configuration) |
|||
<pre> |
|||
source text: |
|||
déjà vu |
|||
� !~�� |
|||
as⃝df̅ |
|||
as bytes, stripped of control codes: |
|||
déjà vu !~��as⃝df̅ |
|||
as bytes, stripped of control codes and extended characters: |
|||
dj vu !~asdf |
|||
as UTF-8, stripped of control codes: |
|||
déjà vu !~��as⃝df̅ |
|||
as UTF-8, stripped of control codes and extended characters: |
|||
dj vu !~asdf |
|||
</pre> |
|||
=={{header|Icon}} and {{header|Unicon}}== |
=={{header|Icon}} and {{header|Unicon}}== |