Strip control codes and extended characters from a string: Difference between revisions

Content added Content deleted
(Go solution)
Line 76: Line 76:
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~</lang>
!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~</lang>
=={{header|Go}}==
Go works for ASCII and non-ASCII systems. The first pair of functions below interprets strings as byte strings, which is presumably useful for strings consisting of ASCII and 8-bit extended ASCII data. The second pair of functions interprets strings as UTF-8.
<lang go>package main

import (
"fmt"
"strings"
)

// two byte-oriented functions identical except for operator comparing c to 127.

// stripCtlFromBytes treats str as a sequence of raw bytes and returns a copy
// with the ASCII control codes (bytes 0-31 and 127) removed. Bytes 128-255
// are kept unchanged.
func stripCtlFromBytes(str string) string {
	// The result can never be longer than the input, so reserve that much once.
	kept := make([]byte, 0, len(str))
	for _, c := range []byte(str) {
		if c < 32 || c == 127 {
			continue // control code: drop it
		}
		kept = append(kept, c)
	}
	return string(kept)
}

// stripCtlAndExtFromBytes treats str as a sequence of raw bytes and returns a
// copy containing only bytes in the range 32-126; control codes (0-31, 127)
// and extended bytes (128-255) are removed.
func stripCtlAndExtFromBytes(str string) string {
	// The result can never be longer than the input, so reserve that much once.
	printable := make([]byte, 0, len(str))
	for _, c := range []byte(str) {
		if c < 32 || c >= 127 {
			continue // control code or extended byte: drop it
		}
		printable = append(printable, c)
	}
	return string(printable)
}

// two UTF-8 functions identical except for operator comparing the rune to 127

// stripCtlFromUTF8 interprets str as UTF-8 and returns a copy with the ASCII
// control codes (code points 0-31 and 127) removed. All other code points,
// including non-ASCII ones, are kept.
//
// Bytes that are not valid UTF-8 are decoded by strings.Map as U+FFFD, which
// passes the filter and therefore appears in the result.
func stripCtlFromUTF8(str string) string {
	// strings.Map requires a func(rune) rune callback; a negative return
	// value drops the character from the output.
	return strings.Map(func(r rune) rune {
		if r >= 32 && r != 127 {
			return r
		}
		return -1
	}, str)
}

// stripCtlAndExtFromUTF8 interprets str as UTF-8 and returns a copy that
// contains only code points in the range 32-126; control codes (0-31, 127)
// and every code point above 127 are removed.
func stripCtlAndExtFromUTF8(str string) string {
	// strings.Map requires a func(rune) rune callback; a negative return
	// value drops the character from the output.
	return strings.Map(func(r rune) rune {
		if r >= 32 && r < 127 {
			return r
		}
		return -1
	}, str)
}

// src is the sample input: precomposed accented letters, a line of bytes
// around the control/printable boundaries (octal escapes for 0, 31, 33, 126,
// 127, 128 and 255, plus a literal space and newlines), and letters followed
// by combining characters.
const src = "déjà vu" + // precomposed unicode
"\n\000\037 \041\176\177\200\377\n" + // various boundary cases
"as⃝df̅" // unicode combining characters

// main prints src followed by the result of running each of the four
// stripping functions over it, with a blank line between sections.
func main() {
	fmt.Println("source text:")
	fmt.Println(src)

	sections := []struct {
		label string
		strip func(string) string
	}{
		{"as bytes, stripped of control codes:", stripCtlFromBytes},
		{"as bytes, stripped of control codes and extended characters:", stripCtlAndExtFromBytes},
		{"as UTF-8, stripped of control codes:", stripCtlFromUTF8},
		{"as UTF-8, stripped of control codes and extended characters:", stripCtlAndExtFromUTF8},
	}
	for _, s := range sections {
		// Emit the separator explicitly rather than passing "\n" as a
		// Println argument, which go vet reports as a redundant newline.
		fmt.Println()
		fmt.Println(s.label)
		fmt.Println(s.strip(src))
	}
}
</lang>
Output: (varies with display configuration)
<pre>
source text:
déjà vu
� !~��
as⃝df̅

as bytes, stripped of control codes:
déjà vu !~��as⃝df̅

as bytes, stripped of control codes and extended characters:
dj vu !~asdf

as UTF-8, stripped of control codes:
déjà vu !~��as⃝df̅

as UTF-8, stripped of control codes and extended characters:
dj vu !~asdf
</pre>


=={{header|Icon}} and {{header|Unicon}}==
=={{header|Icon}} and {{header|Unicon}}==