Strip control codes and extended characters from a string: Difference between revisions

Content added Content deleted

Inline

@@ Line 76: / Line 76: @@
  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
  !"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~</lang>
+=={{header|Go}}==
+Go works for ASCII and non-ASCII systems.  The first pair of functions below interprets strings as byte strings, which is presumably useful for strings consisting of ASCII and 8-bit extended ASCII data.  The second pair of functions interprets strings as UTF-8.
+<lang go>package main
+import (
+    "fmt"
+    "strings"
+)
+// two byte-oriented functions identical except for operator comparing c to 127.
+func stripCtlFromBytes(str string) string {
+    b := make([]byte, len(str))
+    var bl int
+    for i := 0; i < len(str); i++ {
+        c := str[i]
+        if c >= 32 && c != 127 {
+            b[bl] = c
+            bl++
+        }
+    }
+    return string(b[:bl])
+}
+func stripCtlAndExtFromBytes(str string) string {
+    b := make([]byte, len(str))
+    var bl int
+    for i := 0; i < len(str); i++ {
+        c := str[i]
+        if c >= 32 && c < 127 {
+            b[bl] = c
+            bl++
+        }
+    }
+    return string(b[:bl])
+}
+// two UTF-8 functions identical except for operator comparing the rune to 127
+
+// stripCtlFromUTF8 decodes str as UTF-8 and returns it with the ASCII
+// control codes removed: code points below 32 and DEL (127) are dropped.
+// All other code points are kept, including those above 127. Bytes that
+// are not valid UTF-8 reach the callback as U+FFFD and so are kept as
+// the replacement character.
+func stripCtlFromUTF8(str string) string {
+    // strings.Map requires a func(rune) rune callback and drops any
+    // character for which the callback returns a negative value.
+    return strings.Map(func(r rune) rune {
+        if r >= 32 && r != 127 {
+            return r
+        }
+        return -1
+    }, str)
+}
+// stripCtlAndExtFromUTF8 decodes str as UTF-8 and returns only its
+// printable ASCII characters, code points 32 through 126 inclusive.
+// Control codes, DEL (127) and every code point above 127 are dropped.
+// Bytes that are not valid UTF-8 reach the callback as U+FFFD and are
+// dropped as well.
+func stripCtlAndExtFromUTF8(str string) string {
+    // strings.Map requires a func(rune) rune callback and drops any
+    // character for which the callback returns a negative value.
+    return strings.Map(func(r rune) rune {
+        if r >= 32 && r < 127 {
+            return r
+        }
+        return -1
+    }, str)
+}
+const src = "déjà vu" +                    // precomposed Unicode letters with accents
+    "\n\000\037 \041\176\177\200\377\n" +  // boundary cases (octal escapes): newline, bytes 0 and 31, space, '!' (33), '~' (126), DEL (127), bytes 128 and 255, newline
+    "as⃝df̅"                               // Unicode combining characters
+// main prints src and then the result of each of the four strip
+// functions applied to it. Every section has a heading and all but the
+// last are followed by a blank line.
+func main() {
+    // The blank line is printed with a separate Println call: passing
+    // "\n" as a final Println argument is rejected by go vet and also
+    // emits a stray space before the newline.
+    fmt.Println("source text:")
+    fmt.Println(src)
+    fmt.Println()
+    fmt.Println("as bytes, stripped of control codes:")
+    fmt.Println(stripCtlFromBytes(src))
+    fmt.Println()
+    fmt.Println("as bytes, stripped of control codes and extended characters:")
+    fmt.Println(stripCtlAndExtFromBytes(src))
+    fmt.Println()
+    fmt.Println("as UTF-8, stripped of control codes:")
+    fmt.Println(stripCtlFromUTF8(src))
+    fmt.Println()
+    fmt.Println("as UTF-8, stripped of control codes and extended characters:")
+    fmt.Println(stripCtlAndExtFromUTF8(src))
+}
+</lang>
+Output: (varies with display configuration)
+<pre>
+source text:
+déjà vu
+� !~��
+as⃝df̅
+as bytes, stripped of control codes:
+déjà vu !~��as⃝df̅
+as bytes, stripped of control codes and extended characters:
+dj vu !~asdf
+as UTF-8, stripped of control codes:
+déjà vu !~��as⃝df̅
+as UTF-8, stripped of control codes and extended characters:
+dj vu !~asdf
+</pre>
 =={{header|Icon}} and {{header|Unicon}}==