XXXX redacted: Difference between revisions
→{{header|Go}}: Added 'stretch' goal.
m (→{{header|Phix}}: made test string match task desc, to simplify output) |
(→{{header|Go}}: Added 'stretch' goal.) |
||
Line 63:
=={{header|Go}}==
The stretch goal has been achieved at the expense of assuming that each emoji grapheme is always followed by white-space or is at the end of the text. Go has a problem with zero width joiner (ZWJ) emojis, such as the final one in the test string, which the language does not recognize as a single 'character' because it consists of five Unicode code-points (or 'runes') instead of one. This problem is aggravated (as here) when one of the constituents of the ZWJ emoji happens to be a 'normal' emoji contained within the same test string! To get the number of 'X's right when a ZWJ emoji is being replaced, the code looks for its first zero width joiner character (U+200D) and skips to the next white-space character after that, if there is one.
<lang go>package main
Line 70 ⟶ 72:
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// findNextSpace reports the index of the first white-space rune in runes,
// as classified by unicode.IsSpace, or -1 when the slice contains none.
func findNextSpace(runes []rune) int {
	pos := 0
	// Advance past every leading non-space rune.
	for pos < len(runes) && !unicode.IsSpace(runes[pos]) {
		pos++
	}
	if pos == len(runes) {
		// Ran off the end without meeting any white-space.
		return -1
	}
	return pos
}
func redact(text, word, opts string) {
Line 102 ⟶ 114:
case "[p|i|o]", "[w|i|o]":
exp = `(?i)\b\w*` + word + `\w*\b`
case "[w]":
exp = word + `(\s|$)`
}
rgx := regexp.MustCompile(exp)
if opts
// rf builds the replacement for a single regex match, working rune by rune:
// white-space runes are copied through unchanged and every other rune
// becomes an 'X'. A zero width joiner gets special handling so that the
// runes following it in the joined sequence do not each add another 'X'.
rf := func(match string) string {
var res []rune
runes := []rune(match)
for i := 0; i < len(runes); i++ {
r := runes[i]
if r == '\u200d' { // zero width joiner character
// Look for white-space strictly after the joiner; ix is relative to i+1.
ix := findNextSpace(runes[i+1:])
if ix == -1 {
// No white-space follows: drop the rest of the match from the output.
break
} else {
i += ix + 1 // skip to next space
// i now indexes that white-space rune; emit it as-is and carry on after it.
res = append(res, runes[i])
continue
}
}
if unicode.IsSpace(r) {
res = append(res, r)
} else {
res = append(res, 'X')
}
}
return string(res)
}
text = rgx.ReplaceAllStringFunc(text, rf)
} else if len(opts) == 7 && opts[5] == 'n' {
repl := strings.Repeat("X", utf8.RuneCountInString(word))
text = rgx.ReplaceAllLiteralString(text, repl)
Line 138 ⟶ 177:
redact(text, word, opts)
}
fmt.Println()
}
text = "🧑 👨 🧔 👨👩👦"
for _, word := range []string{"👨", "👨👩👦"} {
fmt.Printf("Redact '%s':\n", word)
redact(text, word, "[w]")
fmt.Println()
}
Line 202 ⟶ 247:
[p|i|o] XXX? XXXX XXXXXX XXXXXX is in his XXXXXXX while playing XXX "XXXXXXX" brand XXXXXXXX. XXXXXX so XXX.
XXXX very XXXXXX, XXXXX you XXXXX?
Redact '👨':
[w] 🧑 X 🧔 👨👩👦
Redact '👨👩👦':
[w] 🧑 👨 🧔 X
</pre>
|