Category talk:Wren-str: Difference between revisions

→‎Source code: Added 'unaccent' method.
(→‎Source code: Added Str.toNum method.)
(→‎Source code: Added 'unaccent' method.)
Line 236:
var words = s.split(" ")
return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}
 
// Strips accents and cedillas from every Latin-1 Supplement character of 's'
// and spells out digraphs (æ, Æ, þ, Þ, ß), returning the converted string.
// An argument which is not a String is first replaced by its interpolated
// string form; an empty string is returned unchanged.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // marked[k] holds every accented variant that maps to the letter plain[k].
    var marked = [
        "àáâãäå", "ÀÁÂÃÄÅ", "ç", "Ç", "ð", "Ð", "èéêë", "ÈÉÊË", "ìíîï", "ÌÍÎÏ",
        "ñ", "Ñ", "òóôõöø", "ÒÓÔÕÖØ", "ùúûü", "ÙÚÛÜ", "ýÿ", "Ý"
    ]
    var plain = "aAcCdDeEiInNoOuUyY"
    var expansions = { "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss" }
    var pieces = s.toList
    // 'pos' is the index in 'pieces' of the code point currently being visited.
    var pos = -1
    for (cp in s.codePoints) {
        pos = pos + 1
        // Everything outside U+00C0..U+00FF is left exactly as it is.
        if (!(cp < 0xc0 || cp > 0xff)) {
            var ch = pieces[pos]
            // Find the first group containing this character, if any.
            var k = 0
            while (k < marked.count && marked[k].indexOf(ch) < 0) k = k + 1
            if (k < marked.count) {
                pieces[pos] = plain[k]
            } else if (expansions.containsKey(ch)) {
                pieces[pos] = expansions[ch]
            }
        }
    }
    // Fewer than 1000 pieces go to Strs.concat_, otherwise to Strs.concat with 1000
    // as its second argument (both helpers are defined elsewhere).
    if (pieces.count < 1000) return Strs.concat_(pieces)
    return Strs.concat(pieces, 1000)
}
 
Line 913 ⟶ 945:
var words = s.split(" ")
return Strs.join(words.map { |w| capitalize(w) }.toList, " ")
}
 
// Removes accents and other diacritical marks from all characters in a string,
// expands digraphs and ligatures, and removes all combining characters
// (U+0300..U+036F) before returning the result.
// As well as Latin-1 Supplement, coverage includes Latin Extended-A and various
// other characters found in modern European languages which use the Latin alphabet.
// A non-string argument is first converted using string interpolation and an
// empty string is returned unchanged.
static unaccent(s) {
    if (!(s is String)) s = "%(s)"
    if (s == "") return s
    // accented[j] lists every variant which is replaced by the single letter
    // unaccented[j], so both tables must stay the same length and in the same order.
    var accented = [
        "àáâãäåāăą", "ÀÁÂÃÄÅĀĂĄ", "ḃ", "Ḃ", "çćĉċč", "ÇĆĈĊČ", "ðďđḋ", "ÐĎĐḊ",
        "èéêëēĕėęě", "ÈÉÊËĒĔĖĘĚ", "ḟ", "Ḟ", "ĝğġģ", "ĜĞĠĢ", "ĥħ", "ĤĦ",
        "ìíîïĩīĭįı", "ÌÍÎÏĨĪĬĮİ", "ĵ", "Ĵ", "ķĸ", "Ķ", "ĺļľŀł", "ĹĻĽĿŁ",
        "ṁ", "Ṁ", "ñńņňŉ", "ÑŃŅŇ", "òóôõöøōŏő", "ÒÓÔÕÖØŌŎŐ", "ṗ", "Ṗ",
        "ŕŗř", "ŔŖŘ", "śŝşšșſ", "ŚŜŞŠȘ", "ţťŧṱț", "ŢŤŦṰȚ", "ùúûüũūŭůűų",
        "ÙÚÛÜŨŪŬŮŰŲ", "ŵẁẃẅ", "ŴẀẂẄ", "ýÿỳŷ", "ÝŸỲŶ", "źżž", "ŹŻŽ"
    ]
    var unaccented = "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPrRsStTuUwWyYzZ"
    // Single characters which expand to two letters; case follows the source character.
    var digraphs = {
        "æ": "ae", "Æ": "AE", "þ": "th", "Þ": "TH", "ß": "ss", "ẞ": "SS",
        "ĳ": "ij", "Ĳ": "IJ", "ŋ": "ng", "Ŋ": "NG", "œ": "oe", "Œ": "OE"
    }
    var chars = s.toList
    var count = chars.count
    // 'i' is the index in 'chars' of the code point 'c' currently being visited.
    var i = 0
    for (c in s.codePoints) {
        // Cheap range gate so that ordinary text skips the table search. The bounds
        // must enclose every table entry: U+00C0 (À) to U+021B (ț) and
        // U+1E02 (Ḃ) to U+1EF3 (ỳ).
        if ((c >= 0x00c0 && c <= 0x021b) || (c >= 0x1e02 && c <= 0x1ef3)) {
            var found = false
            for (j in 0...accented.count) {
                if (accented[j].indexOf(chars[i]) >= 0) {
                    chars[i] = unaccented[j]
                    found = true
                    break
                }
            }
            if (!found && digraphs.containsKey(chars[i])) chars[i] = digraphs[chars[i]]
        } else if (c >= 0x0300 && c <= 0x036f) {
            // Combining diacritical marks are dropped altogether.
            chars[i] = ""
        }
        i = i + 1
    }
    // Fewer than 1000 pieces go to Strs.concat_, otherwise to Strs.concat with 1000
    // as its second argument (both helpers are defined elsewhere).
    return (count < 1000) ? Strs.concat_(chars) : Strs.concat(chars, 1000)
}
 
9,476

edits