Category talk:Wren-str: Difference between revisions

→‎Source code: Added Utf8.fromWin1252 method.
(→‎Source code: Added Greek class.)
(→‎Source code: Added Utf8.fromWin1252 method.)
Line 560:
return (b0 & b4Mask) << 18 | (b[1] & mbMask) << 12 | (b[2] & mbMask) << 6 | (b[3] & mbMask)
}
}
 
// Converts a Windows-1252 encoded byte string to a UTF-8 encoded string.
//
// Each byte of 'win1252' is treated as one Windows-1252 character:
//   - bytes 0-127 and 160-255 map to the code point with the same value;
//   - bytes 128-159 are translated through a lookup table, since this is the
//     range where Windows-1252 differs from ISO-8859-1.
// Returns the empty string for empty input.
// Aborts the current fiber if 'win1252' is not a String.
static fromWin1252(win1252) {
    // Abort rather than just print: continuing with a non-string would only
    // fail later with an unrelated "does not implement" error.
    if (!(win1252 is String)) Fiber.abort("Argument must be a byte string.")
    if (win1252.count == 0) return ""
    // Mapping for Windows-1252 bytes 128-159 (index = byte - 128).
    // Unused bytes (0x81, 0x8d, 0x8f, 0x90, 0x9d) are mapped to the
    // corresponding ISO-8859-1 control codes.
    var bm = [
        0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
        0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
        0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
        0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
    ]
    // Work on raw bytes, not code points: the input is not valid UTF-8 in general.
    var utf8 = []
    for (b in win1252.bytes) {
        var cp = (b < 128 || b > 159) ? b : bm[b - 128]
        // String.fromCodePoint yields the UTF-8 encoding of the code point.
        utf8.add(String.fromCodePoint(cp))
    }
    return utf8.join()
}
}
9,482

edits