Category talk:Wren-upc: Difference between revisions

Undo revision 308909 by PureFox (talk)
(→‎User-perceived characters: Added source code (first part))
(Undo revision 308909 by PureFox (talk))
Line 5:
 
Although the source code file is large by Wren library standards (over 1900 lines), approximately 1600 of those lines are needed to describe the property table, which provides the raw material for text segmentation. In the interests of brevity, I have omitted the comments that accompanied the original table; that table should be consulted if any explanation is needed.
 
===Source code===
<lang ecmascript>/* Module "upc.wren" */
 
/*
Properties encapsulates the Unicode properties relevant to this module and, as well as exposing
those properties themselves, contains a single public method to return the relevant property
for a given Unicode codepoint.
*/
class Properties {
// Unicode properties relevant to this module.
static any { 0 }
static prepend { 1 }
static cr { 2 }
static lf { 3 }
static control { 4 }
static extend { 5 }
static regionalIndicator { 6 }
static spacingMark { 7 }
static l { 8 }
static v { 9 }
static t { 10 }
static lv { 11 }
static lvt { 12 }
static zwj { 13 }
static extendedPictographic { 14 }
 
// Private method to initialize codepoints table.
// The latter maps an inclusive range of codepoints to a Unicode property.
// The ranges are in order to enable a binary search of the table by codepoint.
static init_() {
__codePoints = [
[0x0000, 0x0009, control],
[0x000A, 0x000A, lf],
[0x000B, 0x000C, control],
[0x000D, 0x000D, cr],
[0x000E, 0x001F, control],
[0x007F, 0x009F, control],
[0x00A9, 0x00A9, extendedPictographic],
[0x00AD, 0x00AD, control],
[0x00AE, 0x00AE, extendedPictographic],
[0x0300, 0x036F, extend],
[0x0483, 0x0487, extend],
[0x0488, 0x0489, extend],
[0x0591, 0x05BD, extend],
[0x05BF, 0x05BF, extend],
[0x05C1, 0x05C2, extend],
[0x05C4, 0x05C5, extend],
[0x05C7, 0x05C7, extend],
[0x0600, 0x0605, prepend],
[0x0610, 0x061A, extend],
[0x061C, 0x061C, control],
[0x064B, 0x065F, extend],
[0x0670, 0x0670, extend],
[0x06D6, 0x06DC, extend],
[0x06DD, 0x06DD, prepend],
[0x06DF, 0x06E4, extend],
[0x06E7, 0x06E8, extend],
[0x06EA, 0x06ED, extend],
[0x070F, 0x070F, prepend],
[0x0711, 0x0711, extend],
[0x0730, 0x074A, extend],
[0x07A6, 0x07B0, extend],
[0x07EB, 0x07F3, extend],
[0x07FD, 0x07FD, extend],
[0x0816, 0x0819, extend],
[0x081B, 0x0823, extend],
[0x0825, 0x0827, extend],
[0x0829, 0x082D, extend],
[0x0859, 0x085B, extend],
[0x08D3, 0x08E1, extend],
[0x08E2, 0x08E2, prepend],
[0x08E3, 0x0902, extend],
[0x0903, 0x0903, spacingMark],
[0x093A, 0x093A, extend],
[0x093B, 0x093B, spacingMark],
[0x093C, 0x093C, extend],
[0x093E, 0x0940, spacingMark],
[0x0941, 0x0948, extend],
[0x0949, 0x094C, spacingMark],
[0x094D, 0x094D, extend],
[0x094E, 0x094F, spacingMark],
[0x0951, 0x0957, extend],
[0x0962, 0x0963, extend],
[0x0981, 0x0981, extend],
[0x0982, 0x0983, spacingMark],
[0x09BC, 0x09BC, extend],
[0x09BE, 0x09BE, extend],
[0x09BF, 0x09C0, spacingMark],
[0x09C1, 0x09C4, extend],
[0x09C7, 0x09C8, spacingMark],
[0x09CB, 0x09CC, spacingMark],
[0x09CD, 0x09CD, extend],
[0x09D7, 0x09D7, extend],
[0x09E2, 0x09E3, extend],
[0x09FE, 0x09FE, extend],
[0x0A01, 0x0A02, extend],
[0x0A03, 0x0A03, spacingMark],
[0x0A3C, 0x0A3C, extend],
[0x0A3E, 0x0A40, spacingMark],
[0x0A41, 0x0A42, extend],
[0x0A47, 0x0A48, extend],
[0x0A4B, 0x0A4D, extend],
[0x0A51, 0x0A51, extend],
[0x0A70, 0x0A71, extend],
[0x0A75, 0x0A75, extend],
[0x0A81, 0x0A82, extend],
[0x0A83, 0x0A83, spacingMark],
[0x0ABC, 0x0ABC, extend],
[0x0ABE, 0x0AC0, spacingMark],
[0x0AC1, 0x0AC5, extend],
[0x0AC7, 0x0AC8, extend],
[0x0AC9, 0x0AC9, spacingMark],
[0x0ACB, 0x0ACC, spacingMark],
[0x0ACD, 0x0ACD, extend],
[0x0AE2, 0x0AE3, extend],
[0x0AFA, 0x0AFF, extend],
[0x0B01, 0x0B01, extend],
[0x0B02, 0x0B03, spacingMark],
[0x0B3C, 0x0B3C, extend],
[0x0B3E, 0x0B3E, extend],
[0x0B3F, 0x0B3F, extend],
[0x0B40, 0x0B40, spacingMark],
[0x0B41, 0x0B44, extend],
[0x0B47, 0x0B48, spacingMark],
[0x0B4B, 0x0B4C, spacingMark],
[0x0B4D, 0x0B4D, extend],
[0x0B56, 0x0B56, extend],
[0x0B57, 0x0B57, extend],
[0x0B62, 0x0B63, extend],
[0x0B82, 0x0B82, extend],
[0x0BBE, 0x0BBE, extend],
[0x0BBF, 0x0BBF, spacingMark],
[0x0BC0, 0x0BC0, extend],
[0x0BC1, 0x0BC2, spacingMark],
[0x0BC6, 0x0BC8, spacingMark],
[0x0BCA, 0x0BCC, spacingMark],
[0x0BCD, 0x0BCD, extend],
[0x0BD7, 0x0BD7, extend],
[0x0C00, 0x0C00, extend],
[0x0C01, 0x0C03, spacingMark],
[0x0C04, 0x0C04, extend],
[0x0C3E, 0x0C40, extend],
[0x0C41, 0x0C44, spacingMark],
[0x0C46, 0x0C48, extend],
[0x0C4A, 0x0C4D, extend],
[0x0C55, 0x0C56, extend],
[0x0C62, 0x0C63, extend],
[0x0C81, 0x0C81, extend],
[0x0C82, 0x0C83, spacingMark],
[0x0CBC, 0x0CBC, extend],
[0x0CBE, 0x0CBE, spacingMark],
[0x0CBF, 0x0CBF, extend],
[0x0CC0, 0x0CC1, spacingMark],
[0x0CC2, 0x0CC2, extend],
[0x0CC3, 0x0CC4, spacingMark],
[0x0CC6, 0x0CC6, extend],
[0x0CC7, 0x0CC8, spacingMark],
[0x0CCA, 0x0CCB, spacingMark],
[0x0CCC, 0x0CCD, extend],
[0x0CD5, 0x0CD6, extend],
[0x0CE2, 0x0CE3, extend],
[0x0D00, 0x0D01, extend],
[0x0D02, 0x0D03, spacingMark],
[0x0D3B, 0x0D3C, extend],
[0x0D3E, 0x0D3E, extend],
[0x0D3F, 0x0D40, spacingMark],
[0x0D41, 0x0D44, extend],
[0x0D46, 0x0D48, spacingMark],
[0x0D4A, 0x0D4C, spacingMark],
[0x0D4D, 0x0D4D, extend],
[0x0D4E, 0x0D4E, prepend],
[0x0D57, 0x0D57, extend],
[0x0D62, 0x0D63, extend],
[0x0D82, 0x0D83, spacingMark],
[0x0DCA, 0x0DCA, extend],
[0x0DCF, 0x0DCF, extend],
[0x0DD0, 0x0DD1, spacingMark],
[0x0DD2, 0x0DD4, extend],
[0x0DD6, 0x0DD6, extend],
[0x0DD8, 0x0DDE, spacingMark],
[0x0DDF, 0x0DDF, extend],
[0x0DF2, 0x0DF3, spacingMark],
[0x0E31, 0x0E31, extend],
[0x0E33, 0x0E33, spacingMark],
[0x0E34, 0x0E3A, extend],
[0x0E47, 0x0E4E, extend],
[0x0EB1, 0x0EB1, extend],
[0x0EB3, 0x0EB3, spacingMark],
[0x0EB4, 0x0EBC, extend],
[0x0EC8, 0x0ECD, extend],
[0x0F18, 0x0F19, extend],
[0x0F35, 0x0F35, extend],
[0x0F37, 0x0F37, extend],
[0x0F39, 0x0F39, extend],
[0x0F3E, 0x0F3F, spacingMark],
[0x0F71, 0x0F7E, extend],
[0x0F7F, 0x0F7F, spacingMark],
[0x0F80, 0x0F84, extend],
[0x0F86, 0x0F87, extend],
[0x0F8D, 0x0F97, extend],
[0x0F99, 0x0FBC, extend],
[0x0FC6, 0x0FC6, extend],
[0x102D, 0x1030, extend],
[0x1031, 0x1031, spacingMark],
[0x1032, 0x1037, extend],
[0x1039, 0x103A, extend],
[0x103B, 0x103C, spacingMark],
[0x103D, 0x103E, extend],
[0x1056, 0x1057, spacingMark],
[0x1058, 0x1059, extend],
[0x105E, 0x1060, extend],
[0x1071, 0x1074, extend],
[0x1082, 0x1082, extend],
[0x1084, 0x1084, spacingMark],
[0x1085, 0x1086, extend],
[0x108D, 0x108D, extend],
[0x109D, 0x109D, extend],
[0x1100, 0x115F, l],
[0x1160, 0x11A7, v],
[0x11A8, 0x11FF, t],
[0x135D, 0x135F, extend],
[0x1712, 0x1714, extend],
[0x1732, 0x1734, extend],
[0x1752, 0x1753, extend],
[0x1772, 0x1773, extend],
[0x17B4, 0x17B5, extend],
[0x17B6, 0x17B6, spacingMark],
[0x17B7, 0x17BD, extend],
[0x17BE, 0x17C5, spacingMark],
[0x17C6, 0x17C6, extend],
[0x17C7, 0x17C8, spacingMark],
[0x17C9, 0x17D3, extend],
[0x17DD, 0x17DD, extend],
[0x180B, 0x180D, extend],
[0x180E, 0x180E, control],
[0x1885, 0x1886, extend],
[0x18A9, 0x18A9, extend],
[0x1920, 0x1922, extend],
[0x1923, 0x1926, spacingMark],
[0x1927, 0x1928, extend],
[0x1929, 0x192B, spacingMark],
[0x1930, 0x1931, spacingMark],
[0x1932, 0x1932, extend],
[0x1933, 0x1938, spacingMark],
[0x1939, 0x193B, extend],
[0x1A17, 0x1A18, extend],
[0x1A19, 0x1A1A, spacingMark],
[0x1A1B, 0x1A1B, extend],
[0x1A55, 0x1A55, spacingMark],
[0x1A56, 0x1A56, extend],
[0x1A57, 0x1A57, spacingMark],
[0x1A58, 0x1A5E, extend],
[0x1A60, 0x1A60, extend],
[0x1A62, 0x1A62, extend],
[0x1A65, 0x1A6C, extend],
[0x1A6D, 0x1A72, spacingMark],
[0x1A73, 0x1A7C, extend],
[0x1A7F, 0x1A7F, extend],
[0x1AB0, 0x1ABD, extend],
[0x1ABE, 0x1ABE, extend],
[0x1B00, 0x1B03, extend],
[0x1B04, 0x1B04, spacingMark],
[0x1B34, 0x1B34, extend],
[0x1B35, 0x1B35, extend],
[0x1B36, 0x1B3A, extend],
[0x1B3B, 0x1B3B, spacingMark],
[0x1B3C, 0x1B3C, extend],
[0x1B3D, 0x1B41, spacingMark],
[0x1B42, 0x1B42, extend],
[0x1B43, 0x1B44, spacingMark],
[0x1B6B, 0x1B73, extend],
[0x1B80, 0x1B81, extend],
[0x1B82, 0x1B82, spacingMark],
[0x1BA1, 0x1BA1, spacingMark],
[0x1BA2, 0x1BA5, extend],
[0x1BA6, 0x1BA7, spacingMark],
[0x1BA8, 0x1BA9, extend],
[0x1BAA, 0x1BAA, spacingMark],
[0x1BAB, 0x1BAD, extend],
[0x1BE6, 0x1BE6, extend],
[0x1BE7, 0x1BE7, spacingMark],
[0x1BE8, 0x1BE9, extend],
[0x1BEA, 0x1BEC, spacingMark],
[0x1BED, 0x1BED, extend],
[0x1BEE, 0x1BEE, spacingMark],
[0x1BEF, 0x1BF1, extend],
[0x1BF2, 0x1BF3, spacingMark],
[0x1C24, 0x1C2B, spacingMark],
[0x1C2C, 0x1C33, extend],
[0x1C34, 0x1C35, spacingMark],
[0x1C36, 0x1C37, extend],
[0x1CD0, 0x1CD2, extend],
[0x1CD4, 0x1CE0, extend],
[0x1CE1, 0x1CE1, spacingMark],
[0x1CE2, 0x1CE8, extend],
[0x1CED, 0x1CED, extend],
[0x1CF4, 0x1CF4, extend],
[0x1CF7, 0x1CF7, spacingMark],
[0x1CF8, 0x1CF9, extend],
[0x1DC0, 0x1DF9, extend],
[0x1DFB, 0x1DFF, extend],
[0x200B, 0x200B, control],
[0x200C, 0x200C, extend],
[0x200D, 0x200D, zwj],
[0x200E, 0x200F, control],
[0x2028, 0x2028, control],
[0x2029, 0x2029, control],
[0x202A, 0x202E, control],
[0x203C, 0x203C, extendedPictographic],
[0x2049, 0x2049, extendedPictographic],
[0x2060, 0x2064, control],
[0x2065, 0x2065, control],
[0x2066, 0x206F, control],
[0x20D0, 0x20DC, extend],
[0x20DD, 0x20E0, extend],
[0x20E1, 0x20E1, extend],
[0x20E2, 0x20E4, extend],
[0x20E5, 0x20F0, extend],
[0x2122, 0x2122, extendedPictographic],
[0x2139, 0x2139, extendedPictographic],
[0x2194, 0x2199, extendedPictographic],
[0x21A9, 0x21AA, extendedPictographic],
[0x231A, 0x231B, extendedPictographic],
[0x2328, 0x2328, extendedPictographic],
[0x2388, 0x2388, extendedPictographic],
[0x23CF, 0x23CF, extendedPictographic],
[0x23E9, 0x23F3, extendedPictographic],
[0x23F8, 0x23FA, extendedPictographic],
[0x24C2, 0x24C2, extendedPictographic],
[0x25AA, 0x25AB, extendedPictographic],
[0x25B6, 0x25B6, extendedPictographic],
[0x25C0, 0x25C0, extendedPictographic],
[0x25FB, 0x25FE, extendedPictographic],
[0x2600, 0x2605, extendedPictographic],
[0x2607, 0x2612, extendedPictographic],
[0x2614, 0x2615, extendedPictographic],
[0x2616, 0x2617, extendedPictographic],
[0x2618, 0x2618, extendedPictographic],
[0x2619, 0x2619, extendedPictographic],
[0x261A, 0x266F, extendedPictographic],
[0x2670, 0x2671, extendedPictographic],
[0x2672, 0x267D, extendedPictographic],
[0x267E, 0x267F, extendedPictographic],
[0x2680, 0x2685, extendedPictographic],
[0x2690, 0x2691, extendedPictographic],
[0x2692, 0x269C, extendedPictographic],
[0x269D, 0x269D, extendedPictographic],
[0x269E, 0x269F, extendedPictographic],
[0x26A0, 0x26A1, extendedPictographic],
[0x26A2, 0x26B1, extendedPictographic],
[0x26B2, 0x26B2, extendedPictographic],
[0x26B3, 0x26BC, extendedPictographic],
[0x26BD, 0x26BF, extendedPictographic],
[0x26C0, 0x26C3, extendedPictographic],
[0x26C4, 0x26CD, extendedPictographic],
[0x26CE, 0x26CE, extendedPictographic],
[0x26CF, 0x26E1, extendedPictographic],
[0x26E2, 0x26E2, extendedPictographic],
[0x26E3, 0x26E3, extendedPictographic],
[0x26E4, 0x26E7, extendedPictographic],
[0x26E8, 0x26FF, extendedPictographic],
[0x2700, 0x2700, extendedPictographic],
[0x2701, 0x2704, extendedPictographic],
[0x2705, 0x2705, extendedPictographic],
[0x2708, 0x2709, extendedPictographic],
[0x270A, 0x270B, extendedPictographic],
[0x270C, 0x2712, extendedPictographic],
[0x2714, 0x2714, extendedPictographic],
[0x2716, 0x2716, extendedPictographic],
[0x271D, 0x271D, extendedPictographic],
[0x2721, 0x2721, extendedPictographic],
[0x2728, 0x2728, extendedPictographic],
[0x2733, 0x2734, extendedPictographic],
[0x2744, 0x2744, extendedPictographic],
[0x2747, 0x2747, extendedPictographic],
[0x274C, 0x274C, extendedPictographic],
[0x274E, 0x274E, extendedPictographic],
[0x2753, 0x2755, extendedPictographic],
[0x2757, 0x2757, extendedPictographic],
[0x2763, 0x2767, extendedPictographic],
[0x2795, 0x2797, extendedPictographic],
[0x27A1, 0x27A1, extendedPictographic],
[0x27B0, 0x27B0, extendedPictographic],
[0x27BF, 0x27BF, extendedPictographic],
[0x2934, 0x2935, extendedPictographic],
[0x2B05, 0x2B07, extendedPictographic],
[0x2B1B, 0x2B1C, extendedPictographic],
[0x2B50, 0x2B50, extendedPictographic],
[0x2B55, 0x2B55, extendedPictographic],
[0x2CEF, 0x2CF1, extend],
[0x2D7F, 0x2D7F, extend],
[0x2DE0, 0x2DFF, extend],
[0x302A, 0x302D, extend],
[0x302E, 0x302F, extend],
[0x3030, 0x3030, extendedPictographic],
[0x303D, 0x303D, extendedPictographic],
[0x3099, 0x309A, extend],
[0x3297, 0x3297, extendedPictographic],
[0x3299, 0x3299, extendedPictographic],
 
}
</lang>
9,476

edits