Category talk:Wren-upc: Difference between revisions

From Rosetta Code
Content added Content deleted
(Added description for new 'Wren-upc' module.)
 
(→‎User-perceived characters: Added source code (first part))
Line 5: Line 5:


Although the source code file is large by Wren library standards (over 1900 lines), approximately 1600 of those lines are needed to describe the property table which provides the raw material for text segmentation. In the interests of brevity, I have omitted the comments which accompanied the original table; the original should be referred to if any explanation is needed.
Although the source code file is large by Wren library standards (over 1900 lines), approximately 1600 of those lines are needed to describe the property table which provides the raw material for text segmentation. In the interests of brevity, I have omitted the comments which accompanied the original table; the original should be referred to if any explanation is needed.

===Source code===
<lang ecmascript>/* Module "upc.wren" */

/*
Properties encapsulates the Unicode properties relevant to this module and, as well as exposing
those properties themselves, contains a single public method to return the relevant property
for a given Unicode codepoint.
*/
class Properties {
// Unicode properties relevant to this module.
static any { 0 }
static prepend { 1 }
static cr { 2 }
static lf { 3 }
static control { 4 }
static extend { 5 }
static regionalIndicator { 6 }
static spacingMark { 7 }
static l { 8 }
static v { 9 }
static t { 10 }
static lv { 11 }
static lvt { 12 }
static zwj { 13 }
static extendedPictographic { 14 }

// Private method to initialize the codepoints table.
// The table maps each inclusive range of codepoints to a Unicode property.
// The ranges are sorted in ascending order to enable a binary search of the table by codepoint.
static init_() {
__codePoints = [
[0x0000, 0x0009, control],
[0x000A, 0x000A, lf],
[0x000B, 0x000C, control],
[0x000D, 0x000D, cr],
[0x000E, 0x001F, control],
[0x007F, 0x009F, control],
[0x00A9, 0x00A9, extendedPictographic],
[0x00AD, 0x00AD, control],
[0x00AE, 0x00AE, extendedPictographic],
[0x0300, 0x036F, extend],
[0x0483, 0x0487, extend],
[0x0488, 0x0489, extend],
[0x0591, 0x05BD, extend],
[0x05BF, 0x05BF, extend],
[0x05C1, 0x05C2, extend],
[0x05C4, 0x05C5, extend],
[0x05C7, 0x05C7, extend],
[0x0600, 0x0605, prepend],
[0x0610, 0x061A, extend],
[0x061C, 0x061C, control],
[0x064B, 0x065F, extend],
[0x0670, 0x0670, extend],
[0x06D6, 0x06DC, extend],
[0x06DD, 0x06DD, prepend],
[0x06DF, 0x06E4, extend],
[0x06E7, 0x06E8, extend],
[0x06EA, 0x06ED, extend],
[0x070F, 0x070F, prepend],
[0x0711, 0x0711, extend],
[0x0730, 0x074A, extend],
[0x07A6, 0x07B0, extend],
[0x07EB, 0x07F3, extend],
[0x07FD, 0x07FD, extend],
[0x0816, 0x0819, extend],
[0x081B, 0x0823, extend],
[0x0825, 0x0827, extend],
[0x0829, 0x082D, extend],
[0x0859, 0x085B, extend],
[0x08D3, 0x08E1, extend],
[0x08E2, 0x08E2, prepend],
[0x08E3, 0x0902, extend],
[0x0903, 0x0903, spacingMark],
[0x093A, 0x093A, extend],
[0x093B, 0x093B, spacingMark],
[0x093C, 0x093C, extend],
[0x093E, 0x0940, spacingMark],
[0x0941, 0x0948, extend],
[0x0949, 0x094C, spacingMark],
[0x094D, 0x094D, extend],
[0x094E, 0x094F, spacingMark],
[0x0951, 0x0957, extend],
[0x0962, 0x0963, extend],
[0x0981, 0x0981, extend],
[0x0982, 0x0983, spacingMark],
[0x09BC, 0x09BC, extend],
[0x09BE, 0x09BE, extend],
[0x09BF, 0x09C0, spacingMark],
[0x09C1, 0x09C4, extend],
[0x09C7, 0x09C8, spacingMark],
[0x09CB, 0x09CC, spacingMark],
[0x09CD, 0x09CD, extend],
[0x09D7, 0x09D7, extend],
[0x09E2, 0x09E3, extend],
[0x09FE, 0x09FE, extend],
[0x0A01, 0x0A02, extend],
[0x0A03, 0x0A03, spacingMark],
[0x0A3C, 0x0A3C, extend],
[0x0A3E, 0x0A40, spacingMark],
[0x0A41, 0x0A42, extend],
[0x0A47, 0x0A48, extend],
[0x0A4B, 0x0A4D, extend],
[0x0A51, 0x0A51, extend],
[0x0A70, 0x0A71, extend],
[0x0A75, 0x0A75, extend],
[0x0A81, 0x0A82, extend],
[0x0A83, 0x0A83, spacingMark],
[0x0ABC, 0x0ABC, extend],
[0x0ABE, 0x0AC0, spacingMark],
[0x0AC1, 0x0AC5, extend],
[0x0AC7, 0x0AC8, extend],
[0x0AC9, 0x0AC9, spacingMark],
[0x0ACB, 0x0ACC, spacingMark],
[0x0ACD, 0x0ACD, extend],
[0x0AE2, 0x0AE3, extend],
[0x0AFA, 0x0AFF, extend],
[0x0B01, 0x0B01, extend],
[0x0B02, 0x0B03, spacingMark],
[0x0B3C, 0x0B3C, extend],
[0x0B3E, 0x0B3E, extend],
[0x0B3F, 0x0B3F, extend],
[0x0B40, 0x0B40, spacingMark],
[0x0B41, 0x0B44, extend],
[0x0B47, 0x0B48, spacingMark],
[0x0B4B, 0x0B4C, spacingMark],
[0x0B4D, 0x0B4D, extend],
[0x0B56, 0x0B56, extend],
[0x0B57, 0x0B57, extend],
[0x0B62, 0x0B63, extend],
[0x0B82, 0x0B82, extend],
[0x0BBE, 0x0BBE, extend],
[0x0BBF, 0x0BBF, spacingMark],
[0x0BC0, 0x0BC0, extend],
[0x0BC1, 0x0BC2, spacingMark],
[0x0BC6, 0x0BC8, spacingMark],
[0x0BCA, 0x0BCC, spacingMark],
[0x0BCD, 0x0BCD, extend],
[0x0BD7, 0x0BD7, extend],
[0x0C00, 0x0C00, extend],
[0x0C01, 0x0C03, spacingMark],
[0x0C04, 0x0C04, extend],
[0x0C3E, 0x0C40, extend],
[0x0C41, 0x0C44, spacingMark],
[0x0C46, 0x0C48, extend],
[0x0C4A, 0x0C4D, extend],
[0x0C55, 0x0C56, extend],
[0x0C62, 0x0C63, extend],
[0x0C81, 0x0C81, extend],
[0x0C82, 0x0C83, spacingMark],
[0x0CBC, 0x0CBC, extend],
[0x0CBE, 0x0CBE, spacingMark],
[0x0CBF, 0x0CBF, extend],
[0x0CC0, 0x0CC1, spacingMark],
[0x0CC2, 0x0CC2, extend],
[0x0CC3, 0x0CC4, spacingMark],
[0x0CC6, 0x0CC6, extend],
[0x0CC7, 0x0CC8, spacingMark],
[0x0CCA, 0x0CCB, spacingMark],
[0x0CCC, 0x0CCD, extend],
[0x0CD5, 0x0CD6, extend],
[0x0CE2, 0x0CE3, extend],
[0x0D00, 0x0D01, extend],
[0x0D02, 0x0D03, spacingMark],
[0x0D3B, 0x0D3C, extend],
[0x0D3E, 0x0D3E, extend],
[0x0D3F, 0x0D40, spacingMark],
[0x0D41, 0x0D44, extend],
[0x0D46, 0x0D48, spacingMark],
[0x0D4A, 0x0D4C, spacingMark],
[0x0D4D, 0x0D4D, extend],
[0x0D4E, 0x0D4E, prepend],
[0x0D57, 0x0D57, extend],
[0x0D62, 0x0D63, extend],
[0x0D82, 0x0D83, spacingMark],
[0x0DCA, 0x0DCA, extend],
[0x0DCF, 0x0DCF, extend],
[0x0DD0, 0x0DD1, spacingMark],
[0x0DD2, 0x0DD4, extend],
[0x0DD6, 0x0DD6, extend],
[0x0DD8, 0x0DDE, spacingMark],
[0x0DDF, 0x0DDF, extend],
[0x0DF2, 0x0DF3, spacingMark],
[0x0E31, 0x0E31, extend],
[0x0E33, 0x0E33, spacingMark],
[0x0E34, 0x0E3A, extend],
[0x0E47, 0x0E4E, extend],
[0x0EB1, 0x0EB1, extend],
[0x0EB3, 0x0EB3, spacingMark],
[0x0EB4, 0x0EBC, extend],
[0x0EC8, 0x0ECD, extend],
[0x0F18, 0x0F19, extend],
[0x0F35, 0x0F35, extend],
[0x0F37, 0x0F37, extend],
[0x0F39, 0x0F39, extend],
[0x0F3E, 0x0F3F, spacingMark],
[0x0F71, 0x0F7E, extend],
[0x0F7F, 0x0F7F, spacingMark],
[0x0F80, 0x0F84, extend],
[0x0F86, 0x0F87, extend],
[0x0F8D, 0x0F97, extend],
[0x0F99, 0x0FBC, extend],
[0x0FC6, 0x0FC6, extend],
[0x102D, 0x1030, extend],
[0x1031, 0x1031, spacingMark],
[0x1032, 0x1037, extend],
[0x1039, 0x103A, extend],
[0x103B, 0x103C, spacingMark],
[0x103D, 0x103E, extend],
[0x1056, 0x1057, spacingMark],
[0x1058, 0x1059, extend],
[0x105E, 0x1060, extend],
[0x1071, 0x1074, extend],
[0x1082, 0x1082, extend],
[0x1084, 0x1084, spacingMark],
[0x1085, 0x1086, extend],
[0x108D, 0x108D, extend],
[0x109D, 0x109D, extend],
[0x1100, 0x115F, l],
[0x1160, 0x11A7, v],
[0x11A8, 0x11FF, t],
[0x135D, 0x135F, extend],
[0x1712, 0x1714, extend],
[0x1732, 0x1734, extend],
[0x1752, 0x1753, extend],
[0x1772, 0x1773, extend],
[0x17B4, 0x17B5, extend],
[0x17B6, 0x17B6, spacingMark],
[0x17B7, 0x17BD, extend],
[0x17BE, 0x17C5, spacingMark],
[0x17C6, 0x17C6, extend],
[0x17C7, 0x17C8, spacingMark],
[0x17C9, 0x17D3, extend],
[0x17DD, 0x17DD, extend],
[0x180B, 0x180D, extend],
[0x180E, 0x180E, control],
[0x1885, 0x1886, extend],
[0x18A9, 0x18A9, extend],
[0x1920, 0x1922, extend],
[0x1923, 0x1926, spacingMark],
[0x1927, 0x1928, extend],
[0x1929, 0x192B, spacingMark],
[0x1930, 0x1931, spacingMark],
[0x1932, 0x1932, extend],
[0x1933, 0x1938, spacingMark],
[0x1939, 0x193B, extend],
[0x1A17, 0x1A18, extend],
[0x1A19, 0x1A1A, spacingMark],
[0x1A1B, 0x1A1B, extend],
[0x1A55, 0x1A55, spacingMark],
[0x1A56, 0x1A56, extend],
[0x1A57, 0x1A57, spacingMark],
[0x1A58, 0x1A5E, extend],
[0x1A60, 0x1A60, extend],
[0x1A62, 0x1A62, extend],
[0x1A65, 0x1A6C, extend],
[0x1A6D, 0x1A72, spacingMark],
[0x1A73, 0x1A7C, extend],
[0x1A7F, 0x1A7F, extend],
[0x1AB0, 0x1ABD, extend],
[0x1ABE, 0x1ABE, extend],
[0x1B00, 0x1B03, extend],
[0x1B04, 0x1B04, spacingMark],
[0x1B34, 0x1B34, extend],
[0x1B35, 0x1B35, extend],
[0x1B36, 0x1B3A, extend],
[0x1B3B, 0x1B3B, spacingMark],
[0x1B3C, 0x1B3C, extend],
[0x1B3D, 0x1B41, spacingMark],
[0x1B42, 0x1B42, extend],
[0x1B43, 0x1B44, spacingMark],
[0x1B6B, 0x1B73, extend],
[0x1B80, 0x1B81, extend],
[0x1B82, 0x1B82, spacingMark],
[0x1BA1, 0x1BA1, spacingMark],
[0x1BA2, 0x1BA5, extend],
[0x1BA6, 0x1BA7, spacingMark],
[0x1BA8, 0x1BA9, extend],
[0x1BAA, 0x1BAA, spacingMark],
[0x1BAB, 0x1BAD, extend],
[0x1BE6, 0x1BE6, extend],
[0x1BE7, 0x1BE7, spacingMark],
[0x1BE8, 0x1BE9, extend],
[0x1BEA, 0x1BEC, spacingMark],
[0x1BED, 0x1BED, extend],
[0x1BEE, 0x1BEE, spacingMark],
[0x1BEF, 0x1BF1, extend],
[0x1BF2, 0x1BF3, spacingMark],
[0x1C24, 0x1C2B, spacingMark],
[0x1C2C, 0x1C33, extend],
[0x1C34, 0x1C35, spacingMark],
[0x1C36, 0x1C37, extend],
[0x1CD0, 0x1CD2, extend],
[0x1CD4, 0x1CE0, extend],
[0x1CE1, 0x1CE1, spacingMark],
[0x1CE2, 0x1CE8, extend],
[0x1CED, 0x1CED, extend],
[0x1CF4, 0x1CF4, extend],
[0x1CF7, 0x1CF7, spacingMark],
[0x1CF8, 0x1CF9, extend],
[0x1DC0, 0x1DF9, extend],
[0x1DFB, 0x1DFF, extend],
[0x200B, 0x200B, control],
[0x200C, 0x200C, extend],
[0x200D, 0x200D, zwj],
[0x200E, 0x200F, control],
[0x2028, 0x2028, control],
[0x2029, 0x2029, control],
[0x202A, 0x202E, control],
[0x203C, 0x203C, extendedPictographic],
[0x2049, 0x2049, extendedPictographic],
[0x2060, 0x2064, control],
[0x2065, 0x2065, control],
[0x2066, 0x206F, control],
[0x20D0, 0x20DC, extend],
[0x20DD, 0x20E0, extend],
[0x20E1, 0x20E1, extend],
[0x20E2, 0x20E4, extend],
[0x20E5, 0x20F0, extend],
[0x2122, 0x2122, extendedPictographic],
[0x2139, 0x2139, extendedPictographic],
[0x2194, 0x2199, extendedPictographic],
[0x21A9, 0x21AA, extendedPictographic],
[0x231A, 0x231B, extendedPictographic],
[0x2328, 0x2328, extendedPictographic],
[0x2388, 0x2388, extendedPictographic],
[0x23CF, 0x23CF, extendedPictographic],
[0x23E9, 0x23F3, extendedPictographic],
[0x23F8, 0x23FA, extendedPictographic],
[0x24C2, 0x24C2, extendedPictographic],
[0x25AA, 0x25AB, extendedPictographic],
[0x25B6, 0x25B6, extendedPictographic],
[0x25C0, 0x25C0, extendedPictographic],
[0x25FB, 0x25FE, extendedPictographic],
[0x2600, 0x2605, extendedPictographic],
[0x2607, 0x2612, extendedPictographic],
[0x2614, 0x2615, extendedPictographic],
[0x2616, 0x2617, extendedPictographic],
[0x2618, 0x2618, extendedPictographic],
[0x2619, 0x2619, extendedPictographic],
[0x261A, 0x266F, extendedPictographic],
[0x2670, 0x2671, extendedPictographic],
[0x2672, 0x267D, extendedPictographic],
[0x267E, 0x267F, extendedPictographic],
[0x2680, 0x2685, extendedPictographic],
[0x2690, 0x2691, extendedPictographic],
[0x2692, 0x269C, extendedPictographic],
[0x269D, 0x269D, extendedPictographic],
[0x269E, 0x269F, extendedPictographic],
[0x26A0, 0x26A1, extendedPictographic],
[0x26A2, 0x26B1, extendedPictographic],
[0x26B2, 0x26B2, extendedPictographic],
[0x26B3, 0x26BC, extendedPictographic],
[0x26BD, 0x26BF, extendedPictographic],
[0x26C0, 0x26C3, extendedPictographic],
[0x26C4, 0x26CD, extendedPictographic],
[0x26CE, 0x26CE, extendedPictographic],
[0x26CF, 0x26E1, extendedPictographic],
[0x26E2, 0x26E2, extendedPictographic],
[0x26E3, 0x26E3, extendedPictographic],
[0x26E4, 0x26E7, extendedPictographic],
[0x26E8, 0x26FF, extendedPictographic],
[0x2700, 0x2700, extendedPictographic],
[0x2701, 0x2704, extendedPictographic],
[0x2705, 0x2705, extendedPictographic],
[0x2708, 0x2709, extendedPictographic],
[0x270A, 0x270B, extendedPictographic],
[0x270C, 0x2712, extendedPictographic],
[0x2714, 0x2714, extendedPictographic],
[0x2716, 0x2716, extendedPictographic],
[0x271D, 0x271D, extendedPictographic],
[0x2721, 0x2721, extendedPictographic],
[0x2728, 0x2728, extendedPictographic],
[0x2733, 0x2734, extendedPictographic],
[0x2744, 0x2744, extendedPictographic],
[0x2747, 0x2747, extendedPictographic],
[0x274C, 0x274C, extendedPictographic],
[0x274E, 0x274E, extendedPictographic],
[0x2753, 0x2755, extendedPictographic],
[0x2757, 0x2757, extendedPictographic],
[0x2763, 0x2767, extendedPictographic],
[0x2795, 0x2797, extendedPictographic],
[0x27A1, 0x27A1, extendedPictographic],
[0x27B0, 0x27B0, extendedPictographic],
[0x27BF, 0x27BF, extendedPictographic],
[0x2934, 0x2935, extendedPictographic],
[0x2B05, 0x2B07, extendedPictographic],
[0x2B1B, 0x2B1C, extendedPictographic],
[0x2B50, 0x2B50, extendedPictographic],
[0x2B55, 0x2B55, extendedPictographic],
[0x2CEF, 0x2CF1, extend],
[0x2D7F, 0x2D7F, extend],
[0x2DE0, 0x2DFF, extend],
[0x302A, 0x302D, extend],
[0x302E, 0x302F, extend],
[0x3030, 0x3030, extendedPictographic],
[0x303D, 0x303D, extendedPictographic],
[0x3099, 0x309A, extend],
[0x3297, 0x3297, extendedPictographic],
[0x3299, 0x3299, extendedPictographic],

}
</lang>

Revision as of 18:44, 15 July 2020

User-perceived characters

In Unicode, a user-perceived character (or grapheme cluster) can comprise one or more codepoints, and the process of splitting a string into such grapheme clusters is described in Unicode Standard Annex #29.

Given the complexity of this process, Wren doesn't have built-in support for it and this module aims to remedy that situation. It is based on Oliver Kuederle's Unicode Text Segmentation for Go library, which is subject to the MIT License and is currently based on Unicode version 12.0.

Although the source code file is large by Wren library standards (over 1900 lines), approximately 1600 of those lines are needed to describe the property table which provides the raw material for text segmentation. In the interests of brevity, I have omitted the comments which accompanied the original table; the original should be referred to if any explanation is needed.

Source code

<lang ecmascript>/* Module "upc.wren" */

/*

   Properties encapsulates the Unicode properties relevant to this module and, as well as exposing
   those properties themselves, contains a single public method to return the relevant property
   for a given Unicode codepoint.
*/

class Properties {

   // Unicode properties relevant to this module.
   static any                  {  0 }

static prepend { 1 } static cr { 2 } static lf { 3 } static control { 4 } static extend { 5 } static regionalIndicator { 6 } static spacingMark { 7 } static l { 8 } static v { 9 } static t { 10 } static lv { 11 } static lvt { 12 } static zwj { 13 } static extendedPictographic { 14 }

   // Private method to initialize the codepoints table.
   // The table maps each inclusive range of codepoints to a Unicode property.
   // The ranges are sorted in ascending order to enable a binary search of the table by codepoint.
   static init_() {
       __codePoints = [
           [0x0000, 0x0009, control],
           [0x000A, 0x000A, lf],
           [0x000B, 0x000C, control],
           [0x000D, 0x000D, cr],
           [0x000E, 0x001F, control],
           [0x007F, 0x009F, control],
           [0x00A9, 0x00A9, extendedPictographic],
           [0x00AD, 0x00AD, control],
           [0x00AE, 0x00AE, extendedPictographic],
           [0x0300, 0x036F, extend],
           [0x0483, 0x0487, extend],
           [0x0488, 0x0489, extend],
           [0x0591, 0x05BD, extend],
           [0x05BF, 0x05BF, extend],
           [0x05C1, 0x05C2, extend],
           [0x05C4, 0x05C5, extend],
           [0x05C7, 0x05C7, extend],
           [0x0600, 0x0605, prepend],
           [0x0610, 0x061A, extend],
           [0x061C, 0x061C, control],
           [0x064B, 0x065F, extend],
           [0x0670, 0x0670, extend],
           [0x06D6, 0x06DC, extend],
           [0x06DD, 0x06DD, prepend],
           [0x06DF, 0x06E4, extend],
           [0x06E7, 0x06E8, extend],
           [0x06EA, 0x06ED, extend],
           [0x070F, 0x070F, prepend],
           [0x0711, 0x0711, extend],
           [0x0730, 0x074A, extend],
           [0x07A6, 0x07B0, extend],
           [0x07EB, 0x07F3, extend],
           [0x07FD, 0x07FD, extend],
           [0x0816, 0x0819, extend],
           [0x081B, 0x0823, extend],
           [0x0825, 0x0827, extend],
           [0x0829, 0x082D, extend],
           [0x0859, 0x085B, extend],
           [0x08D3, 0x08E1, extend],
           [0x08E2, 0x08E2, prepend],
           [0x08E3, 0x0902, extend],
           [0x0903, 0x0903, spacingMark],
           [0x093A, 0x093A, extend],
           [0x093B, 0x093B, spacingMark],
           [0x093C, 0x093C, extend],
           [0x093E, 0x0940, spacingMark],
           [0x0941, 0x0948, extend],
           [0x0949, 0x094C, spacingMark],
           [0x094D, 0x094D, extend],
           [0x094E, 0x094F, spacingMark],
           [0x0951, 0x0957, extend],
           [0x0962, 0x0963, extend],
           [0x0981, 0x0981, extend],
           [0x0982, 0x0983, spacingMark],
           [0x09BC, 0x09BC, extend],
           [0x09BE, 0x09BE, extend],
           [0x09BF, 0x09C0, spacingMark],
           [0x09C1, 0x09C4, extend],
           [0x09C7, 0x09C8, spacingMark],
           [0x09CB, 0x09CC, spacingMark],
           [0x09CD, 0x09CD, extend],
           [0x09D7, 0x09D7, extend],
           [0x09E2, 0x09E3, extend],
           [0x09FE, 0x09FE, extend],
           [0x0A01, 0x0A02, extend],
           [0x0A03, 0x0A03, spacingMark],
           [0x0A3C, 0x0A3C, extend],
           [0x0A3E, 0x0A40, spacingMark],
           [0x0A41, 0x0A42, extend],
           [0x0A47, 0x0A48, extend],
           [0x0A4B, 0x0A4D, extend],
           [0x0A51, 0x0A51, extend],
           [0x0A70, 0x0A71, extend],
           [0x0A75, 0x0A75, extend],
           [0x0A81, 0x0A82, extend],
           [0x0A83, 0x0A83, spacingMark],
           [0x0ABC, 0x0ABC, extend],
           [0x0ABE, 0x0AC0, spacingMark],
           [0x0AC1, 0x0AC5, extend],
           [0x0AC7, 0x0AC8, extend],
           [0x0AC9, 0x0AC9, spacingMark],
           [0x0ACB, 0x0ACC, spacingMark],
           [0x0ACD, 0x0ACD, extend],
           [0x0AE2, 0x0AE3, extend],
           [0x0AFA, 0x0AFF, extend],
           [0x0B01, 0x0B01, extend],
           [0x0B02, 0x0B03, spacingMark],
           [0x0B3C, 0x0B3C, extend],
           [0x0B3E, 0x0B3E, extend],
           [0x0B3F, 0x0B3F, extend],
           [0x0B40, 0x0B40, spacingMark],
           [0x0B41, 0x0B44, extend],
           [0x0B47, 0x0B48, spacingMark],
           [0x0B4B, 0x0B4C, spacingMark],
           [0x0B4D, 0x0B4D, extend],
           [0x0B56, 0x0B56, extend],
           [0x0B57, 0x0B57, extend],
           [0x0B62, 0x0B63, extend],
           [0x0B82, 0x0B82, extend],
           [0x0BBE, 0x0BBE, extend],
           [0x0BBF, 0x0BBF, spacingMark],
           [0x0BC0, 0x0BC0, extend],
           [0x0BC1, 0x0BC2, spacingMark],
           [0x0BC6, 0x0BC8, spacingMark],
           [0x0BCA, 0x0BCC, spacingMark],
           [0x0BCD, 0x0BCD, extend],
           [0x0BD7, 0x0BD7, extend],
           [0x0C00, 0x0C00, extend],
           [0x0C01, 0x0C03, spacingMark],
           [0x0C04, 0x0C04, extend],
           [0x0C3E, 0x0C40, extend],
           [0x0C41, 0x0C44, spacingMark],
           [0x0C46, 0x0C48, extend],
           [0x0C4A, 0x0C4D, extend],
           [0x0C55, 0x0C56, extend],
           [0x0C62, 0x0C63, extend],
           [0x0C81, 0x0C81, extend],
           [0x0C82, 0x0C83, spacingMark],
           [0x0CBC, 0x0CBC, extend],
           [0x0CBE, 0x0CBE, spacingMark],
           [0x0CBF, 0x0CBF, extend],
           [0x0CC0, 0x0CC1, spacingMark],
           [0x0CC2, 0x0CC2, extend],
           [0x0CC3, 0x0CC4, spacingMark],
           [0x0CC6, 0x0CC6, extend],
           [0x0CC7, 0x0CC8, spacingMark],
           [0x0CCA, 0x0CCB, spacingMark],
           [0x0CCC, 0x0CCD, extend],
           [0x0CD5, 0x0CD6, extend],
           [0x0CE2, 0x0CE3, extend],
           [0x0D00, 0x0D01, extend],
           [0x0D02, 0x0D03, spacingMark],
           [0x0D3B, 0x0D3C, extend],
           [0x0D3E, 0x0D3E, extend],
           [0x0D3F, 0x0D40, spacingMark],
           [0x0D41, 0x0D44, extend],
           [0x0D46, 0x0D48, spacingMark],
           [0x0D4A, 0x0D4C, spacingMark],
           [0x0D4D, 0x0D4D, extend],
           [0x0D4E, 0x0D4E, prepend],
           [0x0D57, 0x0D57, extend],
           [0x0D62, 0x0D63, extend],
           [0x0D82, 0x0D83, spacingMark],
           [0x0DCA, 0x0DCA, extend],
           [0x0DCF, 0x0DCF, extend],
           [0x0DD0, 0x0DD1, spacingMark],
           [0x0DD2, 0x0DD4, extend],
           [0x0DD6, 0x0DD6, extend],
           [0x0DD8, 0x0DDE, spacingMark],
           [0x0DDF, 0x0DDF, extend],
           [0x0DF2, 0x0DF3, spacingMark],
           [0x0E31, 0x0E31, extend],
           [0x0E33, 0x0E33, spacingMark],
           [0x0E34, 0x0E3A, extend],
           [0x0E47, 0x0E4E, extend],
           [0x0EB1, 0x0EB1, extend],
           [0x0EB3, 0x0EB3, spacingMark],
           [0x0EB4, 0x0EBC, extend],
           [0x0EC8, 0x0ECD, extend],
           [0x0F18, 0x0F19, extend],
           [0x0F35, 0x0F35, extend],
           [0x0F37, 0x0F37, extend],
           [0x0F39, 0x0F39, extend],
           [0x0F3E, 0x0F3F, spacingMark],
           [0x0F71, 0x0F7E, extend],
           [0x0F7F, 0x0F7F, spacingMark],
           [0x0F80, 0x0F84, extend],
           [0x0F86, 0x0F87, extend],
           [0x0F8D, 0x0F97, extend],
           [0x0F99, 0x0FBC, extend],
           [0x0FC6, 0x0FC6, extend],
           [0x102D, 0x1030, extend],
           [0x1031, 0x1031, spacingMark],
           [0x1032, 0x1037, extend],
           [0x1039, 0x103A, extend],
           [0x103B, 0x103C, spacingMark],
           [0x103D, 0x103E, extend],
           [0x1056, 0x1057, spacingMark],
           [0x1058, 0x1059, extend],
           [0x105E, 0x1060, extend],
           [0x1071, 0x1074, extend],
           [0x1082, 0x1082, extend],
           [0x1084, 0x1084, spacingMark],
           [0x1085, 0x1086, extend],
           [0x108D, 0x108D, extend],
           [0x109D, 0x109D, extend],
           [0x1100, 0x115F, l],
           [0x1160, 0x11A7, v],
           [0x11A8, 0x11FF, t],
           [0x135D, 0x135F, extend],
           [0x1712, 0x1714, extend],
           [0x1732, 0x1734, extend],
           [0x1752, 0x1753, extend],
           [0x1772, 0x1773, extend],
           [0x17B4, 0x17B5, extend],
           [0x17B6, 0x17B6, spacingMark],
           [0x17B7, 0x17BD, extend],
           [0x17BE, 0x17C5, spacingMark],
           [0x17C6, 0x17C6, extend],
           [0x17C7, 0x17C8, spacingMark],
           [0x17C9, 0x17D3, extend],
           [0x17DD, 0x17DD, extend],
           [0x180B, 0x180D, extend],
           [0x180E, 0x180E, control],
           [0x1885, 0x1886, extend],
           [0x18A9, 0x18A9, extend],
           [0x1920, 0x1922, extend],
           [0x1923, 0x1926, spacingMark],
           [0x1927, 0x1928, extend],
           [0x1929, 0x192B, spacingMark],
           [0x1930, 0x1931, spacingMark],
           [0x1932, 0x1932, extend],
           [0x1933, 0x1938, spacingMark],
           [0x1939, 0x193B, extend],
           [0x1A17, 0x1A18, extend],
           [0x1A19, 0x1A1A, spacingMark],
           [0x1A1B, 0x1A1B, extend],
           [0x1A55, 0x1A55, spacingMark],
           [0x1A56, 0x1A56, extend],
           [0x1A57, 0x1A57, spacingMark],
           [0x1A58, 0x1A5E, extend],
           [0x1A60, 0x1A60, extend],
           [0x1A62, 0x1A62, extend],
           [0x1A65, 0x1A6C, extend],
           [0x1A6D, 0x1A72, spacingMark],
           [0x1A73, 0x1A7C, extend],
           [0x1A7F, 0x1A7F, extend],
           [0x1AB0, 0x1ABD, extend],
           [0x1ABE, 0x1ABE, extend],
           [0x1B00, 0x1B03, extend],
           [0x1B04, 0x1B04, spacingMark],
           [0x1B34, 0x1B34, extend],
           [0x1B35, 0x1B35, extend],
           [0x1B36, 0x1B3A, extend],
           [0x1B3B, 0x1B3B, spacingMark],
           [0x1B3C, 0x1B3C, extend],
           [0x1B3D, 0x1B41, spacingMark],
           [0x1B42, 0x1B42, extend],
           [0x1B43, 0x1B44, spacingMark],
           [0x1B6B, 0x1B73, extend],
           [0x1B80, 0x1B81, extend],
           [0x1B82, 0x1B82, spacingMark],
           [0x1BA1, 0x1BA1, spacingMark],
           [0x1BA2, 0x1BA5, extend],
           [0x1BA6, 0x1BA7, spacingMark],
           [0x1BA8, 0x1BA9, extend],
           [0x1BAA, 0x1BAA, spacingMark],
           [0x1BAB, 0x1BAD, extend],
           [0x1BE6, 0x1BE6, extend],
           [0x1BE7, 0x1BE7, spacingMark],
           [0x1BE8, 0x1BE9, extend],
           [0x1BEA, 0x1BEC, spacingMark],
           [0x1BED, 0x1BED, extend],
           [0x1BEE, 0x1BEE, spacingMark],
           [0x1BEF, 0x1BF1, extend],
           [0x1BF2, 0x1BF3, spacingMark],
           [0x1C24, 0x1C2B, spacingMark],
           [0x1C2C, 0x1C33, extend],
           [0x1C34, 0x1C35, spacingMark],
           [0x1C36, 0x1C37, extend],
           [0x1CD0, 0x1CD2, extend],
           [0x1CD4, 0x1CE0, extend],
           [0x1CE1, 0x1CE1, spacingMark],
           [0x1CE2, 0x1CE8, extend],
           [0x1CED, 0x1CED, extend],
           [0x1CF4, 0x1CF4, extend],
           [0x1CF7, 0x1CF7, spacingMark],
           [0x1CF8, 0x1CF9, extend],
           [0x1DC0, 0x1DF9, extend],
           [0x1DFB, 0x1DFF, extend],
           [0x200B, 0x200B, control],
           [0x200C, 0x200C, extend],
           [0x200D, 0x200D, zwj],
           [0x200E, 0x200F, control],
           [0x2028, 0x2028, control],
           [0x2029, 0x2029, control],
           [0x202A, 0x202E, control],
           [0x203C, 0x203C, extendedPictographic],
           [0x2049, 0x2049, extendedPictographic],
           [0x2060, 0x2064, control],
           [0x2065, 0x2065, control],
           [0x2066, 0x206F, control],
           [0x20D0, 0x20DC, extend],
           [0x20DD, 0x20E0, extend],
           [0x20E1, 0x20E1, extend],
           [0x20E2, 0x20E4, extend],
           [0x20E5, 0x20F0, extend],
           [0x2122, 0x2122, extendedPictographic],
           [0x2139, 0x2139, extendedPictographic],
           [0x2194, 0x2199, extendedPictographic],
           [0x21A9, 0x21AA, extendedPictographic],
           [0x231A, 0x231B, extendedPictographic],
           [0x2328, 0x2328, extendedPictographic],
           [0x2388, 0x2388, extendedPictographic],
           [0x23CF, 0x23CF, extendedPictographic],
           [0x23E9, 0x23F3, extendedPictographic],
           [0x23F8, 0x23FA, extendedPictographic],
           [0x24C2, 0x24C2, extendedPictographic],
           [0x25AA, 0x25AB, extendedPictographic],
           [0x25B6, 0x25B6, extendedPictographic],
           [0x25C0, 0x25C0, extendedPictographic],
           [0x25FB, 0x25FE, extendedPictographic],
           [0x2600, 0x2605, extendedPictographic],
           [0x2607, 0x2612, extendedPictographic],
           [0x2614, 0x2615, extendedPictographic],
           [0x2616, 0x2617, extendedPictographic],
           [0x2618, 0x2618, extendedPictographic],
           [0x2619, 0x2619, extendedPictographic],
           [0x261A, 0x266F, extendedPictographic],
           [0x2670, 0x2671, extendedPictographic],
           [0x2672, 0x267D, extendedPictographic],
           [0x267E, 0x267F, extendedPictographic],
           [0x2680, 0x2685, extendedPictographic],
           [0x2690, 0x2691, extendedPictographic],
           [0x2692, 0x269C, extendedPictographic],
           [0x269D, 0x269D, extendedPictographic],
           [0x269E, 0x269F, extendedPictographic],
           [0x26A0, 0x26A1, extendedPictographic],
           [0x26A2, 0x26B1, extendedPictographic],
           [0x26B2, 0x26B2, extendedPictographic],
           [0x26B3, 0x26BC, extendedPictographic],
           [0x26BD, 0x26BF, extendedPictographic],
           [0x26C0, 0x26C3, extendedPictographic],
           [0x26C4, 0x26CD, extendedPictographic],
           [0x26CE, 0x26CE, extendedPictographic],
           [0x26CF, 0x26E1, extendedPictographic],
           [0x26E2, 0x26E2, extendedPictographic],
           [0x26E3, 0x26E3, extendedPictographic],
           [0x26E4, 0x26E7, extendedPictographic],
           [0x26E8, 0x26FF, extendedPictographic],
           [0x2700, 0x2700, extendedPictographic],
           [0x2701, 0x2704, extendedPictographic],
           [0x2705, 0x2705, extendedPictographic],
           [0x2708, 0x2709, extendedPictographic],
           [0x270A, 0x270B, extendedPictographic],
           [0x270C, 0x2712, extendedPictographic],
           [0x2714, 0x2714, extendedPictographic],
           [0x2716, 0x2716, extendedPictographic],
           [0x271D, 0x271D, extendedPictographic],
           [0x2721, 0x2721, extendedPictographic],
           [0x2728, 0x2728, extendedPictographic],
           [0x2733, 0x2734, extendedPictographic],
           [0x2744, 0x2744, extendedPictographic],
           [0x2747, 0x2747, extendedPictographic],
           [0x274C, 0x274C, extendedPictographic],
           [0x274E, 0x274E, extendedPictographic],
           [0x2753, 0x2755, extendedPictographic],
           [0x2757, 0x2757, extendedPictographic],
           [0x2763, 0x2767, extendedPictographic],
           [0x2795, 0x2797, extendedPictographic],
           [0x27A1, 0x27A1, extendedPictographic],
           [0x27B0, 0x27B0, extendedPictographic],
           [0x27BF, 0x27BF, extendedPictographic],
           [0x2934, 0x2935, extendedPictographic],
           [0x2B05, 0x2B07, extendedPictographic],
           [0x2B1B, 0x2B1C, extendedPictographic],
           [0x2B50, 0x2B50, extendedPictographic],
           [0x2B55, 0x2B55, extendedPictographic],
           [0x2CEF, 0x2CF1, extend],
           [0x2D7F, 0x2D7F, extend],
           [0x2DE0, 0x2DFF, extend],
           [0x302A, 0x302D, extend],
           [0x302E, 0x302F, extend],
           [0x3030, 0x3030, extendedPictographic],
           [0x303D, 0x303D, extendedPictographic],
           [0x3099, 0x309A, extend],
           [0x3297, 0x3297, extendedPictographic],
           [0x3299, 0x3299, extendedPictographic],

} </lang>