diff options
author | ElOraiby <wael.eloraiby@gmail.com> | 2015-05-30 14:11:16 -0400 |
---|---|---|
committer | ElOraiby <wael.eloraiby@gmail.com> | 2015-05-30 14:11:16 -0400 |
commit | 5e1fd7a1ee767b6c8e0d7461b1044d93a42989a2 (patch) | |
tree | c0b85b9e60ca01765ddf98309d41472ffe2639e1 | |
parent | 8d4a1bb7f4f6c4f5abb6596172ad4b6b181e12e3 (diff) |
generalize the lam-alef case
-rw-r--r-- | arabic_test.txt | 13 | ||||
-rw-r--r-- | arabtype.c | 135 | ||||
-rw-r--r-- | mainwindow.cpp | 21 |
3 files changed, 104 insertions, 65 deletions
diff --git a/arabic_test.txt b/arabic_test.txt index a5d431d..2d49a48 100644 --- a/arabic_test.txt +++ b/arabic_test.txt @@ -1 +1,12 @@ -اعطني الناي و غني فالغناﺀ سر الوجود فأنين الناي يبقئ بعد ان يفنئ الوجود +أعطني الناي وغني فالغنا سر الوجود +وأنين الناي يبقى بعد أن يفنى الوجود +هل إتخذت الغاب مثلي منزلاً دون القصور +فتتبعت السواقي وتسلقت الصخور +هل تحممت بعطر وتنشفت بنور +وشربت الفجر خمراً فى كؤوس من أثير +هل جلست العصر مثلي بين جفنات العنب +والعناقيد تدلت كثريات الذهب +هل فرشت العشب ليلاً وتلحفت الفضاء +زاهداً في ما سيأتي ناسياً ما قد مضى +أعطني الناي وغني وانسى داء ودواء +إنما الناس سطورٌ كتبت لكن بماء @@ -4,6 +4,7 @@ */ #include <stdbool.h> +#include <stdio.h> #include "utf8.h" #include "arabtype.h" @@ -21,74 +22,88 @@ // 2: beginning form (if 0, it's a cutting type) // 3: middle form // -typedef uint32_t char_form[4]; +typedef uint32_t char_form_t[4]; +typedef char_form_t all_form_t[2]; -char_form arabic_forms_b[] = -{ - {0xFE80, 0xFE80, 0, 0}, // hamza (0) - {0xFE81, 0xFE82, 0, 0}, // 2alif madda (1) - {0xFE83, 0xFE84, 0, 0}, // 2alif hamza (2) - {0xFE85, 0xFE86, 0, 0}, // waw hamza (3) - {0xFE87, 0xFE88, 0, 0}, // 2alif hamza maksoura (4) - {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, // 2alif maqsoura hamza (5) - {0xFE8D, 0xFE8E, 0, 0}, // 2alif (6) - {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, // ba2 (7) - {0xFE93, 0xFE94, 0, 0}, // ta2 marbouta (8) - {0xFE95, 0xFE96, 0xFE97, 0xFE98}, // ta2 (9) - {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, // tha2 (10) - {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, // jim (11) - {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, // 7a2 (12) - {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, // kha2 (13) - {0xFEA9, 0xFEAA, 0, 0}, // dal (14) - {0xFEAB, 0xFEAC, 0, 0}, // dhal (15) - {0xFEAD, 0xFEAE, 0, 0}, // ra2 (16) - {0xFEAF, 0xFEB0, 0, 0}, // zayn (17) - {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, // syn (18) - {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, // shin (19) - {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, // sad (20) - {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, // dad (21) - {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, // tah (22) - {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, // thah (23) - {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, // 3ayn (24) - {0xFECD, 0xFECE, 0xFECF, 0xFED0}, // ghayn (25) - {0, 0, 0, 0}, // (26) - {0, 0, 0, 0}, // (27) - {0, 0, 0, 0}, // (28) - {0, 0, 0, 0}, // (29) - {0, 0, 0, 0}, // (30) - {0x640, 0x640, 0x640, 0x640}, // wasla (31) - {0xFED1, 0xFED2, 0xFED3, 0xFED4}, // fa2 (32) - {0xFED5, 0xFED6, 0xFED7, 0xFED8}, // qaf (33) - {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, // kaf (34) - {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, // lam (35) - {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, // mim (36) - {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, // noon (37) - {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, // ha2 (38) - {0xFEED, 0xFEEE, 0, 0}, // waw (39) - {0xFEFF, 0xFEF0, 0, 0}, // 2alif maksoura (40) - {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4} // ya2 (41) +static all_form_t arabic_forms_b[] = { + { {0xFE80, 0xFE80, 0, 0}, {0, 0, 0, 0} }, // hamza (0) + { {0xFE81, 0xFE82, 0, 0}, {0, 0, 0xFEF5, 0xFEF6} }, // 2alif madda (1) + { {0xFE83, 0xFE84, 0, 0}, {0, 0, 0xFEF7, 0xFEF8} }, // 2alif hamza (2) + { {0xFE85, 0xFE86, 0, 0}, {0, 0, 0, 0} }, // waw hamza (3) + { {0xFE87, 0xFE88, 0, 0}, {0, 0, 0xFEF9, 0xFEFA} }, // 2alif hamza maksoura (4) + { {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0, 0, 0, 0} }, // 2alif maqsoura hamza (5) + { {0xFE8D, 0xFE8E, 0, 0}, {0, 0, 0xFEFB, 0xFEFC} }, // 2alif (6) + { {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0, 0, 0, 0} }, // ba2 (7) + { {0xFE93, 0xFE94, 0, 0}, {0, 0, 0, 0} }, // ta2 marbouta (8) + { {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0, 0, 0, 0} }, // ta2 (9) + { {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, {0, 0, 0, 0} }, // tha2 (10) + { {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0, 0, 0, 0} }, // jim (11) + { {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, {0, 0, 0, 0} }, // 7a2 (12) + { {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0, 0, 0, 0} }, // kha2 (13) + { {0xFEA9, 0xFEAA, 0, 0}, {0, 0, 0, 0} }, // dal (14) + { {0xFEAB, 0xFEAC, 0, 0}, {0, 0, 0, 0} }, // dhal (15) + { {0xFEAD, 0xFEAE, 0, 0}, {0, 0, 0, 0} }, // ra2 (16) + { {0xFEAF, 0xFEB0, 0, 0}, {0, 0, 0, 0} }, // zayn (17) + { {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, {0, 0, 0, 0} }, // syn (18) + { {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0, 0, 0, 0} }, // shin (19) + { {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, {0, 0, 0, 0} }, // sad (20) + { {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0, 0, 0, 0} }, // dad (21) + { {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, {0, 0, 0, 0} }, // tah (22) + { {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0, 0, 0, 0} }, // thah (23) + { {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, {0, 0, 0, 0} }, // 3ayn (24) + { {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0, 0, 0, 0} }, // ghayn (25) + { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (26) + { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (27) + { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (28) + { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (29) + { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (30) + { { 0x640, 0x640, 0x640, 0x640}, {0, 0, 0, 0} }, // wasla (31) + { {0xFED1, 0xFED2, 0xFED3, 0xFED4}, {0, 0, 0, 0} }, // fa2 (32) + { {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0, 0, 0, 0} }, // qaf (33) + { {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, {0, 0, 0, 0} }, // kaf (34) + { {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0, 0, 0, 0} }, // lam (35) + { {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, {0, 0, 0, 0} }, // mim (36) + { {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0, 0, 0, 0} }, // noon (37) + { {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, {0, 0, 0, 0} }, // ha2 (38) + { {0xFEED, 0xFEEE, 0, 0}, {0, 0, 0, 0} }, // waw (39) + { {0xFEFF, 0xFEF0, 0, 0}, {0, 0, 0, 0} }, // 2alif maksoura (40) + { {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0, 0, 0, 0} }, // ya2 (41) }; -bool is_arabic_letter(uint32_t cp) -{ - return ( cp >= ARABIC_LETTER_START && cp <= ARABIC_LETTER_END ); -} -bool is_linking_type(uint32_t cp) -{ - if( is_arabic_letter(cp) ) - if( arabic_forms_b[cp - ARABIC_LETTER_START][INITIAL] || arabic_forms_b[cp - ARABIC_LETTER_START][MEDIAL] ) - return true; - return false; + +static inline bool is_arabic_letter(uint32_t cp) { return ( cp >= ARABIC_LETTER_START && cp <= ARABIC_LETTER_END ); } +static inline bool is_lam_alef(uint32_t cp, uint32_t next) { return cp == 0x644 && + is_arabic_letter(next) && + arabic_forms_b[next - ARABIC_LETTER_START][1][INITIAL] != 0; } +static inline bool is_alef_prev_lam(uint32_t prev, uint32_t cp) { return prev == 0x644 && + is_arabic_letter(cp) && + arabic_forms_b[cp - ARABIC_LETTER_START][1][INITIAL] != 0; } +static inline bool is_linking_type(uint32_t cp) { + if( is_arabic_letter(cp) && ( arabic_forms_b[cp - ARABIC_LETTER_START][0][INITIAL] || arabic_forms_b[cp - ARABIC_LETTER_START][0][MEDIAL] ) ) { + return true; + } else { + return false; + } } -uint32_t get_presentation_form_b(uint32_t prev, uint32_t next, uint32_t cp) -{ - if( !is_arabic_letter(cp) ) +uint32_t get_presentation_form_b(uint32_t prev, uint32_t next, uint32_t cp) { + if( !is_arabic_letter(cp) ) { return cp; /* not an Arabic letter */ + } + + if( is_lam_alef(cp, next) ) { + uint32_t index = (is_linking_type(cp) << 1) | is_linking_type(prev); + return arabic_forms_b[next - ARABIC_LETTER_START][1][index]; - uint32_t index = ((is_arabic_letter(next) & is_linking_type(cp)) << 1) | is_linking_type(prev); + } else { + if( is_alef_prev_lam(prev, cp) ) { + return -1; // skip previously processed lam alef + } else { + uint32_t index = ((is_arabic_letter(next) & is_linking_type(cp)) << 1) | is_linking_type(prev); - return arabic_forms_b[cp - ARABIC_LETTER_START][index]; + return arabic_forms_b[cp - ARABIC_LETTER_START][0][index]; + } + } } diff --git a/mainwindow.cpp b/mainwindow.cpp index ea3a91f..60d9d51 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -97,17 +97,30 @@ MainWindow::MainWindow(QWidget *parent) : FT_Render_Mode render_flags = FT_RENDER_MODE_NORMAL; - data__ = new uchar[1024 * 256 * 4]; - memset(data__, 0, 1024 * 256 * 4); + data__ = new uchar[1024 * 1024 * 4]; + memset(data__, 0, 1024 * 1024 * 4); int col = 1024 - 1; int line = 100; - QImage img(data__, 1024, 256, QImage::Format_RGB32); + QImage img(data__, 1024, 1024, QImage::Format_RGB32); // render the arabic glyphs for( size_t idx = 0; idx < arabic_cp.size(); ++idx ) { uint ch = get_arabic_form(arabic_cp, idx); - if( ch == 0xA || ch == 0xC ) + if( ch == 0xA ) { + line += 30; + col = 1024 - 1; continue; + } + + if ( ch == 0xC ) { + continue; + } + + if ( ch == -1 ) { + continue; + } + + assert(ch != 0); int glyph_index = FT_Get_Char_Index(face, ch); assert( glyph_index && "invalid character" ); |