aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorElOraiby <wael.eloraiby@gmail.com>2015-05-30 14:11:16 -0400
committerElOraiby <wael.eloraiby@gmail.com>2015-05-30 14:11:16 -0400
commit5e1fd7a1ee767b6c8e0d7461b1044d93a42989a2 (patch)
treec0b85b9e60ca01765ddf98309d41472ffe2639e1
parent8d4a1bb7f4f6c4f5abb6596172ad4b6b181e12e3 (diff)
generalize the lam-alef case
-rw-r--r--arabic_test.txt13
-rw-r--r--arabtype.c135
-rw-r--r--mainwindow.cpp21
3 files changed, 104 insertions, 65 deletions
diff --git a/arabic_test.txt b/arabic_test.txt
index a5d431d..2d49a48 100644
--- a/arabic_test.txt
+++ b/arabic_test.txt
@@ -1 +1,12 @@
-اعطني الناي و غني فالغناﺀ سر الوجود فأنين الناي يبقئ بعد ان يفنئ الوجود
+أعطني الناي وغني فالغنا سر الوجود
+وأنين الناي يبقى بعد أن يفنى الوجود
+هل إتخذت الغاب مثلي منزلاً دون القصور
+فتتبعت السواقي وتسلقت الصخور
+هل تحممت بعطر وتنشفت بنور
+وشربت الفجر خمراً فى كؤوس من أثير
+هل جلست العصر مثلي بين جفنات العنب
+والعناقيد تدلت كثريات الذهب
+هل فرشت العشب ليلاً وتلحفت الفضاء
+زاهداً في ما سيأتي ناسياً ما قد مضى
+أعطني الناي وغني وانسى داء ودواء
+إنما الناس سطورٌ كتبت لكن بماء
diff --git a/arabtype.c b/arabtype.c
index 12c6b43..0136bc5 100644
--- a/arabtype.c
+++ b/arabtype.c
@@ -4,6 +4,7 @@
*/
#include <stdbool.h>
+#include <stdio.h>
#include "utf8.h"
#include "arabtype.h"
@@ -21,74 +22,88 @@
// 2: beginning form (if 0, it's a cutting type)
// 3: middle form
//
-typedef uint32_t char_form[4];
+typedef uint32_t char_form_t[4];
+typedef char_form_t all_form_t[2];
-char_form arabic_forms_b[] =
-{
- {0xFE80, 0xFE80, 0, 0}, // hamza (0)
- {0xFE81, 0xFE82, 0, 0}, // 2alif madda (1)
- {0xFE83, 0xFE84, 0, 0}, // 2alif hamza (2)
- {0xFE85, 0xFE86, 0, 0}, // waw hamza (3)
- {0xFE87, 0xFE88, 0, 0}, // 2alif hamza maksoura (4)
- {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, // 2alif maqsoura hamza (5)
- {0xFE8D, 0xFE8E, 0, 0}, // 2alif (6)
- {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, // ba2 (7)
- {0xFE93, 0xFE94, 0, 0}, // ta2 marbouta (8)
- {0xFE95, 0xFE96, 0xFE97, 0xFE98}, // ta2 (9)
- {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, // tha2 (10)
- {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, // jim (11)
- {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, // 7a2 (12)
- {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, // kha2 (13)
- {0xFEA9, 0xFEAA, 0, 0}, // dal (14)
- {0xFEAB, 0xFEAC, 0, 0}, // dhal (15)
- {0xFEAD, 0xFEAE, 0, 0}, // ra2 (16)
- {0xFEAF, 0xFEB0, 0, 0}, // zayn (17)
- {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, // syn (18)
- {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, // shin (19)
- {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, // sad (20)
- {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, // dad (21)
- {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, // tah (22)
- {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, // thah (23)
- {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, // 3ayn (24)
- {0xFECD, 0xFECE, 0xFECF, 0xFED0}, // ghayn (25)
- {0, 0, 0, 0}, // (26)
- {0, 0, 0, 0}, // (27)
- {0, 0, 0, 0}, // (28)
- {0, 0, 0, 0}, // (29)
- {0, 0, 0, 0}, // (30)
- {0x640, 0x640, 0x640, 0x640}, // wasla (31)
- {0xFED1, 0xFED2, 0xFED3, 0xFED4}, // fa2 (32)
- {0xFED5, 0xFED6, 0xFED7, 0xFED8}, // qaf (33)
- {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, // kaf (34)
- {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, // lam (35)
- {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, // mim (36)
- {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, // noon (37)
- {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, // ha2 (38)
- {0xFEED, 0xFEEE, 0, 0}, // waw (39)
- {0xFEFF, 0xFEF0, 0, 0}, // 2alif maksoura (40)
- {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4} // ya2 (41)
+static all_form_t arabic_forms_b[] = {
+ { {0xFE80, 0xFE80, 0, 0}, {0, 0, 0, 0} }, // hamza (0)
+ { {0xFE81, 0xFE82, 0, 0}, {0, 0, 0xFEF5, 0xFEF6} }, // 2alif madda (1)
+ { {0xFE83, 0xFE84, 0, 0}, {0, 0, 0xFEF7, 0xFEF8} }, // 2alif hamza (2)
+ { {0xFE85, 0xFE86, 0, 0}, {0, 0, 0, 0} }, // waw hamza (3)
+ { {0xFE87, 0xFE88, 0, 0}, {0, 0, 0xFEF9, 0xFEFA} }, // 2alif hamza maksoura (4)
+ { {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0, 0, 0, 0} }, // 2alif maqsoura hamza (5)
+ { {0xFE8D, 0xFE8E, 0, 0}, {0, 0, 0xFEFB, 0xFEFC} }, // 2alif (6)
+ { {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0, 0, 0, 0} }, // ba2 (7)
+ { {0xFE93, 0xFE94, 0, 0}, {0, 0, 0, 0} }, // ta2 marbouta (8)
+ { {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0, 0, 0, 0} }, // ta2 (9)
+ { {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, {0, 0, 0, 0} }, // tha2 (10)
+ { {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0, 0, 0, 0} }, // jim (11)
+ { {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, {0, 0, 0, 0} }, // 7a2 (12)
+ { {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0, 0, 0, 0} }, // kha2 (13)
+ { {0xFEA9, 0xFEAA, 0, 0}, {0, 0, 0, 0} }, // dal (14)
+ { {0xFEAB, 0xFEAC, 0, 0}, {0, 0, 0, 0} }, // dhal (15)
+ { {0xFEAD, 0xFEAE, 0, 0}, {0, 0, 0, 0} }, // ra2 (16)
+ { {0xFEAF, 0xFEB0, 0, 0}, {0, 0, 0, 0} }, // zayn (17)
+ { {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, {0, 0, 0, 0} }, // syn (18)
+ { {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0, 0, 0, 0} }, // shin (19)
+ { {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, {0, 0, 0, 0} }, // sad (20)
+ { {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0, 0, 0, 0} }, // dad (21)
+ { {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, {0, 0, 0, 0} }, // tah (22)
+ { {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0, 0, 0, 0} }, // thah (23)
+ { {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, {0, 0, 0, 0} }, // 3ayn (24)
+ { {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0, 0, 0, 0} }, // ghayn (25)
+ { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (26)
+ { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (27)
+ { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (28)
+ { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (29)
+ { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (30)
+ { { 0x640, 0x640, 0x640, 0x640}, {0, 0, 0, 0} }, // wasla (31)
+ { {0xFED1, 0xFED2, 0xFED3, 0xFED4}, {0, 0, 0, 0} }, // fa2 (32)
+ { {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0, 0, 0, 0} }, // qaf (33)
+ { {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, {0, 0, 0, 0} }, // kaf (34)
+ { {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0, 0, 0, 0} }, // lam (35)
+ { {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, {0, 0, 0, 0} }, // mim (36)
+ { {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0, 0, 0, 0} }, // noon (37)
+ { {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, {0, 0, 0, 0} }, // ha2 (38)
+ { {0xFEED, 0xFEEE, 0, 0}, {0, 0, 0, 0} }, // waw (39)
+ { {0xFEFF, 0xFEF0, 0, 0}, {0, 0, 0, 0} }, // 2alif maksoura (40)
+ { {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0, 0, 0, 0} }, // ya2 (41)
};
-bool is_arabic_letter(uint32_t cp)
-{
- return ( cp >= ARABIC_LETTER_START && cp <= ARABIC_LETTER_END );
-}
-bool is_linking_type(uint32_t cp)
-{
- if( is_arabic_letter(cp) )
- if( arabic_forms_b[cp - ARABIC_LETTER_START][INITIAL] || arabic_forms_b[cp - ARABIC_LETTER_START][MEDIAL] )
- return true;
- return false;
+
+static inline bool is_arabic_letter(uint32_t cp) { return ( cp >= ARABIC_LETTER_START && cp <= ARABIC_LETTER_END ); }
+static inline bool is_lam_alef(uint32_t cp, uint32_t next) { return cp == 0x644 &&
+ is_arabic_letter(next) &&
+ arabic_forms_b[next - ARABIC_LETTER_START][1][INITIAL] != 0; }
+static inline bool is_alef_prev_lam(uint32_t prev, uint32_t cp) { return prev == 0x644 &&
+ is_arabic_letter(cp) &&
+ arabic_forms_b[cp - ARABIC_LETTER_START][1][INITIAL] != 0; }
+static inline bool is_linking_type(uint32_t cp) {
+ if( is_arabic_letter(cp) && ( arabic_forms_b[cp - ARABIC_LETTER_START][0][INITIAL] || arabic_forms_b[cp - ARABIC_LETTER_START][0][MEDIAL] ) ) {
+ return true;
+ } else {
+ return false;
+ }
}
-uint32_t get_presentation_form_b(uint32_t prev, uint32_t next, uint32_t cp)
-{
- if( !is_arabic_letter(cp) )
+uint32_t get_presentation_form_b(uint32_t prev, uint32_t next, uint32_t cp) {
+ if( !is_arabic_letter(cp) ) {
return cp; /* not an Arabic letter */
+ }
+
+ if( is_lam_alef(cp, next) ) {
+ uint32_t index = (is_linking_type(cp) << 1) | is_linking_type(prev);
+ return arabic_forms_b[next - ARABIC_LETTER_START][1][index];
- uint32_t index = ((is_arabic_letter(next) & is_linking_type(cp)) << 1) | is_linking_type(prev);
+ } else {
+ if( is_alef_prev_lam(prev, cp) ) {
+ return -1; // skip previously processed lam alef
+ } else {
+ uint32_t index = ((is_arabic_letter(next) & is_linking_type(cp)) << 1) | is_linking_type(prev);
- return arabic_forms_b[cp - ARABIC_LETTER_START][index];
+ return arabic_forms_b[cp - ARABIC_LETTER_START][0][index];
+ }
+ }
}
diff --git a/mainwindow.cpp b/mainwindow.cpp
index ea3a91f..60d9d51 100644
--- a/mainwindow.cpp
+++ b/mainwindow.cpp
@@ -97,17 +97,30 @@ MainWindow::MainWindow(QWidget *parent) :
FT_Render_Mode render_flags = FT_RENDER_MODE_NORMAL;
- data__ = new uchar[1024 * 256 * 4];
- memset(data__, 0, 1024 * 256 * 4);
+ data__ = new uchar[1024 * 1024 * 4];
+ memset(data__, 0, 1024 * 1024 * 4);
int col = 1024 - 1;
int line = 100;
- QImage img(data__, 1024, 256, QImage::Format_RGB32);
+ QImage img(data__, 1024, 1024, QImage::Format_RGB32);
// render the arabic glyphs
for( size_t idx = 0; idx < arabic_cp.size(); ++idx )
{
uint ch = get_arabic_form(arabic_cp, idx);
- if( ch == 0xA || ch == 0xC )
+ if( ch == 0xA ) {
+ line += 30;
+ col = 1024 - 1;
continue;
+ }
+
+ if ( ch == 0xC ) {
+ continue;
+ }
+
+ if ( ch == -1 ) {
+ continue;
+ }
+
+ assert(ch != 0);
int glyph_index = FT_Get_Char_Index(face, ch);
assert( glyph_index && "invalid character" );