aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorElOraiby <wael.eloraiby@gmail.com>2015-05-31 17:44:04 -0400
committerElOraiby <wael.eloraiby@gmail.com>2015-05-31 17:44:04 -0400
commit2b81c17e2ca89df942479f51b8057ac713abe2d4 (patch)
tree3c1e482212fdb4da136650bbcd0bf3b83472fd52
parentd6621fd7ef1971a4d5bc3e2dba97c83d6eff43f5 (diff)
optimized code (one pass no conditional switch)
-rw-r--r--arabic_test.txt1
-rw-r--r--arabtype.c117
-rw-r--r--mainwindow.cpp4
3 files changed, 66 insertions, 56 deletions
diff --git a/arabic_test.txt b/arabic_test.txt
index 2d49a48..5c014fa 100644
--- a/arabic_test.txt
+++ b/arabic_test.txt
@@ -10,3 +10,4 @@
زاهداً في ما سيأتي ناسياً ما قد مضى
أعطني الناي وغني وانسى داء ودواء
إنما الناس سطورٌ كتبت لكن بماء
+
diff --git a/arabtype.c b/arabtype.c
index 919e227..74180df 100644
--- a/arabtype.c
+++ b/arabtype.c
@@ -33,58 +33,59 @@
#define UNICODE_LAM 0x644
+
//
// 0: isolated form
// 1: ending form
// 2: beginning form (if 0, it's a cutting type)
// 3: middle form
//
-typedef uint32_t char_form_t[4];
+typedef uint32_t char_form_t[4];
typedef char_form_t all_form_t[2];
static all_form_t arabic_forms_b[] = {
- { {0xFE80, 0xFE80, 0, 0}, {0, 0, 0, 0} }, // hamza (0)
- { {0xFE81, 0xFE82, 0, 0}, {0, 0, 0xFEF5, 0xFEF6} }, // 2alif madda (1)
- { {0xFE83, 0xFE84, 0, 0}, {0, 0, 0xFEF7, 0xFEF8} }, // 2alif hamza (2)
- { {0xFE85, 0xFE86, 0, 0}, {0, 0, 0, 0} }, // waw hamza (3)
- { {0xFE87, 0xFE88, 0, 0}, {0, 0, 0xFEF9, 0xFEFA} }, // 2alif hamza maksoura (4)
- { {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {0, 0, 0, 0} }, // 2alif maqsoura hamza (5)
- { {0xFE8D, 0xFE8E, 0, 0}, {0, 0, 0xFEFB, 0xFEFC} }, // 2alif (6)
- { {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {0, 0, 0, 0} }, // ba2 (7)
- { {0xFE93, 0xFE94, 0, 0}, {0, 0, 0, 0} }, // ta2 marbouta (8)
- { {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {0, 0, 0, 0} }, // ta2 (9)
- { {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, {0, 0, 0, 0} }, // tha2 (10)
- { {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {0, 0, 0, 0} }, // jim (11)
- { {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, {0, 0, 0, 0} }, // 7a2 (12)
- { {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {0, 0, 0, 0} }, // kha2 (13)
- { {0xFEA9, 0xFEAA, 0, 0}, {0, 0, 0, 0} }, // dal (14)
- { {0xFEAB, 0xFEAC, 0, 0}, {0, 0, 0, 0} }, // dhal (15)
- { {0xFEAD, 0xFEAE, 0, 0}, {0, 0, 0, 0} }, // ra2 (16)
- { {0xFEAF, 0xFEB0, 0, 0}, {0, 0, 0, 0} }, // zayn (17)
- { {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, {0, 0, 0, 0} }, // syn (18)
- { {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {0, 0, 0, 0} }, // shin (19)
- { {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, {0, 0, 0, 0} }, // sad (20)
- { {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {0, 0, 0, 0} }, // dad (21)
- { {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, {0, 0, 0, 0} }, // tah (22)
- { {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {0, 0, 0, 0} }, // thah (23)
- { {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, {0, 0, 0, 0} }, // 3ayn (24)
- { {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {0, 0, 0, 0} }, // ghayn (25)
- { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (26)
- { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (27)
- { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (28)
- { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (29)
- { { 0, 0, 0, 0}, {0, 0, 0, 0} }, // (30)
- { { 0x640, 0x640, 0x640, 0x640}, {0, 0, 0, 0} }, // wasla (31)
- { {0xFED1, 0xFED2, 0xFED3, 0xFED4}, {0, 0, 0, 0} }, // fa2 (32)
- { {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {0, 0, 0, 0} }, // qaf (33)
- { {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, {0, 0, 0, 0} }, // kaf (34)
- { {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {0, 0, 0, 0} }, // lam (35)
- { {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, {0, 0, 0, 0} }, // mim (36)
- { {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {0, 0, 0, 0} }, // noon (37)
- { {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, {0, 0, 0, 0} }, // ha2 (38)
- { {0xFEED, 0xFEEE, 0, 0}, {0, 0, 0, 0} }, // waw (39)
- { {0xFEFF, 0xFEF0, 0, 0}, {0, 0, 0, 0} }, // 2alif maksoura (40)
- { {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {0, 0, 0, 0} }, // ya2 (41)
+ // Each row is { own forms, lam-alef forms }, both indexed isolated/ending/beginning/middle:
+ //  [0] the letter's own presentation forms (0 = no such form);
+ //  [1] slots 2/3 hold the lam-alef ligature code points for the alef variants (0 elsewhere);
+ //      slots 0/1 hold -1 (0xFFFFFFFF as uint32_t), the "skip" value returned for an alef
+ //      that directly follows a lam.
+ // Row 40: the isolated alef maksura is U+FEEF; U+FEFF is ZERO WIDTH NO-BREAK SPACE (BOM).
+ { {0xFE80, 0xFE80, 0, 0}, {-1, -1, 0, 0} }, // hamza (0)
+ { {0xFE81, 0xFE82, 0, 0}, {-1, -1, 0xFEF5, 0xFEF6} }, // 2alif madda (1)
+ { {0xFE83, 0xFE84, 0, 0}, {-1, -1, 0xFEF7, 0xFEF8} }, // 2alif hamza (2)
+ { {0xFE85, 0xFE86, 0, 0}, {-1, -1, 0, 0} }, // waw hamza (3)
+ { {0xFE87, 0xFE88, 0, 0}, {-1, -1, 0xFEF9, 0xFEFA} }, // 2alif hamza maksoura (4)
+ { {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, {-1, -1, 0, 0} }, // 2alif maqsoura hamza (5)
+ { {0xFE8D, 0xFE8E, 0, 0}, {-1, -1, 0xFEFB, 0xFEFC} }, // 2alif (6)
+ { {0xFE8F, 0xFE90, 0xFE91, 0xFE92}, {-1, -1, 0, 0} }, // ba2 (7)
+ { {0xFE93, 0xFE94, 0, 0}, {-1, -1, 0, 0} }, // ta2 marbouta (8)
+ { {0xFE95, 0xFE96, 0xFE97, 0xFE98}, {-1, -1, 0, 0} }, // ta2 (9)
+ { {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, {-1, -1, 0, 0} }, // tha2 (10)
+ { {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, {-1, -1, 0, 0} }, // jim (11)
+ { {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, {-1, -1, 0, 0} }, // 7a2 (12)
+ { {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, {-1, -1, 0, 0} }, // kha2 (13)
+ { {0xFEA9, 0xFEAA, 0, 0}, {-1, -1, 0, 0} }, // dal (14)
+ { {0xFEAB, 0xFEAC, 0, 0}, {-1, -1, 0, 0} }, // dhal (15)
+ { {0xFEAD, 0xFEAE, 0, 0}, {-1, -1, 0, 0} }, // ra2 (16)
+ { {0xFEAF, 0xFEB0, 0, 0}, {-1, -1, 0, 0} }, // zayn (17)
+ { {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, {-1, -1, 0, 0} }, // syn (18)
+ { {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, {-1, -1, 0, 0} }, // shin (19)
+ { {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, {-1, -1, 0, 0} }, // sad (20)
+ { {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, {-1, -1, 0, 0} }, // dad (21)
+ { {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, {-1, -1, 0, 0} }, // tah (22)
+ { {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, {-1, -1, 0, 0} }, // thah (23)
+ { {0xFEC9, 0xFECA, 0xFECB, 0xFECC}, {-1, -1, 0, 0} }, // 3ayn (24)
+ { {0xFECD, 0xFECE, 0xFECF, 0xFED0}, {-1, -1, 0, 0} }, // ghayn (25)
+ { { 0, 0, 0, 0}, {-1, -1, 0, 0} }, // (26)
+ { { 0, 0, 0, 0}, {-1, -1, 0, 0} }, // (27)
+ { { 0, 0, 0, 0}, {-1, -1, 0, 0} }, // (28)
+ { { 0, 0, 0, 0}, {-1, -1, 0, 0} }, // (29)
+ { { 0, 0, 0, 0}, {-1, -1, 0, 0} }, // (30)
+ { { 0x640, 0x640, 0x640, 0x640}, {-1, -1, 0, 0} }, // tatweel / "wasla" (31)
+ { {0xFED1, 0xFED2, 0xFED3, 0xFED4}, {-1, -1, 0, 0} }, // fa2 (32)
+ { {0xFED5, 0xFED6, 0xFED7, 0xFED8}, {-1, -1, 0, 0} }, // qaf (33)
+ { {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, {-1, -1, 0, 0} }, // kaf (34)
+ { {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, {-1, -1, 0, 0} }, // lam (35)
+ { {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, {-1, -1, 0, 0} }, // mim (36)
+ { {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, {-1, -1, 0, 0} }, // noon (37)
+ { {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, {-1, -1, 0, 0} }, // ha2 (38)
+ { {0xFEED, 0xFEEE, 0, 0}, {-1, -1, 0, 0} }, // waw (39)
+ { {0xFEEF, 0xFEF0, 0, 0}, {-1, -1, 0, 0} }, // 2alif maksoura (40)
+ { {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, {-1, -1, 0, 0} }, // ya2 (41)
};
@@ -96,31 +97,39 @@ static inline bool is_lam_alef(uint32_t cp, uint32_t next) { return cp == UNICOD
+// True when `prev` is a lam and `cp` is an Arabic letter whose row has a lam-alef ligature
+// (non-zero [1][INITIAL] entry), i.e. `cp` is the alef half of a lam-alef pair.
static inline bool is_alef_prev_lam(uint32_t prev, uint32_t cp) { return prev == UNICODE_LAM &&
is_arabic_letter(cp) &&
arabic_forms_b[cp - ARABIC_LETTER_START][1][INITIAL] != 0; }
-static inline bool is_linking_type(uint32_t cp) {
- if( is_arabic_letter(cp) && ( arabic_forms_b[cp - ARABIC_LETTER_START][0][INITIAL] || arabic_forms_b[cp - ARABIC_LETTER_START][0][MEDIAL] ) ) {
- return true;
- } else {
- return false;
- }
-}
+
+// True when `cp` is an Arabic letter with a non-zero MEDIAL form in sub-table [0].
+// NOTE(review): the removed version also accepted a non-zero INITIAL form; in the table rows
+// shown in this diff every row with an INITIAL form also has a MEDIAL one, so the result
+// looks unchanged - confirm against the full table.
+static inline bool is_linking_type(uint32_t cp) { return is_arabic_letter(cp) && arabic_forms_b[cp - ARABIC_LETTER_START][0][MEDIAL] != 0; }
+// Returns the presentation-form code point to emit for `cp`, given its neighbours:
+//  - a non-Arabic `cp` is returned unchanged;
+//  - a lam followed by a ligature-forming alef yields the lam-alef ligature (taken from the alef's row);
+//  - the alef right after such a lam yields -1 (0xFFFFFFFF), the "skip" marker;
+//  - otherwise the form index is (cp joins to next) << 1 | (prev joins to cp), selecting
+//    isolated (0), ending (1), beginning (2) or middle (3) from the letter's own row.
uint32_t get_presentation_form_b(uint32_t prev, uint32_t next, uint32_t cp) {
+
if( !is_arabic_letter(cp) ) {
return cp; /* not an Arabic letter */
}
- if( is_lam_alef(cp, next) ) {
+ bool is_la = is_lam_alef(cp, next); // lam + alef pair starting at cp
+ bool is_apl = is_alef_prev_lam(prev, cp); // cp is the alef half of such a pair
+
+ bool is_lapl = is_la | is_apl; // either case reads sub-table [1]
+
+#ifdef CLEAR_CODE
+ // Test is_la here, not is_lapl: in the alef-after-lam case `next` need not be an Arabic
+ // letter, so indexing the table by `next` could read out of bounds. That case is the
+ // is_apl branch below, which must stay reachable.
+ if( is_la ) {
uint32_t index = (is_linking_type(cp) << 1) | is_linking_type(prev);
- return arabic_forms_b[next - ARABIC_LETTER_START][1][index];
+ return arabic_forms_b[next - ARABIC_LETTER_START][is_lapl][index];
} else {
- if( is_alef_prev_lam(prev, cp) ) {
+ if( is_apl ) {
return -1; // skip previously processed lam alef
} else {
uint32_t index = ((is_arabic_letter(next) & is_linking_type(cp)) << 1) | is_linking_type(prev);
- return arabic_forms_b[cp - ARABIC_LETTER_START][0][index];
+ return arabic_forms_b[cp - ARABIC_LETTER_START][is_lapl][index];
}
}
+#else
+ // optimized code: one branch-free lookup. `ref` is the alef's row for a lam-alef pair and
+ // cp's own row otherwise; the alef-after-lam case lands on a -1 slot of sub-table [1].
+ uint32_t index = (((is_lapl | is_arabic_letter(next)) & is_linking_type(cp)) << 1) | is_linking_type(prev);
+ uint32_t ref = next * is_la + cp * (1 - is_la) - ARABIC_LETTER_START;
+ return arabic_forms_b[ref][is_lapl][index];
+#endif
}
diff --git a/mainwindow.cpp b/mainwindow.cpp
index 5173fda..14a2871 100644
--- a/mainwindow.cpp
+++ b/mainwindow.cpp
@@ -58,7 +58,7 @@ MainWindow::MainWindow(QWidget *parent) :
ui__->setupUi(this);
int width = 480;
- int height = 320;
+ int height = 392;
int font_size = 24;
std::ifstream ifs("arabic_test.txt");
@@ -117,7 +117,7 @@ MainWindow::MainWindow(QWidget *parent) :
{
uint ch = get_arabic_form(arabic_cp, idx);
if( ch == 0xA ) {
- line += font_size;
+ line += font_size + 5;
col = width - font_size;
continue;
}