aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arabtype.c38
1 files changed, 26 insertions, 12 deletions
diff --git a/arabtype.c b/arabtype.c
index 76355ad..8e237db 100644
--- a/arabtype.c
+++ b/arabtype.c
@@ -23,13 +23,16 @@
#include "utf8.h"
#include "arabtype.h"
-#define ARABIC_LETTER_START 0x621
-#define ARABIC_LETTER_END 0x64A
+#define ARABIC_LETTER_START 0x621
+#define ARABIC_LETTER_END 0x64A
-#define ISOLATED 0
-#define ENDING 1
-#define INITIAL 2
-#define MEDIAL 3
+#define TASHKIL_LETTER_START 0x64B
+#define TASHKIL_LETTER_END 0x652
+
+#define ISOLATED 0
+#define ENDING 1
+#define INITIAL 2
+#define MEDIAL 3
#define UNICODE_LAM 0x644
@@ -91,6 +94,7 @@ static all_form_t arabic_forms_b[] = {
static inline bool is_arabic_letter(uint32_t cp) { return ( cp >= ARABIC_LETTER_START && cp <= ARABIC_LETTER_END ); }
+static inline bool is_tashkil_letter(uint32_t cp) { return ( cp >= TASHKIL_LETTER_START && cp <= TASHKIL_LETTER_END ); }
static inline bool is_lam_alef(uint32_t cp, uint32_t next) { return cp == UNICODE_LAM &&
is_arabic_letter(next) &&
arabic_forms_b[next - ARABIC_LETTER_START][1][INITIAL] != 0; }
@@ -159,14 +163,24 @@ get_presentation_form_b(size_t in_len, unsigned char* in_str, size_t out_len, ui
size_t s = 0;
for( o = 0; o < cp_count; ++o) {
- uint32_t cp = out_cp[o];
- uint32_t next = o < cp_count - 1 ? out_cp[o + 1] : 0;
- uint32_t tcp = get_presentation_form_b_of_char(prev, next, cp);
- if( tcp != (uint32_t)-1 ) {
- out_cp[s] = tcp;
+ uint32_t cp = out_cp[o];
+
+ if (!is_tashkil_letter(cp)) {
+ uint32_t next = o < cp_count - 1 ? out_cp[o + 1] : 0;
+
+ if (is_tashkil_letter(next))
+ next = (o + 1) < cp_count - 1 ? out_cp[o + 2] : 0;
+
+ uint32_t tcp = get_presentation_form_b_of_char(prev, next, cp);
+ if( tcp != (uint32_t)-1 ) {
+ out_cp[s] = tcp;
+ ++s;
+ }
+ prev = cp;
+ } else {
+ out_cp[s] = cp;
++s;
}
- prev = cp;
}
return o;