Open Source Repository

Home /itextpdf/itextpdf-5.1.2 | Repository Home



com/itextpdf/text/pdf/ArabicLigaturizer.java
/*
 * $Id: ArabicLigaturizer.java 4784 2011-03-15 08:33:00Z blowagie $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2011 1T3XT BVBA
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.pdf;

/**
 * Shape arabic characters. This code was inspired by an LGPL'ed C library:
 * Pango ( see http://www.pango.com/ ). Note that the code of this class is
 * the original work of Paulo Soares.
 *
 * @author Paulo Soares
 */
public class ArabicLigaturizer {
    
    static boolean isVowel(char s) {
        return ((s >= 0x064B&& (s <= 0x0655)) || (s == 0x0670);
    }

    static char charshape(char s, int which)
    /* which 0=isolated 1=final 2=initial 3=medial */
    {
        int l, r, m;
        if ((s >= 0x0621&& (s <= 0x06D3)) {
            l = 0;
            r = chartable.length - 1;
            while (l <= r) {
                m = (l + r2;
                if (s == chartable[m][0]) {
                    return chartable[m][which + 1];
                }
                else if (s < chartable[m][0]) {
                    r = m - 1;
                }
                else {
                    l = m + 1;
                }
            }
        }
        else if (s >= 0xfef5 && s <= 0xfefb)
            return (char)(s + which);
        return s;
    }

    static int shapecount(char s) {
        int l, r, m;
        if ((s >= 0x0621&& (s <= 0x06D3&& !isVowel(s)) {
            l = 0;
            r = chartable.length - 1;
            while (l <= r) {
                m = (l + r2;
                if (s == chartable[m][0]) {
                    return chartable[m].length - 1;
                }
                else if (s < chartable[m][0]) {
                    r = m - 1;
                }
                else {
                    l = m + 1;
                }
            }
        }
        else if (s == ZWJ) {
            return 4;
        }
        return 1;
    }
    
    static int ligature(char newchar, charstruct oldchar) {
    /* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */
        int retval = 0;
        
        if (oldchar.basechar == 0)
            return 0;
        if (isVowel(newchar)) {
            retval = 1;
            if ((oldchar.vowel != 0&& (newchar != SHADDA)) {
                retval = 2;           /* we eliminate the old vowel .. */
            }
            switch (newchar) {
                case SHADDA:
                    if (oldchar.mark1 == 0) {
                        oldchar.mark1 = SHADDA;
                    }
                    else {
                        return 0;         /* no ligature possible */
                    }
                    break;
                case HAMZABELOW:
                    switch (oldchar.basechar) {
                        case ALEF:
                            oldchar.basechar = ALEFHAMZABELOW;
                            retval = 2;
                            break;
                        case LAM_ALEF:
                            oldchar.basechar = LAM_ALEFHAMZABELOW;
                            retval = 2;
                            break;
                        default:
                            oldchar.mark1 = HAMZABELOW;
                            break;
                    }
                    break;
                case HAMZAABOVE:
                    switch (oldchar.basechar) {
                        case ALEF:
                            oldchar.basechar = ALEFHAMZA;
                            retval = 2;
                            break;
                        case LAM_ALEF:
                            oldchar.basechar = LAM_ALEFHAMZA;
                            retval = 2;
                            break;
                        case WAW:
                            oldchar.basechar = WAWHAMZA;
                            retval = 2;
                            break;
                        case YEH:
                        case ALEFMAKSURA:
                        case FARSIYEH:
                            oldchar.basechar = YEHHAMZA;
                            retval = 2;
                            break;
                        default:           /* whatever sense this may make .. */
                            oldchar.mark1 = HAMZAABOVE;
                            break;
                    }
                    break;
                case MADDA:
                    switch (oldchar.basechar) {
                        case ALEF:
                            oldchar.basechar = ALEFMADDA;
                            retval = 2;
                            break;
                    }
                    break;
                default:
                    oldchar.vowel = newchar;
                    break;
            }
            if (retval == 1) {
                oldchar.lignum++;
            }
            return retval;
        }
        if (oldchar.vowel != 0) {  /* if we already joined a vowel, we can't join a Hamza */
            return 0;
        }
        
        switch (oldchar.basechar) {
            case LAM:
                switch (newchar) {
                    case ALEF:
                        oldchar.basechar = LAM_ALEF;
                        oldchar.numshapes = 2;
                        retval = 3;
                        break;
                    case ALEFHAMZA:
                        oldchar.basechar = LAM_ALEFHAMZA;
                        oldchar.numshapes = 2;
                        retval = 3;
                        break;
                    case ALEFHAMZABELOW:
                        oldchar.basechar = LAM_ALEFHAMZABELOW;
                        oldchar.numshapes = 2;
                        retval = 3;
                        break;
                    case ALEFMADDA:
                        oldchar.basechar = LAM_ALEFMADDA;
                        oldchar.numshapes = 2;
                        retval = 3;
                        break;
                }
                break;
            case 0:
                oldchar.basechar = newchar;
                oldchar.numshapes = shapecount(newchar);
                retval = 1;
                break;
        }
        return retval;
    }
    
    static void copycstostring(StringBuffer string, charstruct s, int level) {
    /* s is a shaped charstruct; i is the index into the string */
        if (s.basechar == 0)
            return;
        
        string.append(s.basechar);
        s.lignum--;
        if (s.mark1 != 0) {
            if ((level & ar_novowel== 0) {
                string.append(s.mark1);
                s.lignum--;
            }
            else {
                s.lignum--;
            }
        }
        if (s.vowel != 0) {
            if ((level & ar_novowel== 0) {
                string.append(s.vowel);
                s.lignum--;
            }
            else {                       /* vowel elimination */
                s.lignum--;
            }
        }
//        while (s.lignum > 0) {                           /* NULL-insertion for Langbox-font */
//            string[i] = 0;
//            i++;
//            (s.lignum)--;
//        }
//        return i;
    }

    // return len
    static void doublelig(StringBuffer string, int level)
    /* Ok. We have presentation ligatures in our font. */
    {
        int len;
        int olen = len = string.length();
        int j = 0, si = 1;
        char lapresult;
        
        while (si < olen) {
            lapresult = 0;
            if ((level & ar_composedtashkeel!= 0) {
                switch (string.charAt(j)) {
                    case SHADDA:
                        switch (string.charAt(si)) {
                            case KASRA:
                                lapresult = 0xFC62;
                                break;
                            case FATHA:
                                lapresult = 0xFC60;
                                break;
                            case DAMMA:
                                lapresult = 0xFC61;
                                break;
                            case 0x064C:
                                lapresult = 0xFC5E;
                                break;
                            case 0x064D:
                                lapresult = 0xFC5F;
                                break;
                        }
                        break;
                    case KASRA:
                        if (string.charAt(si== SHADDA)
                            lapresult = 0xFC62;
                        break;
                    case FATHA:
                        if (string.charAt(si== SHADDA)
                            lapresult = 0xFC60;
                        break;
                    case DAMMA:
                        if (string.charAt(si== SHADDA)
                            lapresult = 0xFC61;
                        break;
                }
            }
            
            if ((level & ar_lig!= 0) {
                switch (string.charAt(j)) {
                    case 0xFEDF:       /* LAM initial */
                        switch (string.charAt(si)) {
                            case 0xFE9E:
                                lapresult = 0xFC3F;
                                break;        /* JEEM final */
                            case 0xFEA0:
                                lapresult = 0xFCC9;
                                break;        /* JEEM medial */
                            case 0xFEA2:
                                lapresult = 0xFC40;
                                break;        /* HAH final */
                            case 0xFEA4:
                                lapresult = 0xFCCA;
                                break;        /* HAH medial */
                            case 0xFEA6:
                                lapresult = 0xFC41;
                                break;        /* KHAH final */
                            case 0xFEA8:
                                lapresult = 0xFCCB;
                                break;        /* KHAH medial */
                            case 0xFEE2:
                                lapresult = 0xFC42;
                                break;        /* MEEM final */
                            case 0xFEE4:
                                lapresult = 0xFCCC;
                                break;        /* MEEM medial */
                        }
                        break;
                    case 0xFE97:       /* TEH inital */
                        switch (string.charAt(si)) {
                            case 0xFEA0:
                                lapresult = 0xFCA1;
                                break;        /* JEEM medial */
                            case 0xFEA4:
                                lapresult = 0xFCA2;
                                break;        /* HAH medial */
                            case 0xFEA8:
                                lapresult = 0xFCA3;
                                break;        /* KHAH medial */
                        }
                        break;
                    case 0xFE91:       /* BEH inital */
                        switch (string.charAt(si)) {
                            case 0xFEA0:
                                lapresult = 0xFC9C;
                                break;        /* JEEM medial */
                            case 0xFEA4:
                                lapresult = 0xFC9D;
                                break;        /* HAH medial */
                            case 0xFEA8:
                                lapresult = 0xFC9E;
                                break;        /* KHAH medial */
                        }
                        break;
                    case 0xFEE7:       /* NOON inital */
                        switch (string.charAt(si)) {
                            case 0xFEA0:
                                lapresult = 0xFCD2;
                                break;        /* JEEM initial */
                            case 0xFEA4:
                                lapresult = 0xFCD3;
                                break;        /* HAH medial */
                            case 0xFEA8:
                                lapresult = 0xFCD4;
                                break;        /* KHAH medial */
                        }
                        break;
                        
                    case 0xFEE8:       /* NOON medial */
                        switch (string.charAt(si)) {
                            case 0xFEAE:
                                lapresult = 0xFC8A;
                                break;        /* REH final  */
                            case 0xFEB0:
                                lapresult = 0xFC8B;
                                break;        /* ZAIN final */
                        }
                        break;
                    case 0xFEE3:       /* MEEM initial */
                        switch (string.charAt(si)) {
                            case 0xFEA0:
                                lapresult = 0xFCCE;
                                break;        /* JEEM medial */
                            case 0xFEA4:
                                lapresult = 0xFCCF;
                                break;        /* HAH medial */
                            case 0xFEA8:
                                lapresult = 0xFCD0;
                                break;        /* KHAH medial */
                            case 0xFEE4:
                                lapresult = 0xFCD1;
                                break;        /* MEEM medial */
                        }
                        break;
                        
                    case 0xFED3:       /* FEH initial */
                        switch (string.charAt(si)) {
                            case 0xFEF2:
                                lapresult = 0xFC32;
                                break;        /* YEH final */
                        }
                        break;
                        
                    default:
                        break;
                }                   /* end switch string[si] */
            }
            if (lapresult != 0) {
                string.setCharAt(j, lapresult);
                len--;
                si++;                 /* jump over one character */
                /* we'll have to change this, too. */
            }
            else {
                j++;
                string.setCharAt(j, string.charAt(si));
                si++;
            }
        }
        string.setLength(len);
    }

    static boolean connects_to_left(charstruct a) {
        return a.numshapes > 2;
    }
    
    /**
     * Performs the basic Arabic reshaping of {@code text} and appends the
     * result to {@code string}.
     * <p>
     * Two characters are tracked while scanning: {@code curchar}, which is
     * still being assembled, and {@code oldchar}, the previously completed
     * one. Each input character is first offered to {@code ligature}; when it
     * cannot be merged into {@code curchar}, {@code curchar} is given its
     * final presentation form, {@code oldchar} is written out, and the input
     * character starts a new {@code curchar}.
     *
     * @param text the original (unshaped) text
     * @param string the output buffer; the shaped characters are appended
     * @param level the shaping options, passed on to {@code copycstostring}
     */
    static void shape(char text[], StringBuffer string, int level) {
  /* string is assumed to be empty and big enough.
   * text is the original text.
   * This routine does the basic arabic reshaping.
   *
   * Note: the input is not unshaped first (the unshape call below is
   * commented out).
   */
        int join;
        int which;
        char nextletter;
        
        int p = 0;                     /* index of the next input character */
        charstruct oldchar = new charstruct();
        charstruct curchar = new charstruct();
        while (p < text.length) {
            nextletter = text[p++];
            //nextletter = unshape (nextletter);
            
            join = ligature(nextletter, curchar);
            if (join == 0) {                       /* shape curchar */
                int nc = shapecount(nextletter);
                //(*len)++;
                /* a next letter with a single shape cannot join curchar */
                if (nc == 1) {
                    which = 0;        /* final or isolated */
                }
                else {
                    which = 2;        /* medial or initial */
                }
                /* joining the previous character turns isolated into final and initial into medial */
                if (connects_to_left(oldchar)) {
                    which++;
                }
                
                /* fold the requested form into the forms curchar actually has */
                which = which % (curchar.numshapes);
                curchar.basechar = charshape(curchar.basechar, which);
                
                /* get rid of oldchar */
                copycstostring(string, oldchar, level);
                oldchar = curchar;    /* new values in oldchar */
                
                /* init new curchar */
                curchar = new charstruct();
                curchar.basechar = nextletter;
                curchar.numshapes = nc;
                curchar.lignum++;
                //          (*len) += unligature (&curchar, level);
            }
            else if (join == 1) {
                /* vowel or mark attached to curchar: nothing more to do */
            }
            //      else
            //        {
            //          (*len) += unligature (&curchar, level);
            //        }
            //      p = g_utf8_next_char (p);
        }
        
        /* Handle last char: nothing follows it, so it is final or isolated */
        if (connects_to_left(oldchar))
            which = 1;
        else
            which = 0;
        which = which % (curchar.numshapes);
        curchar.basechar = charshape(curchar.basechar, which);
        
        /* flush the two characters that are still pending */
        copycstostring(string, oldchar, level);
        copycstostring(string, curchar, level);
    }

    static int arabic_shape(char src[]int srcoffset, int srclength, char dest[]int destoffset, int destlength, int level) {
        char str[] new char[srclength];
        for (int k = srclength + srcoffset - 1; k >= srcoffset; --k)
            str[k - srcoffset= src[k];
        StringBuffer string = new StringBuffer(srclength);
        shape(str, string, level);
        if ((level & (ar_composedtashkeel | ar_lig)) != 0)
            doublelig(string, level);
//        string.reverse();
        System.arraycopy(string.toString().toCharArray()0, dest, destoffset, string.length());
        return string.length();
    }

    static void processNumbers(char text[]int offset, int length, int options) {
        int limit = offset + length;
        if ((options & DIGITS_MASK!= 0) {
            char digitBase = '\u0030'// European digits
            switch (options & DIGIT_TYPE_MASK) {
                case DIGIT_TYPE_AN:
                    digitBase = '\u0660';  // Arabic-Indic digits
                    break;
                    
                case DIGIT_TYPE_AN_EXTENDED:
                    digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)
                    break;
                    
                default:
                    break;
            }
            
            switch (options & DIGITS_MASK) {
                case DIGITS_EN2AN: {
                    int digitDelta = digitBase - '\u0030';
                    for (int i = offset; i < limit; ++i) {
                        char ch = text[i];
                        if (ch <= '\u0039' && ch >= '\u0030') {
                            text[i+= digitDelta;
                        }
                    }
                }
                break;
                
                case DIGITS_AN2EN: {
                    char digitTop = (char)(digitBase + 9);
                    int digitDelta = '\u0030' - digitBase;
                    for (int i = offset; i < limit; ++i) {
                        char ch = text[i];
                        if (ch <= digitTop && ch >= digitBase) {
                            text[i+= digitDelta;
                        }
                    }
                }
                break;
                
                case DIGITS_EN2AN_INIT_LR:
                    shapeToArabicDigitsWithContext(text, 0, length, digitBase, false);
                    break;
                    
                case DIGITS_EN2AN_INIT_AL:
                    shapeToArabicDigitsWithContext(text, 0, length, digitBase, true);
                    break;
                    
                default:
                    break;
            }
        }
    }
    
    static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase,  boolean lastStrongWasAL) {
        digitBase -= '0'// move common adjustment out of loop
 
        int limit = start + length;
        for(int i = start; i < limit; ++i) {
            char ch = dest[i];
            switch (BidiOrder.getDirection(ch)) {
            case BidiOrder.L:
            case BidiOrder.R:
                lastStrongWasAL = false;
                break;
            case BidiOrder.AL:
                lastStrongWasAL = true;
                break;
            case BidiOrder.EN:
                if (lastStrongWasAL && ch <= '\u0039') {
                    dest[i(char)(ch + digitBase);
                }
                break;
            default:
                break;
            }
        }
    }

    // Base letters that take part in ligature formation (see ligature()).
    private static final char ALEF = 0x0627;
    private static final char ALEFHAMZA = 0x0623;
    private static final char ALEFHAMZABELOW = 0x0625;
    private static final char ALEFMADDA = 0x0622;
    private static final char LAM = 0x0644;
    // NOTE(review): HAMZA and TATWEEL are not referenced anywhere in this class as shown.
    private static final char HAMZA = 0x0621;
    private static final char TATWEEL = 0x0640;
    // Zero width joiner; shapecount() reports 4 shapes for it.
    private static final char ZWJ = 0x200D;

    // Combining hamza marks.
    private static final char HAMZAABOVE = 0x0654;
    private static final char HAMZABELOW = 0x0655;

    // Letters that absorb a following HAMZAABOVE, and the precomposed results.
    private static final char WAWHAMZA = 0x0624;
    private static final char YEHHAMZA = 0x0626;
    private static final char WAW = 0x0648;
    private static final char ALEFMAKSURA = 0x0649;
    private static final char YEH = 0x064A;
    private static final char FARSIYEH = 0x06CC;

    // Vowel signs and marks handled by ligature() and doublelig().
    private static final char SHADDA = 0x0651;
    private static final char KASRA = 0x0650;
    private static final char FATHA = 0x064E;
    private static final char DAMMA = 0x064F;
    private static final char MADDA = 0x0653;

    // Lam-Alef ligatures (presentation forms); charshape() adds the shape index to these.
    private static final char LAM_ALEF = 0xFEFB;
    private static final char LAM_ALEFHAMZA = 0xFEF7;
    private static final char LAM_ALEFHAMZABELOW = 0xFEF9;
    private static final char LAM_ALEFMADDA = 0xFEF5;

    private static final char chartable[][] {
        {0x06210xFE80}/* HAMZA */
        {0x06220xFE810xFE82}/* ALEF WITH MADDA ABOVE */
        {0x06230xFE830xFE84}/* ALEF WITH HAMZA ABOVE */
        {0x06240xFE850xFE86}/* WAW WITH HAMZA ABOVE */
        {0x06250xFE870xFE88}/* ALEF WITH HAMZA BELOW */
        {0x06260xFE890xFE8A0xFE8B0xFE8C}/* YEH WITH HAMZA ABOVE */
        {0x06270xFE8D0xFE8E}/* ALEF */
        {0x06280xFE8F0xFE900xFE910xFE92}/* BEH */
        {0x06290xFE930xFE94}/* TEH MARBUTA */
        {0x062A0xFE950xFE960xFE970xFE98}/* TEH */
        {0x062B0xFE990xFE9A0xFE9B0xFE9C}/* THEH */
        {0x062C0xFE9D0xFE9E0xFE9F0xFEA0}/* JEEM */
        {0x062D0xFEA10xFEA20xFEA30xFEA4}/* HAH */
        {0x062E0xFEA50xFEA60xFEA70xFEA8}/* KHAH */
        {0x062F0xFEA90xFEAA}/* DAL */
        {0x06300xFEAB0xFEAC}/* THAL */
        {0x06310xFEAD0xFEAE}/* REH */
        {0x06320xFEAF0xFEB0}/* ZAIN */
        {0x06330xFEB10xFEB20xFEB30xFEB4}/* SEEN */
        {0x06340xFEB50xFEB60xFEB70xFEB8}/* SHEEN */
        {0x06350xFEB90xFEBA0xFEBB0xFEBC}/* SAD */
        {0x06360xFEBD0xFEBE0xFEBF0xFEC0}/* DAD */
        {0x06370xFEC10xFEC20xFEC30xFEC4}/* TAH */
        {0x06380xFEC50xFEC60xFEC70xFEC8}/* ZAH */
        {0x06390xFEC90xFECA0xFECB0xFECC}/* AIN */
        {0x063A0xFECD0xFECE0xFECF0xFED0}/* GHAIN */
        {0x06400x06400x06400x06400x0640}/* TATWEEL */
        {0x06410xFED10xFED20xFED30xFED4}/* FEH */
        {0x06420xFED50xFED60xFED70xFED8}/* QAF */
        {0x06430xFED90xFEDA0xFEDB0xFEDC}/* KAF */
        {0x06440xFEDD0xFEDE0xFEDF0xFEE0}/* LAM */
        {0x06450xFEE10xFEE20xFEE30xFEE4}/* MEEM */
        {0x06460xFEE50xFEE60xFEE70xFEE8}/* NOON */
        {0x06470xFEE90xFEEA0xFEEB0xFEEC}/* HEH */
        {0x06480xFEED0xFEEE}/* WAW */
        {0x06490xFEEF0xFEF00xFBE80xFBE9}/* ALEF MAKSURA */
        {0x064A0xFEF10xFEF20xFEF30xFEF4}/* YEH */
        {0x06710xFB500xFB51}/* ALEF WASLA */
        {0x06790xFB660xFB670xFB680xFB69}/* TTEH */
        {0x067A0xFB5E0xFB5F0xFB600xFB61}/* TTEHEH */
        {0x067B0xFB520xFB530xFB540xFB55}/* BEEH */
        {0x067E0xFB560xFB570xFB580xFB59}/* PEH */
        {0x067F0xFB620xFB630xFB640xFB65}/* TEHEH */
        {0x06800xFB5A0xFB5B0xFB5C0xFB5D}/* BEHEH */
        {0x06830xFB760xFB770xFB780xFB79}/* NYEH */
        {0x06840xFB720xFB730xFB740xFB75}/* DYEH */
        {0x06860xFB7A0xFB7B0xFB7C0xFB7D}/* TCHEH */
        {0x06870xFB7E0xFB7F0xFB800xFB81}/* TCHEHEH */
        {0x06880xFB880xFB89}/* DDAL */
        {0x068C0xFB840xFB85}/* DAHAL */
        {0x068D0xFB820xFB83}/* DDAHAL */
        {0x068E0xFB860xFB87}/* DUL */
        {0x06910xFB8C0xFB8D}/* RREH */
        {0x06980xFB8A0xFB8B}/* JEH */
        {0x06A40xFB6A0xFB6B0xFB6C0xFB6D}/* VEH */
        {0x06A60xFB6E0xFB6F0xFB700xFB71}/* PEHEH */
        {0x06A90xFB8E0xFB8F0xFB900xFB91}/* KEHEH */
        {0x06AD0xFBD30xFBD40xFBD50xFBD6}/* NG */
        {0x06AF0xFB920xFB930xFB940xFB95}/* GAF */
        {0x06B10xFB9A0xFB9B0xFB9C0xFB9D}/* NGOEH */
        {0x06B30xFB960xFB970xFB980xFB99}/* GUEH */
        {0x06BA0xFB9E0xFB9F}/* NOON GHUNNA */
        {0x06BB0xFBA00xFBA10xFBA20xFBA3}/* RNOON */
        {0x06BE0xFBAA0xFBAB0xFBAC0xFBAD}/* HEH DOACHASHMEE */
        {0x06C00xFBA40xFBA5}/* HEH WITH YEH ABOVE */
        {0x06C10xFBA60xFBA70xFBA80xFBA9}/* HEH GOAL */
        {0x06C50xFBE00xFBE1}/* KIRGHIZ OE */
        {0x06C60xFBD90xFBDA}/* OE */
        {0x06C70xFBD70xFBD8}/* U */
        {0x06C80xFBDB0xFBDC}/* YU */
        {0x06C90xFBE20xFBE3}/* KIRGHIZ YU */
        {0x06CB0xFBDE0xFBDF}/* VE */
        {0x06CC0xFBFC0xFBFD0xFBFE0xFBFF}/* FARSI YEH */
        {0x06D00xFBE40xFBE50xFBE60xFBE7}/* E */
        {0x06D20xFBAE0xFBAF}/* YEH BARREE */
        {0x06D30xFBB00xFBB1/* YEH BARREE WITH HAMZA ABOVE */
        };

        public static final int ar_nothing  = 0x0;
        public static final int ar_novowel = 0x1;
        public static final int ar_composedtashkeel = 0x4;
        public static final int ar_lig = 0x8;
        /**
         * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
         */
        public static final int DIGITS_EN2AN = 0x20;
        
        /**
         * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
         */
        public static final int DIGITS_AN2EN = 0x40;
        
        /**
         * Digit shaping option:
         * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
         * if the most recent strongly directional character
         * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
         * The initial state at the start of the text is assumed to be not an Arabic,
         * letter, so European digits at the start of the text will not change.
         * Compare to DIGITS_ALEN2AN_INIT_AL.
         */
        public static final int DIGITS_EN2AN_INIT_LR = 0x60;
        
        /**
         * Digit shaping option:
         * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
         * if the most recent strongly directional character
         * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
         * The initial state at the start of the text is assumed to be an Arabic,
         * letter, so European digits at the start of the text will change.
         * Compare to DIGITS_ALEN2AN_INT_LR.
         */
        public static final int DIGITS_EN2AN_INIT_AL = 0x80;
        
        /** Not a valid option value. */
        private static final int DIGITS_RESERVED = 0xa0;
        
        /**
         * Bit mask for digit shaping options.
         */
        public static final int DIGITS_MASK = 0xe0;
        
        /**
         * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
         */
        public static final int DIGIT_TYPE_AN = 0;
        
        /**
         * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
         */
        public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;

        /**
         * Bit mask for digit type options.
         */
        public static final int DIGIT_TYPE_MASK = 0x0100// 0x3f00?

        /**
         * Working record for one output character while text is being shaped:
         * a base character plus the mark and vowel attached to it.
         * All fields start at zero except {@code numshapes}.
         */
        static class charstruct {
            char basechar;            /* the base (possibly already shaped) character; 0 means empty */
            char mark1;               /* has to be initialized to zero */
            char vowel;               /* attached vowel, or 0 if none */
            int lignum;           /* is a ligature with lignum additional characters */
            int numshapes = 1;        /* number of presentation forms of basechar */
        };


}