[luatex] Arabic alphabets are not connected
Yannis Haralambous
yharalambous at me.com
Sun Feb 8 17:00:01 CET 2009
Le 8 févr. 09 à 16:50, وفا خلیقی a écrit :
> Is there a sample code that I can study?
Here is an OTP file I wrote in 1994. It works in the following way: it
takes input coded in Unicode and shifts each code point by a hexadecimal
offset that depends on the contextual form of the character:
initial form is translated by DC00
medial form is translated by DE00
final form is translated by E000
isolated form is translated by DA00
short vowels (called ACCENT in the file) are translated by DA00
short vowels + shadda are translated by DA90
The lam-alif and alif-lam-lam-hah ligatures are treated in a different
OTP.
%%% ====================================================================
%%% @OmegaTranslationProcess-file{
%%% author = "Yannis Haralambous",
%%% version = "1.0",
%%% date = "16/07/94",
%%% time = "9:53:26 MET",
%%% filename = "UnicodeToContUnicode.otp",
%%% address = "187, rue Nationale,
%%% 59800 Lille, France"
%%% telephone = "",
%%% FAX = "+33 20402864",
%%% checksum = "",
%%% email = "Yannis.Haralambous at univ-lille1.fr",
%%% codetable = "ISO/ASCII",
%%% keywords = "",
%%% supported = "yes",
%%% docstring = "This file is part of the public domain
%%% Arabic-script Omega system.
%%% Omega is a TeX extension (C) John Plaice
%%% and Yannis Haralambous, 1994"
%%% }
%%% ====================================================================
% Width of one input / output character.
% NOTE(review): presumably counted in octets (2 = 16-bit characters) --
% confirm against the Omega OTP documentation.
input: 2;
output: 2;
% States used by the expressions in addition to the default one:
%   MEDIAL  - pushed after a four-shape letter has been emitted in its
%             initial form, i.e. while inside a connected group of letters;
%   NUMERAL - pushed after "\beginL{}" has been emitted in front of a number.
states: MEDIAL,NUMERAL;
aliases:
% Character classes referenced by the expressions.
% NOTE(review): several code points are classified differently from what
% their Unicode names suggest; presumably this mirrors the companion font.
% Confirm against the font before changing any range.

% Shadda marker.  Currently set to @"FFFF instead of the real shadda
% @"0651; @"0651 lies inside ACCENT and is therefore handled like any
% other accent.
SHADDA = (@"FFFF); % was 0651

% Letters that are always emitted in their isolated form.
UNIFORM = (@"0621 | @"0674 | @"066E | @"066F | @"06EF | @"063F);

% Code points the expressions copy through unchanged (only a following
% accent is remapped).
SPECIAL = (@"FDF2-@"FDF4) ;

% Letters with two shapes: isolated and final.
% The range starting at @"0688 stops at @"0699 on purpose: @"069A opens the
% QUADRIFORM range @"069A-@"06B7 and must not belong to both classes.  (It
% was previously listed here as well; since every QUADRIFORM expression
% precedes the BIFORM ones, it was already being handled as QUADRIFORM.)
BIFORM = (@"0605 | @"0606 | @"0613 | @"0622-@"0625 | @"0627 |
          @"0629 | @"062F-@"0632 | @"0648 |
          @"065D | @"065E |
          @"0671-@"0673 | @"0675-@"0677 | @"0688-@"0699 |
          @"06BA | @"06C0-@"06CB | @"06CD | @"06D2 | @"06D3 |
          @"06FF | @"0710 | @"0715-@"0719 | @"071E | @"0724 |
          @"0728 | @"072A | @"072C | @"072F | @"0750);

% Letters with four shapes: isolated, initial, medial and final.
% NOTE(review): @"060C is listed here but is deliberately left out of the
% ranges excluded by NOT_ARABIC_LETTER, so it also counts as a non-letter --
% check which of the two was intended.
QUADRIFORM = (@"0600-@"0604 | @"0607-@"060B | @"060C-@"0612 | @"0616 |
              @"0626 | @"0628 | @"062A-@"062E | @"0633-@"063A |
              @"0640-@"0647 |
              @"0649 | @"064A |
%%%           @"0655-@"0657 | @"065B | @"065C |
              @"0659 |
              @"0678-@"0687 | @"069A-@"06B7 |
              @"06BB-@"06BF | @"06CC | @"06CE | @"06D0 | @"06D1 |
              @"06FE | @"0712-@"0714 | @"071A-@"071D | @"071F-@"0723 |
              @"0725-@"0727 | @"0729 | @"072B | @"0751-@"0757);

% Marks that may follow a letter (short vowels and similar signs).
ACCENT = (@"064B-@"0658 | @"0670 | @"0711 | @"0730-@"074F);

% Union of the two joining classes.  Not referenced by the expressions in
% this file.
ARABIC_LETTER = ({BIFORM} | {QUADRIFORM});

% Everything outside the listed ranges.  Used both to copy foreign
% characters through and as the look-ahead that ends a connected group.
% NOTE(review): @"06FE (QUADRIFORM, LAM_LIKE) and @"06FF (BIFORM) are not
% inside any excluded range, so they also match this class -- confirm
% whether they should be excluded here.
NOT_ARABIC_LETTER = ^(@"0600-@"060B | @"060D-@"0613 | @"0616 |
                      @"0621-@"065F | @"0670-@"06D3 | @"0710-@"075F);

% A character after which the preceding letter cannot connect: a non-letter
% or a single-shape letter.
NOT_ARABIC_OR_UNI = ({NOT_ARABIC_LETTER}|{UNIFORM});

% ASCII, Arabic-Indic and Extended Arabic-Indic digits, and the complement.
ARABIC_NUMBER = (@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
NOT_ARABIC_NUMBER = ^(@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);

% Not referenced by the expressions in this file; presumably kept for the
% separate ligature OTP -- TODO confirm.
LAM_LIKE = (@"0644 | @"06B5-@"06B7 | @"06FE);
ALIF_LIKE = (@"0622|@"0623|@"0625|@"0627|@"0671-@"0673);
expressions:
% Within each group the longer patterns are listed before the shorter ones.
% NOTE(review): this relies on the first matching expression being used --
% confirm against the Omega OTP documentation before reordering anything.

% --- Single-shape letters: always isolated form (+ @"DA00). ---
% With a shadda the shadda itself is dropped and the accent is shifted by
% @"DA90 instead of @"DA00.
{UNIFORM}{SHADDA}{ACCENT}
    => #(\1 + @"DA00) #(\3 + @"DA90) ;
{UNIFORM}{ACCENT}
    => #(\1 + @"DA00) #(\2 + @"DA00) ;
{UNIFORM}
    => #(\1 + @"DA00) ;

% --- SPECIAL code points: copied unchanged, only the accent is shifted. ---
{SPECIAL}{SHADDA}{ACCENT}
    => \1 #(\3 + @"DA90) ;
{SPECIAL}{ACCENT}
    => \1 #(\2 + @"DA00) ;
{SPECIAL}
    => \1 ;
% --- Numbers ---
% A run of digits is wrapped in "\beginL{}" ... "\endL{}".  The NUMERAL
% state is active between the two.

% Inside a number: last digit of the input closes the group.
<NUMERAL>{ARABIC_NUMBER} end:
    => #(\1) "\endL{}"
    <pop:> ;
% Inside a number: one more digit.
<NUMERAL>{ARABIC_NUMBER}
    => #(\1) ;
% Inside a number: '+', '-', '.', @"066B or @"066C followed by a digit is
% kept as part of the number (closing the group at end of input).
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER} end:
    => #(\1) #(\2) "\endL{}"
    <pop:> ;
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER}
    => #(\1) #(\2) ;
% Inside a number: anything else ends it; the character is pushed back so
% that it is processed again after leaving the NUMERAL state.
<NUMERAL>{NOT_ARABIC_NUMBER}
    => "\endL{}"
    <= #(\1)
    <pop:> ;

% Start of a number: '+', '-' or '.' followed by a digit.
(@"002B|@"002D|@"002E){ARABIC_NUMBER} end:
    => "\beginL{}" #(\1) #(\2) "\endL{}" ;
(@"002B|@"002D|@"002E){ARABIC_NUMBER}
    => "\beginL{}" #(\1) #(\2)
    <push: NUMERAL> ;
% Start of a number: a single digit at end of input is copied without any
% wrapper; otherwise the group is opened.
{ARABIC_NUMBER} end:
    => #(\1) ;
{ARABIC_NUMBER}
    => "\beginL{}" #(\1)
    <push: NUMERAL> ;
% An accent that starts a match (not consumed together with a letter) is
% shifted by @"DA00.
{ACCENT}
    => #(\1 + @"DA00) ;
% Any character matching NOT_ARABIC_LETTER is copied unchanged.
{NOT_ARABIC_LETTER}
    => #(\1) ;
% --- Four-shape letter standing alone: isolated form (+ @"DA00). ---
% The letter (with optional accents) is followed either by a character it
% cannot connect to, which is pushed back for reprocessing, or by the end
% of the input.

% Followed by a non-connecting character.
{QUADRIFORM}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"DA00)
    <= #(\2) ;
% End of input right after the letter.
{QUADRIFORM} end:
    => #(\1 + @"DA00) ;

% Letter + accents, followed by a non-connecting character.
{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"DA00) #(\3 + @"DA90)
    <= #(\4) ;
{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
    <= #(\4) ;
{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"DA00) #(\2 + @"DA00)
    <= #(\3) ;

% Letter + accents at end of input.
{QUADRIFORM}{SHADDA}{ACCENT} end:
    => #(\1 + @"DA00) #(\3 + @"DA90) ;
{QUADRIFORM}{ACCENT}{ACCENT} end:
    => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00) ;
{QUADRIFORM}{ACCENT} end:
    => #(\1 + @"DA00) #(\2 + @"DA00) ;
% --- Four-shape letter opening a connected group: initial form (+ @"DC00). ---
% Reached only when none of the isolated-form expressions matched.  The
% letter is followed by the internal keshideh and the MEDIAL state is
% entered for the rest of the group.
% @"0620 is our internal keshideh (not Unicode keshideh which is @"0640)
{QUADRIFORM}{SHADDA}{ACCENT}
    => #(\1 + @"DC00) #(\3 + @"DA90) @"0620
    <push: MEDIAL> ;
{QUADRIFORM}{ACCENT}{ACCENT}
    => #(\1 + @"DC00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
    <push: MEDIAL> ;
{QUADRIFORM}{ACCENT}
    => #(\1 + @"DC00) #(\2 + @"DA00) @"0620
    <push: MEDIAL> ;
{QUADRIFORM}
    => #(\1 + @"DC00) @"0620
    <push: MEDIAL> ;
% --- Two-shape letter outside a connected group: isolated form (+ @"DA00). ---
% No state change: the letter does not connect to whatever follows.
{BIFORM}{SHADDA}{ACCENT}
    => #(\1 + @"DA00) #(\3 + @"DA90) ;
{BIFORM}{ACCENT}{ACCENT}
    => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00) ;
{BIFORM}{ACCENT}
    => #(\1 + @"DA00) #(\2 + @"DA00) ;
{BIFORM}
    => #(\1 + @"DA00) ;
% --- Inside a group, four-shape letter that ends it: final form (+ @"E000). ---
% The letter (with optional accents) is followed by a non-connecting
% character, which is pushed back, or by the end of the input.  In every
% case the MEDIAL state is left.

% Followed by a non-connecting character.
<MEDIAL>{QUADRIFORM}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"E000)
    <= #(\2)
    <pop:> ;
% End of input right after the letter.
<MEDIAL>{QUADRIFORM} end:
    => #(\1 + @"E000)
    <pop:> ;

% Letter + accents, followed by a non-connecting character.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"E000) #(\3 + @"DA90)
    <= #(\4)
    <pop:> ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
    <= #(\4)
    <pop:> ;
<MEDIAL>{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
    => #(\1 + @"E000) #(\2 + @"DA00)
    <= #(\3)
    <pop:> ;

% Letter + accents at end of input.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT} end:
    => #(\1 + @"E000) #(\3 + @"DA90)
    <pop:> ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT} end:
    => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
    <pop:> ;
<MEDIAL>{QUADRIFORM}{ACCENT} end:
    => #(\1 + @"E000) #(\2 + @"DA00)
    <pop:> ;
% --- Inside a group, four-shape letter that continues it: medial form (+ @"DE00). ---
% Reached only when none of the final-form expressions matched.  The internal
% keshideh @"0620 is appended and no state change is requested, so
% processing stays in MEDIAL.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}
    => #(\1 + @"DE00) #(\3 + @"DA90) @"0620 ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}
    => #(\1 + @"DE00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620 ;
<MEDIAL>{QUADRIFORM}{ACCENT}
    => #(\1 + @"DE00) #(\2 + @"DA00) @"0620 ;
<MEDIAL>{QUADRIFORM}
    => #(\1 + @"DE00) @"0620 ;
% --- Inside a group, two-shape letter: final form (+ @"E000). ---
% A two-shape letter always closes the group, so the MEDIAL state is left
% whatever follows.
<MEDIAL>{BIFORM}{SHADDA}{ACCENT}
    => #(\1 + @"E000) #(\3 + @"DA90)
    <pop:> ;
<MEDIAL>{BIFORM}{ACCENT}{ACCENT}
    => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
    <pop:> ;
<MEDIAL>{BIFORM}{ACCENT}
    => #(\1 + @"E000) #(\2 + @"DA00)
    <pop:> ;
<MEDIAL>{BIFORM}
    => #(\1 + @"E000)
    <pop:> ;
% Copy code points @"F000-@"F07F through unchanged.
% NOTE(review): these code points also match {NOT_ARABIC_LETTER}, whose
% expression appears earlier and has the same effect -- check whether this
% expression is ever reached.
@"F000-@"F07F => \1 ;
More information about the luatex
mailing list