[luatex] Arabic alphabets are not connected

Yannis Haralambous yharalambous at me.com
Sun Feb 8 17:00:01 CET 2009


Le 8 févr. 09 à 16:50, وفا خلیقی a écrit :

> Is there a sample code that I can study?

Here is an OTP file I wrote in 1994. It works in the following way: it  
takes input coded in Unicode and translates codepoints depending on  
the form:

initial form: the code point is shifted by adding DC00
medial form: the code point is shifted by adding DE00
final form: the code point is shifted by adding E000
isolated form: the code point is shifted by adding DA00

short vowels (called ACCENT in the file) are shifted by adding DA00
a short vowel that follows a shadda is shifted by adding DA90 instead (the shadda itself is not output)

The lam-alif and alif-lam-lam-hah ligatures are treated in a different  
OTP.


%%% ====================================================================
%%%  @OmegaTranslationProcess-file{
%%%     author          = "Yannis Haralambous",
%%%     version         = "1.0",
%%%     date            = "16/07/94",
%%%     time            = "9:53:26 MET",
%%%     filename        = "UnicodeToContUnicode.otp",
%%%     address         = "187, rue Nationale,
%%%                        59800 Lille, France",
%%%     telephone       = "",
%%%     FAX             = "+33 20402864",
%%%     checksum        = "",
%%%     email           = "Yannis.Haralambous at univ-lille1.fr",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "",
%%%     supported       = "yes",
%%%     docstring       = "This file is part of the public domain
%%%                        Arabic-script Omega system.
%%%                        Omega is a TeX extension (C) John Plaice
%%%                        and Yannis Haralambous, 1994"
%%%  }
%%% ====================================================================
% Character size of the input and output streams.
% NOTE(review): in the OTP format this value is presumably the number of
% bytes per character (2 = 16-bit units) -- confirm against the Omega manual.
input: 2;
output: 2;

% Named states used by the expressions section:
%   MEDIAL  - pushed once an initial form has been emitted, i.e. while the
%             translator is inside a run of connected letters;
%   NUMERAL - pushed once "\beginL{}" has been emitted in front of a number.
states: MEDIAL,NUMERAL;

aliases:

% Character classes referenced by the expressions section.
%
% Changes relative to the version posted to the list:
%   * @"069A was listed in both BIFORM and QUADRIFORM; BIFORM now stops at
%     @"0699 so that the two classes are disjoint.
%   * @"060C was listed in QUADRIFORM although NOT_ARABIC_LETTER deliberately
%     leaves it out of its exclusion list; QUADRIFORM now starts at @"060D.
%   * @"06FE (QUADRIFORM, LAM_LIKE) and @"06FF (BIFORM) were missing from the
%     NOT_ARABIC_LETTER exclusion list, so the generic {NOT_ARABIC_LETTER}
%     rule could claim them before the shaping rules; they are now excluded.
%   * Definitions broken by mail line-wrapping (one of them in the middle of
%     a range) have been rejoined.

% Shadda marker.  Set to @"FFFF instead of @"0651, so @"0651 itself is
% covered by the ACCENT range below.
SHADDA		  = (@"FFFF); % was 0651

% Letters that are only ever emitted in isolated form.
UNIFORM		  = (@"0621 | @"0674 | @"066E | @"066F | @"06EF | @"063F);

% Code points copied to the output unchanged (accents after them are still
% shifted).
SPECIAL		  = (@"FDF2-@"FDF4);

% Letters for which the expressions only emit isolated and final forms.
BIFORM		  = (@"0605 | @"0606 | @"0613 | @"0622-@"0625 | @"0627 |
		     @"0629 | @"062F-@"0632 | @"0648 |
		     @"065D | @"065E |
		     @"0671-@"0673 | @"0675-@"0677 | @"0688-@"0699 |
		     @"06BA | @"06C0-@"06CB | @"06CD | @"06D2 | @"06D3 |
		     @"06FF | @"0710 | @"0715-@"0719 | @"071E | @"0724 |
		     @"0728 | @"072A | @"072C | @"072F | @"0750);

% Letters for which the expressions emit all four forms
% (isolated, initial, medial, final).
QUADRIFORM	  = (@"0600-@"0604 | @"0607-@"060B | @"060D-@"0612 | @"0616 |
		     @"0626 | @"0628 | @"062A-@"062E | @"0633-@"063A |
		     @"0640-@"0647 |
		     @"0649 | @"064A |
%%%		     @"0655-@"0657 | @"065B | @"065C |
		     @"0659 |
		     @"0678-@"0687 | @"069A-@"06B7 |
		     @"06BB-@"06BF | @"06CC | @"06CE | @"06D0 | @"06D1 |
		     @"06FE | @"0712-@"0714 | @"071A-@"071D | @"071F-@"0723 |
		     @"0725-@"0727 | @"0729 | @"072B | @"0751-@"0757);

% Short vowels and other marks attached to the preceding letter.
ACCENT		  = (@"064B-@"0658 | @"0670 | @"0711 | @"0730-@"074F);

ARABIC_LETTER	  = ({BIFORM} | {QUADRIFORM});

% Everything outside the ranges handled by the classes above.
NOT_ARABIC_LETTER = ^(@"0600-@"060B | @"060D-@"0613 | @"0616 |
		     @"0621-@"065F | @"0670-@"06D3 |
		     @"06FE | @"06FF | @"0710-@"075F);

% Characters that end a run of connected letters.
NOT_ARABIC_OR_UNI = ({NOT_ARABIC_LETTER}|{UNIFORM});

% ASCII, Arabic-Indic and Extended Arabic-Indic digits, and the complement.
ARABIC_NUMBER	  = (@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
NOT_ARABIC_NUMBER = ^(@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);

% Not referenced by the expressions in this file.
LAM_LIKE	  = (@"0644 | @"06B5-@"06B7 | @"06FE);
ALIF_LIKE	  = (@"0622|@"0623|@"0625|@"0627|@"0671-@"0673);

expressions:

% UNIFORM letters are always emitted in isolated form (+DA00).
% A following accent is shifted by DA00; when a SHADDA sits between the
% letter and the accent, the shadda (\2) is not output and the accent (\3)
% is shifted by DA90 instead.
{UNIFORM}{SHADDA}{ACCENT}
	=> #(\1 + @"DA00) #(\3 + @"DA90)
	;
{UNIFORM}{ACCENT}
	=> #(\1 + @"DA00) #(\2 + @"DA00)
	;
{UNIFORM}
	=> #(\1 + @"DA00)
	;
% SPECIAL code points are copied unchanged; accents after them are shifted
% exactly as for UNIFORM letters.
{SPECIAL}{SHADDA}{ACCENT}
	=> \1 #(\3 + @"DA90)
	;
{SPECIAL}{ACCENT}
	=> \1 #(\2 + @"DA00)
	;
{SPECIAL}
	=> \1
	;
% --- Numbers -------------------------------------------------------------
% A run of digits is wrapped in "\beginL{}" ... "\endL{}".  The NUMERAL
% state is pushed when "\beginL{}" is emitted and popped when "\endL{}" is.

% Inside a number: a digit at the very end of the input closes the group.
<NUMERAL>{ARABIC_NUMBER} end:
	=> #(\1) "\endL{}"
	<pop:>
	;
% Inside a number: any other digit is copied as is.
<NUMERAL>{ARABIC_NUMBER}
	=> #(\1)
	;
% Inside a number: one of @"002B, @"002D, @"002E, @"066B, @"066C followed
% by a digit stays inside the group (closing it if the input ends there).
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER} end:
	=> #(\1) #(\2) "\endL{}"
	<pop:>
	;
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER}
	=> #(\1) #(\2)
	;
% Inside a number: anything that is not a digit closes the group.  The
% character is placed after "<=" rather than "=>".
% NOTE(review): presumably "<=" pushes it back onto the input so that it is
% re-processed by the other rules -- confirm against the Omega manual.
<NUMERAL>{NOT_ARABIC_NUMBER}
	=> "\endL{}"
	<= #(\1)
	<pop:>
	;
% Outside a number: @"002B, @"002D or @"002E followed by a digit opens a
% group.  If the input ends right there the group is closed immediately and
% no state is pushed.
(@"002B|@"002D|@"002E){ARABIC_NUMBER} end:
	=> "\beginL{}" #(\1) #(\2) "\endL{}"
	;
(@"002B|@"002D|@"002E){ARABIC_NUMBER}
	=> "\beginL{}" #(\1) #(\2)
	<push: NUMERAL>
	;
% Outside a number: a lone digit at the end of the input is copied without
% any "\beginL{}"/"\endL{}" wrapper.
{ARABIC_NUMBER} end:
	=> #(\1)
	;
% Outside a number: any other digit opens a group.
{ARABIC_NUMBER}
	=> "\beginL{}" #(\1)
	<push: NUMERAL>
	;
% An accent that was not consumed together with a letter by another rule
% is shifted by DA00.
{ACCENT}
	=> #(\1 + @"DA00)
	;
% Anything that is not in the Arabic letter ranges is copied unchanged.
{NOT_ARABIC_LETTER}
	=> #(\1)
	;
% --- QUADRIFORM letter with no connecting letter after it -----------------
% The letter (optionally carrying SHADDA+ACCENT, two ACCENTs or one ACCENT)
% is followed by a NOT_ARABIC_OR_UNI character or by the end of the input:
% it is emitted in isolated form (+DA00).  Accents are shifted by DA00, or
% by DA90 after a SHADDA (which is itself not output).  The terminating
% character is placed after "<=" rather than "=>".
% NOTE(review): presumably "<=" pushes that character back onto the input
% for re-processing -- confirm against the Omega manual.
{QUADRIFORM}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"DA00) <= \2
	;
{QUADRIFORM} end:
	=> #(\1 + @"DA00)
	;
{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"DA00) #(\3 + @"DA90)
	<= #(\4)
	;
{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
	<= #(\4)
	;
{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"DA00) #(\2 + @"DA00)
	<= #(\3)
	;
{QUADRIFORM}{SHADDA}{ACCENT} end:
	=> #(\1 + @"DA00) #(\3 + @"DA90)
	;
{QUADRIFORM}{ACCENT}{ACCENT} end:
	=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
	;
{QUADRIFORM}{ACCENT} end:
	=> #(\1 + @"DA00) #(\2 + @"DA00)
	;
	
% @"0620 is our internal keshideh (not Unicode keshideh which is @"0640)

% --- QUADRIFORM letter starting a connected run ---------------------------
% None of the isolated-form rules matched, so the letter is emitted in
% initial form (+DC00), followed by its accents and by the internal
% keshideh @"0620.  The MEDIAL state is pushed so that the letters that
% follow are handled by the <MEDIAL> rules.
{QUADRIFORM}{SHADDA}{ACCENT}
	=> #(\1 + @"DC00) #(\3 + @"DA90) @"0620
	<push: MEDIAL>
	;
{QUADRIFORM}{ACCENT}{ACCENT}
	=> #(\1 + @"DC00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
	<push: MEDIAL>
	;
{QUADRIFORM}{ACCENT}
	=> #(\1 + @"DC00) #(\2 + @"DA00) @"0620
	<push: MEDIAL>
	;
{QUADRIFORM}
	=> #(\1 + @"DC00) @"0620
	<push: MEDIAL>
	;
% --- BIFORM letter outside a connected run --------------------------------
% Emitted in isolated form (+DA00) with its accents; no state is pushed, so
% the next letter is again handled by the non-MEDIAL rules.
{BIFORM}{SHADDA}{ACCENT}
	=> #(\1 + @"DA00) #(\3 + @"DA90)
	;
{BIFORM}{ACCENT}{ACCENT}
	=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
	;
{BIFORM}{ACCENT}
	=> #(\1 + @"DA00) #(\2 + @"DA00)
	;
{BIFORM}
	=> #(\1 + @"DA00)
	;
% --- Inside a connected run: QUADRIFORM letter that ends the run ----------
% The letter (optionally with accents) is followed by a NOT_ARABIC_OR_UNI
% character or by the end of the input: it is emitted in final form
% (+E000) and the MEDIAL state is popped.  The terminating character is
% placed after "<=" rather than "=>".
% NOTE(review): presumably "<=" pushes that character back onto the input
% for re-processing -- confirm against the Omega manual.
<MEDIAL>{QUADRIFORM}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"E000)
	<= #(\2)
	<pop:>
	;
<MEDIAL>{QUADRIFORM} end:
	=> #(\1 + @"E000)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"E000) #(\3 + @"DA90)
	<= #(\4)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
	<= #(\4)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
	=> #(\1 + @"E000) #(\2 + @"DA00)
	<= #(\3)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT} end:
	=> #(\1 + @"E000) #(\3 + @"DA90)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT} end:
	=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
	<pop:>
	;
<MEDIAL>{QUADRIFORM}{ACCENT} end:
	=> #(\1 + @"E000) #(\2 + @"DA00)
	<pop:>
	;
% --- Inside a connected run: QUADRIFORM letter that continues the run -----
% None of the final-form rules matched, so the letter is emitted in medial
% form (+DE00), followed by its accents and by the internal keshideh
% @"0620.  The state is left unchanged (still MEDIAL).
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}
	=> #(\1 + @"DE00) #(\3 + @"DA90) @"0620
	;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}
	=> #(\1 + @"DE00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
	;
<MEDIAL>{QUADRIFORM}{ACCENT}
	=> #(\1 + @"DE00) #(\2 + @"DA00) @"0620
	;
<MEDIAL>{QUADRIFORM}
	=> #(\1 + @"DE00) @"0620
	;
% --- Inside a connected run: BIFORM letter --------------------------------
% A BIFORM letter always ends the run: it is emitted in final form (+E000)
% with its accents, and the MEDIAL state is popped.
<MEDIAL>{BIFORM}{SHADDA}{ACCENT}
	=> #(\1 + @"E000) #(\3 + @"DA90)
	<pop:>
	;
<MEDIAL>{BIFORM}{ACCENT}{ACCENT}
	=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
	<pop:>
	;
<MEDIAL>{BIFORM}{ACCENT}
	=> #(\1 + @"E000) #(\2 + @"DA00)
	<pop:>
	;
<MEDIAL>{BIFORM}
	=> #(\1 + @"E000)
	<pop:>
	;
	
% Code points @"F000-@"F07F are copied to the output unchanged.
@"F000-@"F07F => \1 ;



More information about the luatex mailing list