[tex-hyphen] extending `hyph-zh-latn-pinyin.tex'

Werner LEMBERG wl at gnu.org
Sun Nov 25 13:05:06 CET 2018


>> To be serious: If patgen produces those patterns, I think they are
>> *really* necessary.
>
>   Do you still have the source file somewhere?

Thanks for insisting.  I've now recompiled the C source file and found
entries like

  xi'-an

in the generated patterns which look strange to me (no idea why they
didn't look strange to me 15 years ago).  And indeed, after some
searching on the internet I found out that the apostrophe must vanish
at a line break!  In other words, an entry like

  Tian'anmen

becomes

  Tian-
  anmen.

Sigh.  So I've completely rewritten pinyin support, see

  http://git.savannah.gnu.org/gitweb/?p=cjk.git;a=commit;h=e058ceae5561a89e33828f593b094c8b57f6a72e

Attached are the resulting pattern files, which can now be easily
generated with `make' :-)

I will eventually submit a Babel language file for pinyin to CTAN,
too.

Note that I think it's best to handle the `tonepinyin' variant as the
standard, demoting to `pinyin' only on systems that are not
Unicode-aware.  We thus have

  xetex, luatex, uptex -> hyph-zh-latn-tonepinyin.tex
  all other engines    -> hyph-zh-latn-pinyin.tex

which simplifies the Babel interface since I don't need to add a
switch.

Hopefully, this can be easily managed within `hyph-utf8'.


    Werner
-------------- next part --------------
% title: Hyphenation patterns for Chinese pinyin (Mandarin), without tone
%        markers
%
% version: 2018-11-25
%
% authors:
%   -
%     name:    Werner Lemberg
%     contact: wl (at) gnu.org
%
% copyright: Copyright (C) 1994-2018 Werner Lemberg
%
% licence:
%     name: MIT
%     url:  http://opensource.org/licenses/mit-license.php
%     text: >
%           Permission is hereby granted, free of charge, to any person
%           obtaining a copy of this software and associated documentation
%           files (the “Software”), to deal in the Software without
%           restriction, including without limitation the rights to use,
%           copy, modify, merge, publish, distribute, sublicense, and/or
%           sell copies of the Software, and to permit persons to whom the
%           Software is furnished to do so, subject to the following
%           conditions:
%
%           The above copyright notice and this permission notice shall be
%           included in all copies or substantial portions of the Software.
%
%           THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
%           EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
%           OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
%           NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
%           HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
%           WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
%           FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
%           OTHER DEALINGS IN THE SOFTWARE.
%
% source: http://git.savannah.gnu.org/gitweb/?p=cjk.git;a=commit;h=e058ceae5561a89e33828f593b094c8b57f6a72e
%
% language:
%     name: Mandarin Chinese, pinyin transliteration (without tone markers)
%     tag:  zh-latn-pinyin
%
% hyphenmins:
%     generation:
%         left:  1
%         right: 2
%     typesetting:
%         left:  1
%         right: 2

% Print an identification banner on the terminal and in the log file when
% this pattern file is read.
\message{Pinyin Hyphenation Patterns (without tone markers) 2018-11-25 (WL)}

%
% The patgen parameters used to generate the patterns below are
%
%   1 1 | 2 5 | 1 1 1
%   2 2 | 2 5 | 1 2 1
%
% NOTE(review): each row presumably reads `hyph_start hyph_finish |
% pat_start pat_finish | good bad threshold' for one patgen level --
% confirm against the generating Makefile in the cjk repository.
%

% Pattern list for pinyin written without tone markers.
%
% Notation (standard TeX \patterns syntax): a digit between two letters
% rates that inter-letter position; an odd value permits a hyphenation
% point there, an even value suppresses one, and the highest value found
% for a position wins.  So `a1b' permits a break between `a' and `b',
% `1ga' permits one before `ga', and `2r1b' suppresses a break before `r'
% while permitting one between `r' and `b'.
%
% The letter `ü' is stored as UTF-8 (see the `coding: utf-8' trailer of
% this file).  Keep the file in UTF-8: a lossy re-encoding turns `ü' into
% `?', which destroys the `1nü' and `ü1...' entries.
\patterns{
a1b
a1c
a1d
a1f
a1g
a1h
a1j
a1k
a1l
a1m
a1p
a1q
a1r
a1s
a1t
a1w
a1x
a1y
a1z
e1b
e1c
e1d
e1f
e1g
e1h
e1j
e1k
e1l
e1m
e1p
e1q
e1r
e1s
e1t
e1w
e1x
e1y
e1z
1ga
g1b
g1c
g1d
1ge
g1f
g1g
g1h
g1j
g1k
g1l
g1m
g1n
1go
g1p
g1q
g1r
g1s
g1t
1gu
g1w
g1x
g1y
g1z
i1b
i1c
i1d
i1f
i1g
i1h
i1j
i1k
i1l
i1m
i1p
i1q
i1r
i1s
i1t
i1w
i1x
i1y
i1z
1na
n1b
n1c
n1d
1ne
n1f
n1h
1ni
n1j
n1k
n1l
n1m
n1n
1no
n1p
n1q
n1r
n1s
n1t
1nu
1nü
n1w
n1x
n1y
n1z
o1b
o1c
o1d
o1f
o1g
o1h
o1j
o1k
o1l
o1m
o1p
o1q
o1r
o1s
o1t
o1w
o1x
o1y
o1z
2r1b
2r1c
2r1d
2r1f
2r1g
2r1h
2r1j
2r1k
2r1l
2r1m
2r1n
2r1p
2r1q
2r1r
2r1s
2r1t
2r1w
2r1x
2r1y
2r1z
u1b
u1c
u1d
u1f
u1g
u1h
u1j
u1k
u1l
u1m
u1p
u1q
u1r
u1s
u1t
u1w
u1x
u1y
u1z
ü1b
ü1c
ü1d
ü1f
ü1g
ü1h
ü1j
ü1k
ü1l
ü1m
ü1n
ü1p
ü1q
ü1r
ü1s
ü1t
ü1w
ü1x
ü1y
ü1z
}

% Stop reading here; the editor trailer below is never scanned by TeX.
\endinput

%%% Local Variables:
%%% mode: tex
%%% coding: utf-8
%%% End:
-------------- next part --------------
% title: Hyphenation patterns for Chinese pinyin (Mandarin), with tone
%        markers
%
% version: 2018-11-25
%
% authors:
%   -
%     name:    Werner Lemberg
%     contact: wl (at) gnu.org
%
% copyright: Copyright (C) 1994-2018 Werner Lemberg
%
% licence:
%     name: MIT
%     url:  http://opensource.org/licenses/mit-license.php
%     text: >
%           Permission is hereby granted, free of charge, to any person
%           obtaining a copy of this software and associated documentation
%           files (the “Software”), to deal in the Software without
%           restriction, including without limitation the rights to use,
%           copy, modify, merge, publish, distribute, sublicense, and/or
%           sell copies of the Software, and to permit persons to whom the
%           Software is furnished to do so, subject to the following
%           conditions:
%
%           The above copyright notice and this permission notice shall be
%           included in all copies or substantial portions of the Software.
%
%           THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
%           EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
%           OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
%           NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
%           HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
%           WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
%           FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
%           OTHER DEALINGS IN THE SOFTWARE.
%
% source: http://git.savannah.gnu.org/gitweb/?p=cjk.git;a=commit;h=e058ceae5561a89e33828f593b094c8b57f6a72e
%
% language:
%     name: Mandarin Chinese, pinyin transliteration (with tone markers)
%     tag:  zh-latn-tonepinyin
%
% hyphenmins:
%     generation:
%         left:  1
%         right: 2
%     typesetting:
%         left:  1
%         right: 2

% Print an identification banner on the terminal and in the log file when
% this pattern file is read.
\message{Pinyin Hyphenation Patterns (with tone markers) 2018-11-25 (WL)}

%
% The patgen parameters used to generate the patterns below are
%
%   1 1 | 2 5 | 1 1 1
%   2 2 | 2 5 | 1 2 1
%
% NOTE(review): each row presumably reads `hyph_start hyph_finish |
% pat_start pat_finish | good bad threshold' for one patgen level --
% confirm against the generating Makefile in the cjk repository.
%

% Pattern list for pinyin written with tone markers.
%
% Notation (standard TeX \patterns syntax): a digit between two letters
% rates that inter-letter position; an odd value permits a hyphenation
% point there, an even value suppresses one, and the highest value found
% for a position wins.  So `a1b' permits a break between `a' and `b',
% `1ga' permits one before `ga', and `2r1b' suppresses a break before `r'
% while permitting one between `r' and `b'.
%
% Every row that involves a vowel lists the bare vowel first, followed by
% its four tone-marked forms in tone order (macron, acute, caron, grave);
% for `ü' these are `ǖ ǘ ǚ ǜ'.  The order of entries has no influence on
% the resulting hyphenation.
%
% All accented letters are stored as UTF-8 (see the `coding: utf-8' trailer
% of this file).  Keep the file in UTF-8: a lossy re-encoding turns every
% accented letter into `?', which collapses the four tone variants of a
% row into identical, meaningless entries.
\patterns{
a1b ā1b á1b ǎ1b à1b
a1c ā1c á1c ǎ1c à1c
a1d ā1d á1d ǎ1d à1d
a1f ā1f á1f ǎ1f à1f
a1g ā1g á1g ǎ1g à1g
a1h ā1h á1h ǎ1h à1h
a1j ā1j á1j ǎ1j à1j
a1k ā1k á1k ǎ1k à1k
a1l ā1l á1l ǎ1l à1l
a1m ā1m á1m ǎ1m à1m
a1p ā1p á1p ǎ1p à1p
a1q ā1q á1q ǎ1q à1q
a1r ā1r á1r ǎ1r à1r
a1s ā1s á1s ǎ1s à1s
a1t ā1t á1t ǎ1t à1t
a1w ā1w á1w ǎ1w à1w
a1x ā1x á1x ǎ1x à1x
a1y ā1y á1y ǎ1y à1y
a1z ā1z á1z ǎ1z à1z
e1b ē1b é1b ě1b è1b
e1c ē1c é1c ě1c è1c
e1d ē1d é1d ě1d è1d
e1f ē1f é1f ě1f è1f
e1g ē1g é1g ě1g è1g
e1h ē1h é1h ě1h è1h
e1j ē1j é1j ě1j è1j
e1k ē1k é1k ě1k è1k
e1l ē1l é1l ě1l è1l
e1m ē1m é1m ě1m è1m
e1p ē1p é1p ě1p è1p
e1q ē1q é1q ě1q è1q
e1r ē1r é1r ě1r è1r
e1s ē1s é1s ě1s è1s
e1t ē1t é1t ě1t è1t
e1w ē1w é1w ě1w è1w
e1x ē1x é1x ě1x è1x
e1y ē1y é1y ě1y è1y
e1z ē1z é1z ě1z è1z
1ga 1gā 1gá 1gǎ 1gà
g1b
g1c
g1d
1ge 1gē 1gé 1gě 1gè
g1f
g1g
g1h
g1j
g1k
g1l
g1m
g1n
1go 1gō 1gó 1gǒ 1gò
g1p
g1q
g1r
g1s
g1t
1gu 1gū 1gú 1gǔ 1gù
g1w
g1x
g1y
g1z
i1b ī1b í1b ǐ1b ì1b
i1c ī1c í1c ǐ1c ì1c
i1d ī1d í1d ǐ1d ì1d
i1f ī1f í1f ǐ1f ì1f
i1g ī1g í1g ǐ1g ì1g
i1h ī1h í1h ǐ1h ì1h
i1j ī1j í1j ǐ1j ì1j
i1k ī1k í1k ǐ1k ì1k
i1l ī1l í1l ǐ1l ì1l
i1m ī1m í1m ǐ1m ì1m
i1p ī1p í1p ǐ1p ì1p
i1q ī1q í1q ǐ1q ì1q
i1r ī1r í1r ǐ1r ì1r
i1s ī1s í1s ǐ1s ì1s
i1t ī1t í1t ǐ1t ì1t
i1w ī1w í1w ǐ1w ì1w
i1x ī1x í1x ǐ1x ì1x
i1y ī1y í1y ǐ1y ì1y
i1z ī1z í1z ǐ1z ì1z
1na 1nā 1ná 1nǎ 1nà
n1b
n1c
n1d
1ne 1nē 1né 1ně 1nè
n1f
n1h
1ni 1nī 1ní 1nǐ 1nì
n1j
n1k
n1l
n1m
n1n
1no 1nō 1nó 1nǒ 1nò
n1p
n1q
n1r
n1s
n1t
1nu 1nū 1nú 1nǔ 1nù
1nü 1nǖ 1nǘ 1nǚ 1nǜ
n1w
n1x
n1y
n1z
o1b ō1b ó1b ǒ1b ò1b
o1c ō1c ó1c ǒ1c ò1c
o1d ō1d ó1d ǒ1d ò1d
o1f ō1f ó1f ǒ1f ò1f
o1g ō1g ó1g ǒ1g ò1g
o1h ō1h ó1h ǒ1h ò1h
o1j ō1j ó1j ǒ1j ò1j
o1k ō1k ó1k ǒ1k ò1k
o1l ō1l ó1l ǒ1l ò1l
o1m ō1m ó1m ǒ1m ò1m
o1p ō1p ó1p ǒ1p ò1p
o1q ō1q ó1q ǒ1q ò1q
o1r ō1r ó1r ǒ1r ò1r
o1s ō1s ó1s ǒ1s ò1s
o1t ō1t ó1t ǒ1t ò1t
o1w ō1w ó1w ǒ1w ò1w
o1x ō1x ó1x ǒ1x ò1x
o1y ō1y ó1y ǒ1y ò1y
o1z ō1z ó1z ǒ1z ò1z
2r1b
2r1c
2r1d
2r1f
2r1g
2r1h
2r1j
2r1k
2r1l
2r1m
2r1n
2r1p
2r1q
2r1r
2r1s
2r1t
2r1w
2r1x
2r1y
2r1z
u1b ū1b ú1b ǔ1b ù1b
u1c ū1c ú1c ǔ1c ù1c
u1d ū1d ú1d ǔ1d ù1d
u1f ū1f ú1f ǔ1f ù1f
u1g ū1g ú1g ǔ1g ù1g
u1h ū1h ú1h ǔ1h ù1h
u1j ū1j ú1j ǔ1j ù1j
u1k ū1k ú1k ǔ1k ù1k
u1l ū1l ú1l ǔ1l ù1l
u1m ū1m ú1m ǔ1m ù1m
u1p ū1p ú1p ǔ1p ù1p
u1q ū1q ú1q ǔ1q ù1q
u1r ū1r ú1r ǔ1r ù1r
u1s ū1s ú1s ǔ1s ù1s
u1t ū1t ú1t ǔ1t ù1t
u1w ū1w ú1w ǔ1w ù1w
u1x ū1x ú1x ǔ1x ù1x
u1y ū1y ú1y ǔ1y ù1y
u1z ū1z ú1z ǔ1z ù1z
ü1b ǖ1b ǘ1b ǚ1b ǜ1b
ü1c ǖ1c ǘ1c ǚ1c ǜ1c
ü1d ǖ1d ǘ1d ǚ1d ǜ1d
ü1f ǖ1f ǘ1f ǚ1f ǜ1f
ü1g ǖ1g ǘ1g ǚ1g ǜ1g
ü1h ǖ1h ǘ1h ǚ1h ǜ1h
ü1j ǖ1j ǘ1j ǚ1j ǜ1j
ü1k ǖ1k ǘ1k ǚ1k ǜ1k
ü1l ǖ1l ǘ1l ǚ1l ǜ1l
ü1m ǖ1m ǘ1m ǚ1m ǜ1m
ü1n ǖ1n ǘ1n ǚ1n ǜ1n
ü1p ǖ1p ǘ1p ǚ1p ǜ1p
ü1q ǖ1q ǘ1q ǚ1q ǜ1q
ü1r ǖ1r ǘ1r ǚ1r ǜ1r
ü1s ǖ1s ǘ1s ǚ1s ǜ1s
ü1t ǖ1t ǘ1t ǚ1t ǜ1t
ü1w ǖ1w ǘ1w ǚ1w ǜ1w
ü1x ǖ1x ǘ1x ǚ1x ǜ1x
ü1y ǖ1y ǘ1y ǚ1y ǜ1y
ü1z ǖ1z ǘ1z ǚ1z ǜ1z
}

% Stop reading here; the editor trailer below is never scanned by TeX.
\endinput

%%% Local Variables:
%%% mode: tex
%%% coding: utf-8
%%% End:


More information about the tex-hyphen mailing list