[latex3-commits] [git/LaTeX3-latex3-latex3] text-map: Load Unicode grapheme data (63e2fdcb9)

Joseph Wright joseph.wright at morningstar2.co.uk
Thu Aug 4 15:11:21 CEST 2022


Repository : https://github.com/latex3/latex3
On branch  : text-map
Link       : https://github.com/latex3/latex3/commit/63e2fdcb927955b5f9024d8c4ba0c5161af58e78

>---------------------------------------------------------------

commit 63e2fdcb927955b5f9024d8c4ba0c5161af58e78
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Thu Aug 4 14:10:10 2022 +0100

    Load Unicode grapheme data


>---------------------------------------------------------------

63e2fdcb927955b5f9024d8c4ba0c5161af58e78
 l3kernel/l3unicode.dtx | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index fbf1d0bd6..3a5a0d88b 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -252,6 +252,83 @@
 %    \end{macrocode}
 %
 %    \begin{macrocode}
+%<@@=text>
+%    \end{macrocode}
+%
+%  Read the Unicode grapheme data. This is quite easy to handle and we only need
+%  codepoints, not characters, so there is no need to worry about the engine in use.
+%  As reading as a string is most convenient, we have to do some work to remove
+%  spaces: the hardest part of the entire process!
+%
+%  The ranges for the class currently being read are accumulated in
+%  |\l_@@_tmpa_str|, each preceded by a comma, and the name of that class is
+%  held in |\l_@@_tmpb_str|. Both are set up as plain macros inside the group
+%  so that nothing is left behind once loading is complete.
+%    \begin{macrocode}
+\ior_new:N \g_@@_data_ior
+\group_begin:
+  \ior_open:Nn \g_@@_data_ior { GraphemeBreakProperty.txt }
+  \cs_set_nopar:Npn \l_@@_tmpa_str { }
+  \cs_set_nopar:Npn \l_@@_tmpb_str { }
+%    \end{macrocode}
+%  Save the accumulated ranges for the current class, if there is one, as
+%  |\c_@@_grapheme_|\meta{class}|_tl|: the |\use_none:n| removes the leading
+%  comma. The accumulator is then emptied so that the next class starts
+%  afresh rather than inheriting the ranges of all earlier classes.
+%    \begin{macrocode}
+  \cs_set_protected:Npn \@@_data_store:
+    {
+      \str_if_empty:NF \l_@@_tmpb_str
+        {
+          \tl_const:cx { c_@@_grapheme_ \l_@@_tmpb_str _tl }
+            { \exp_after:wN \use_none:n \l_@@_tmpa_str }
+        }
+      \cs_set_nopar:Npn \l_@@_tmpa_str { }
+    }
+%    \end{macrocode}
+%  Split a data line into the codepoint part (|#1|) and the class name (|#2|);
+%  anything after the class name (|#3|) is dropped. A change of class name
+%  means the previous class is complete, so it is stored before the new name
+%  is recorded. Passing |#1| three times means that a single codepoint and a
+%  range |A..B| can both be picked apart by the same delimited macro.
+%    \begin{macrocode}
+  \cs_set_protected:Npn \@@_data_auxi:w #1 ;~ #2 ~ #3 \q_stop
+    {
+      \str_if_eq:VnF \l_@@_tmpb_str {#2}
+        {
+          \@@_data_store:
+          \cs_set_nopar:Npn \l_@@_tmpb_str {#2}
+        }
+      \@@_data_auxii:w #1 .. #1 .. #1 \q_stop
+    }
+  \cs_set_protected:Npn \@@_data_auxii:w #1 .. #2 .. #3 \q_stop
+    {
+      \cs_set_nopar:Npx \l_@@_tmpa_str
+        {
+          \l_@@_tmpa_str ,
+          \tl_trim_spaces:n {#1} .. \tl_trim_spaces:n {#2}
+        }
+    }
+%    \end{macrocode}
+%  Map over the file, skipping lines which start with |#| and blank lines.
+%  A space is added after each line so that the class name is always
+%  delimited even if nothing follows it. Once the end of the file is
+%  reached the last class is still pending, so store that too.
+%    \begin{macrocode}
+  \ior_str_map_inline:Nn \g_@@_data_ior
+    {
+      \str_if_eq:eeF { \tl_head:w #1 \c_hash_str \q_stop } { \c_hash_str }
+        {
+          \tl_if_blank:nF {#1}
+            { \@@_data_auxi:w #1 ~ \q_stop }
+        }
+    }
+  \@@_data_store:
+  \ior_close:N \g_@@_data_ior
+\group_end:
+%    \end{macrocode}
+%
+%    \begin{macrocode}
 %</package>
 %    \end{macrocode}
 %





More information about the latex3-commits mailing list.