[latex3-commits] [latex3/latex3] general-category: Add \codepoint_category:n (b17342b6b)

github at latex-project.org github at latex-project.org
Sun Jun 18 23:01:20 CEST 2023


Repository : https://github.com/latex3/latex3
On branch  : general-category
Link       : https://github.com/latex3/latex3/commit/b17342b6b623408798b27ed2fa21308d7dfb9c4b

>---------------------------------------------------------------

commit b17342b6b623408798b27ed2fa21308d7dfb9c4b
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sun Jun 18 21:04:37 2023 +0100

    Add \codepoint_category:n


>---------------------------------------------------------------

b17342b6b623408798b27ed2fa21308d7dfb9c4b
 l3kernel/CHANGELOG.md                      |  2 ++
 l3kernel/l3unicode.dtx                     | 30 ++++++++++++++++++++++++++++++
 l3kernel/testfiles/m3unicode001.luatex.tlg | 14 ++++++++++++++
 l3kernel/testfiles/m3unicode001.lvt        | 14 ++++++++++++++
 l3kernel/testfiles/m3unicode001.tlg        | 14 ++++++++++++++
 l3kernel/testfiles/m3unicode001.xetex.tlg  | 14 ++++++++++++++
 6 files changed, 88 insertions(+)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 1be249308..21d7e4099 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,8 @@ this project uses date-based 'snapshot' version identifiers.
 
 ## [Unreleased]
 
+### Added
+- `\codepoint_category:n`
 ## [2023-06-16]
 
 ### Changed
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index 0facb2b43..cc7f50d7d 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -106,6 +106,21 @@
 %   category code $10$.
 % \end{function}
 %
+% \begin{function}[added = 2023-06-18, EXP]{\codepoint_category:n}
+%   \begin{syntax}
+%     \cs{codepoint_category:n} \Arg{codepoint}
+%   \end{syntax}
+%   Expands to the Unicode general category identifier of the \meta{codepoint}.
+%   The general category identifier is a string made up of two letter
+%   characters, the first uppercase and the second lowercase. The uppercase
+%   letters divide codepoints into broader groups, which are then refined
+%   by the lowercase letter. For example, codepoints representing letters
+%   all have identifiers starting \texttt{L}, such as \texttt{Lu}
+%   (uppercase letter), \texttt{Lt} (titlecase letter), \emph{etc.}
+%   Full details are available in the documentation provided by the Unicode
+%   Consortium.
+% \end{function}
+%
 % \begin{function}[added = 2022-10-09, EXP]{\codepoint_to_nfd:n}
 %   \begin{syntax}
 %     \cs{codepoint_to_nfd:n} \Arg{codepoint}
@@ -347,6 +362,21 @@
 % \end{macro}
 % \end{macro}
 %
+% \begin{macro}[EXP]{\codepoint_category:n}
+%   Look up the numeric category value and map it back to the identifier string.
+%    \begin{macrocode}
+\cs_new:Npn \codepoint_category:n #1
+  {
+    \cs:w % assemble a control sequence name from the pieces up to \cs_end:
+      c_@@_category_
+      \tex_romannumeral:D % the looked-up number enters the name as a roman numeral
+        \__kernel_codepoint_data:nn { category } {#1} % presumably the numeric category entry for codepoint #1 -- TODO confirm
+      _str
+    \cs_end:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}[EXP]{\codepoint_to_nfd:n, \@@_to_nfd:n}
 % \begin{macro}[EXP]{\@@_to_nfd:nn}
 % \begin{macro}[EXP]{\@@_to_nfd:nnn}
diff --git a/l3kernel/testfiles/m3unicode001.luatex.tlg b/l3kernel/testfiles/m3unicode001.luatex.tlg
index 355dd0eb0..0d361f978 100644
--- a/l3kernel/testfiles/m3unicode001.luatex.tlg
+++ b/l3kernel/testfiles/m3unicode001.luatex.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
 ============================================================
 ;
 ============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.lvt b/l3kernel/testfiles/m3unicode001.lvt
index 327c582e6..bcd5715ae 100644
--- a/l3kernel/testfiles/m3unicode001.lvt
+++ b/l3kernel/testfiles/m3unicode001.lvt
@@ -54,4 +54,18 @@
     \codepoint_to_nfd:n { "037E } %  ; -> ; (Greek question mark to semicolon)
   }
 
+\TESTEXP { General~category }
+  {
+    \codepoint_category:n { "0000 } \NEWLINE % expected: Cc
+    \codepoint_category:n { "0020 } \NEWLINE % expected: Zs
+    \codepoint_category:n { "0022 } \NEWLINE % expected: Po
+    \codepoint_category:n { "0041 } \NEWLINE % expected: Lu
+    \codepoint_category:n { "007D } \NEWLINE % expected: Pe
+    \codepoint_category:n { "013A } \NEWLINE % expected: Ll
+    \codepoint_category:n { "3400 } \NEWLINE % expected: Lo
+    \codepoint_category:n { "3401 } \NEWLINE % expected: Lo
+    \codepoint_category:n { "4DBF } \NEWLINE % expected: Lo (last Lo before the result changes)
+    \codepoint_category:n { "4DC0 } \NEWLINE % expected: So
+  }
+
 \END
\ No newline at end of file
diff --git a/l3kernel/testfiles/m3unicode001.tlg b/l3kernel/testfiles/m3unicode001.tlg
index 78c5a1d72..429141a3e 100644
--- a/l3kernel/testfiles/m3unicode001.tlg
+++ b/l3kernel/testfiles/m3unicode001.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
 ============================================================
 ;
 ============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.xetex.tlg b/l3kernel/testfiles/m3unicode001.xetex.tlg
index 355dd0eb0..0d361f978 100644
--- a/l3kernel/testfiles/m3unicode001.xetex.tlg
+++ b/l3kernel/testfiles/m3unicode001.xetex.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
 ============================================================
 ;
 ============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================





More information about the latex3-commits mailing list.