[latex3-commits] [latex3/latex3] general-category: Add \codepoint_category:n (b17342b6b)
github at latex-project.org
github at latex-project.org
Sun Jun 18 23:01:20 CEST 2023
Repository : https://github.com/latex3/latex3
On branch : general-category
Link : https://github.com/latex3/latex3/commit/b17342b6b623408798b27ed2fa21308d7dfb9c4b
>---------------------------------------------------------------
commit b17342b6b623408798b27ed2fa21308d7dfb9c4b
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Sun Jun 18 21:04:37 2023 +0100
Add \codepoint_category:n
>---------------------------------------------------------------
b17342b6b623408798b27ed2fa21308d7dfb9c4b
l3kernel/CHANGELOG.md | 2 ++
l3kernel/l3unicode.dtx | 30 ++++++++++++++++++++++++++++++
l3kernel/testfiles/m3unicode001.luatex.tlg | 14 ++++++++++++++
l3kernel/testfiles/m3unicode001.lvt | 14 ++++++++++++++
l3kernel/testfiles/m3unicode001.tlg | 14 ++++++++++++++
l3kernel/testfiles/m3unicode001.xetex.tlg | 14 ++++++++++++++
6 files changed, 88 insertions(+)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 1be249308..21d7e4099 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,8 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
+### Added
+- `\codepoint_category:n`
## [2023-06-16]
### Changed
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index 0facb2b43..cc7f50d7d 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -106,6 +106,21 @@
% category code $10$.
% \end{function}
%
+% \begin{function}[added = 2023-06-18, EXP]{\codepoint_category:n}
+% \begin{syntax}
+% \cs{codepoint_category:n} \Arg{codepoint}
+% \end{syntax}
+% Expands to the Unicode general category identifier of the \meta{codepoint}.
+% The general category identifier is a string made up of two letter
+% characters, the first uppercase and the second lowercase. The uppercase
+% letters divide codepoints into broader groups, which are then refined
+% by the lowercase letter. For example, codepoints representing letters
+% all have identifiers starting with \texttt{L}, such as \texttt{Lu}
+% (uppercase letter), \texttt{Lt} (titlecase letter), \emph{etc.}
+% Full details are available in the documentation provided by the Unicode
+% Consortium.
+% \end{function}
+%
% \begin{function}[added = 2022-10-09, EXP]{\codepoint_to_nfd:n}
% \begin{syntax}
% \cs{codepoint_to_nfd:n} \Arg{codepoint}
@@ -347,6 +362,21 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}[EXP]{\codepoint_category:n}
+% Get the value and convert back to the string.
+% \begin{macrocode}
+\cs_new:Npn \codepoint_category:n #1 % #1 = codepoint; result is the contents of a per-category string constant
+ {
+ \cs:w % build a control sequence name from everything up to \cs_end:
+ c_@@_category_ % fixed prefix of the constant's name
+ \tex_romannumeral:D % turn the number that follows into lowercase roman numerals
+ \__kernel_codepoint_data:nn { category } {#1} % presumably expands to the numeric category index stored for #1 -- confirm against its definition
+ _str % fixed suffix of the constant's name
+ \cs_end: % the constructed name is then expanded in place of this call
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}[EXP]{\codepoint_to_nfd:n, \@@_to_nfd:n}
% \begin{macro}[EXP]{\@@_to_nfd:nn}
% \begin{macro}[EXP]{\@@_to_nfd:nnn}
diff --git a/l3kernel/testfiles/m3unicode001.luatex.tlg b/l3kernel/testfiles/m3unicode001.luatex.tlg
index 355dd0eb0..0d361f978 100644
--- a/l3kernel/testfiles/m3unicode001.luatex.tlg
+++ b/l3kernel/testfiles/m3unicode001.luatex.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
============================================================
;
============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.lvt b/l3kernel/testfiles/m3unicode001.lvt
index 327c582e6..bcd5715ae 100644
--- a/l3kernel/testfiles/m3unicode001.lvt
+++ b/l3kernel/testfiles/m3unicode001.lvt
@@ -54,4 +54,18 @@
\codepoint_to_nfd:n { "037E } % ; -> ; (Greek question mark to semicolon)
}
+\TESTEXP { General~category } % one result per line, compared against the .tlg files
+ {
+ \codepoint_category:n { "0000 } \NEWLINE % U+0000 (control) -> Cc
+ \codepoint_category:n { "0020 } \NEWLINE % U+0020 (space) -> Zs
+ \codepoint_category:n { "0022 } \NEWLINE % U+0022 (quotation mark) -> Po
+ \codepoint_category:n { "0041 } \NEWLINE % U+0041 (A) -> Lu
+ \codepoint_category:n { "007D } \NEWLINE % U+007D (right brace) -> Pe
+ \codepoint_category:n { "013A } \NEWLINE % U+013A (small l with acute) -> Ll
+ \codepoint_category:n { "3400 } \NEWLINE % U+3400 -> Lo (presumably probes the start of a codepoint range -- confirm)
+ \codepoint_category:n { "3401 } \NEWLINE % U+3401 -> Lo (just inside the same run of Lo results)
+ \codepoint_category:n { "4DBF } \NEWLINE % U+4DBF -> Lo (last Lo before the category changes)
+ \codepoint_category:n { "4DC0 } \NEWLINE % U+4DC0 -> So (first codepoint after the Lo run)
+ }
+
\END
\ No newline at end of file
diff --git a/l3kernel/testfiles/m3unicode001.tlg b/l3kernel/testfiles/m3unicode001.tlg
index 78c5a1d72..429141a3e 100644
--- a/l3kernel/testfiles/m3unicode001.tlg
+++ b/l3kernel/testfiles/m3unicode001.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
============================================================
;
============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.xetex.tlg b/l3kernel/testfiles/m3unicode001.xetex.tlg
index 355dd0eb0..0d361f978 100644
--- a/l3kernel/testfiles/m3unicode001.xetex.tlg
+++ b/l3kernel/testfiles/m3unicode001.xetex.tlg
@@ -33,3 +33,17 @@ TEST 4: Non-ASCII to ASCII mapping
============================================================
;
============================================================
+============================================================
+TEST 5: General category
+============================================================
+Cc
+Zs
+Po
+Lu
+Pe
+Ll
+Lo
+Lo
+Lo
+So
+============================================================
More information about the latex3-commits
mailing list.