[latex3-commits] [latex3/latex3] gh890: Add wrapper functions for regex concepts (c568af530)
github at latex-project.org
github at latex-project.org
Tue Nov 26 18:27:28 CET 2024
Repository : https://github.com/latex3/latex3
On branch : gh890
Link : https://github.com/latex3/latex3/commit/c568af530295df45760d3f9e5fb989bd68481ddc
>---------------------------------------------------------------
commit c568af530295df45760d3f9e5fb989bd68481ddc
Author: Joseph Wright <joseph at texdev.net>
Date: Tue Nov 26 17:10:32 2024 +0000
Add wrapper functions for regex concepts
>---------------------------------------------------------------
c568af530295df45760d3f9e5fb989bd68481ddc
l3kernel/CHANGELOG.md | 7 +++
l3kernel/l3int.dtx | 66 ++++++++++++++++++++++++++
l3kernel/l3seq.dtx | 97 +++++++++++++++++++++++++++++++++++++
l3kernel/l3tl.dtx | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 299 insertions(+)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index ce71252de..3673a0c88 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -11,6 +11,13 @@ this project uses date-based 'snapshot' version identifiers.
- `\bitset_use:N` and `\clist_use:N`: this clarifies that bitsets and clists
can be used with `V`-type expansion
- `\sys_if_engine_opentype:(TF)`
+- Add equivalent interfaces for regex functions:
+ - `\int_(g)set_regex_count:Nnn`, `\int_(g)set_regex_count:NNn`
+ - `\seq_(g)set_regex_extract:Nnn`, `\seq_(g)set_regex_extract:NNn`
+ - `\seq_(g)set_regex_split:Nnn`, `\seq_(g)set_regex_split:NNn`
+ - `\tl_if_regex_match:nn(TF)`, `\tl_if_regex_match:nN(TF)`
+ - `\tl_regex_(g)replace_once:Nnn`, `\tl_regex_(g)replace_once:NNn`
+ - `\tl_regex_(g)replace_all:Nnn`, `\tl_regex_(g)replace_all:NNn`
### Removed
- `\c_catcode_active_tl`: was missing a `_`, always intended to be internal
diff --git a/l3kernel/l3int.dtx b/l3kernel/l3int.dtx
index 7f3399429..08a827463 100644
--- a/l3kernel/l3int.dtx
+++ b/l3kernel/l3int.dtx
@@ -307,6 +307,33 @@
% \cs{int_eval:n}).
% \end{function}
%
+% \begin{function}[added = 2024-11-26]
+% {
+% \int_set_regex_count:Nnn, \int_set_regex_count:cnn,
+% \int_set_regex_count:NNn, \int_set_regex_count:cNn,
+% \int_gset_regex_count:Nnn, \int_gset_regex_count:cnn,
+% \int_gset_regex_count:NNn, \int_gset_regex_count:cNn,
+% }
+% \begin{syntax}
+% \cs{int_set_regex_count:Nnn} \meta{int var} \Arg{regular expression} \Arg{token list}
+% \cs{int_set_regex_count:NNn} \meta{int var} \Arg{compiled regex} \Arg{token list}
+% \end{syntax}
+% Sets \meta{int var} equal to the number of times
+% \meta{regular expression} appears in \meta{token list}.
+% The search starts by finding the left-most longest match,
+% respecting greedy and lazy (non-greedy) operators. Then the search
+% starts again from the character following the last character
+% of the previous match, until reaching the end of the token list.
+% Infinite loops are prevented in the case where the regular expression
+% can match an empty token list: then we count one match between each
+% pair of characters.
+% For instance,
+% \begin{verbatim}
+% \int_set_regex_count:Nnn \l_foo_int { (b+|c) } { abbababcbb }
+% \end{verbatim}
+% results in \cs[no-index]{l_foo_int} taking the value $5$.
+% \end{function}
+%
% \begin{function}[updated = 2011-10-22]
% {\int_sub:Nn, \int_sub:cn, \int_gsub:Nn, \int_gsub:cn}
% \begin{syntax}
@@ -1495,6 +1522,45 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}
+% {
+% \int_set_regex_count:Nnn, \int_set_regex_count:cnn,
+% \int_gset_regex_count:Nnn, \int_gset_regex_count:cnn
+% }
+% \begin{macro}
+% {
+% \int_set_regex_count:NNn, \int_set_regex_count:cNn,
+% \int_gset_regex_count:NNn, \int_gset_regex_count:cNn
+% }
+% \begin{macrocode}
+\cs_new_protected:Npn \int_set_regex_count:Nnn #1#2#3
+  { \regex_count:nnN {#2} {#3} #1 } % regex, then token list, then target
+\cs_generate_variant:Nn \int_set_regex_count:Nnn { c }
+\cs_new_protected:Npn \int_gset_regex_count:Nnn #1#2#3
+  {
+    \group_begin: % scratch assignments below stay local to this group
+      \int_set_eq:NN \l_@@_internal_a_int #1
+      \regex_count:nnN {#2} {#3} \l_@@_internal_a_int
+      \int_gset_eq:NN #1 \l_@@_internal_a_int % export the count globally
+    \group_end:
+  }
+\cs_generate_variant:Nn \int_gset_regex_count:Nnn { c }
+\cs_new_protected:Npn \int_set_regex_count:NNn #1#2#3
+  { \regex_count:NnN #2 {#3} #1 } % compiled regex is a single N-type token
+\cs_generate_variant:Nn \int_set_regex_count:NNn { c }
+\cs_new_protected:Npn \int_gset_regex_count:NNn #1#2#3
+  {
+    \group_begin: % scratch assignments below stay local to this group
+      \int_set_eq:NN \l_@@_internal_a_int #1
+      \regex_count:NnN #2 {#3} \l_@@_internal_a_int
+      \int_gset_eq:NN #1 \l_@@_internal_a_int % export the count globally
+    \group_end:
+  }
+\cs_generate_variant:Nn \int_gset_regex_count:NNn { c } % creates :cNn
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
% \subsection{Using integers}
%
% \begin{macro}{\int_use:N, \int_use:c}
diff --git a/l3kernel/l3seq.dtx b/l3kernel/l3seq.dtx
index 120081197..885a044e1 100644
--- a/l3kernel/l3seq.dtx
+++ b/l3kernel/l3seq.dtx
@@ -189,6 +189,45 @@
% \end{texnote}
% \end{function}
%
+% \begin{function}[added = 2024-11-26]
+% {
+% \seq_set_regex_extract:Nnn, \seq_set_regex_extract:cnn,
+% \seq_set_regex_extract:NNn, \seq_set_regex_extract:cNn,
+% \seq_gset_regex_extract:Nnn, \seq_gset_regex_extract:cnn,
+% \seq_gset_regex_extract:NNn, \seq_gset_regex_extract:cNn,
+% }
+% \begin{syntax}
+% \cs{seq_set_regex_extract:Nnn} \meta{seq~var} \Arg{regular expression} \Arg{token list}
+% \cs{seq_set_regex_extract:NNn} \meta{seq~var} \Arg{compiled regex} \Arg{token list}
+% \end{syntax}
+% Finds the first match of the \meta{regular expression} in the
+% \meta{token list}. If it exists, the match is stored as the first
+% item of the \meta{seq~var}, and further items are the contents of
+% capturing groups, in the order of their opening parenthesis. If
+% there is no match, the \meta{seq~var} is cleared.
+% \end{function}
+%
+% \begin{function}[added = 2024-11-26]
+% {
+% \seq_set_regex_split:Nnn, \seq_set_regex_split:cnn,
+% \seq_set_regex_split:NNn, \seq_set_regex_split:cNn,
+% \seq_gset_regex_split:Nnn, \seq_gset_regex_split:cnn,
+% \seq_gset_regex_split:NNn, \seq_gset_regex_split:cNn,
+% }
+% \begin{syntax}
+% \cs{seq_set_regex_split:Nnn} \meta{seq~var} \Arg{regular expression} \Arg{token list}
+% \cs{seq_set_regex_split:NNn} \meta{seq~var} \Arg{compiled regex} \Arg{token list}
+% \end{syntax}
+% Splits the \meta{token list} into a sequence of parts, delimited by
+% matches of the \meta{regular expression}. If the \meta{regular expression}
+% has capturing groups, then the token lists that they match are stored as
+% items of the sequence as well. The assignment to \meta{seq~var} is local
+% (\texttt{set}) or global (\texttt{gset}). If no match is found the
+% resulting \meta{seq~var} has the \meta{token list} as its sole item.
+% If the \meta{regular expression} matches the empty token list, then
+% the \meta{token list} is split into single tokens.
+% \end{function}
+%
% \begin{function}
% {\seq_concat:NNN, \seq_concat:ccc, \seq_gconcat:NNN, \seq_gconcat:ccc}
% \begin{syntax}
@@ -1411,6 +1450,64 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}
+% {
+% \seq_set_regex_extract:Nnn, \seq_set_regex_extract:cnn,
+% \seq_gset_regex_extract:Nnn, \seq_gset_regex_extract:cnn
+% }
+% \begin{macro}
+% {
+% \seq_set_regex_extract:NNn, \seq_set_regex_extract:cNn,
+% \seq_gset_regex_extract:NNn, \seq_gset_regex_extract:cNn
+% }
+% \begin{macro}
+% {
+% \seq_set_regex_split:Nnn, \seq_set_regex_split:cnn,
+% \seq_gset_regex_split:Nnn, \seq_gset_regex_split:cnn
+% }
+% \begin{macro}
+% {
+% \seq_set_regex_split:NNn, \seq_set_regex_split:cNn,
+% \seq_gset_regex_split:NNn, \seq_gset_regex_split:cNn
+% }
+% \begin{macrocode}
+\cs_new_protected:Npn \seq_set_regex_extract:Nnn #1#2#3
+  { \regex_extract_once:nnN {#2} {#3} #1 } % first match only, regex first
+\cs_generate_variant:Nn \seq_set_regex_extract:Nnn { c }
+\cs_new_protected:Npn \seq_set_regex_extract:NNn #1#2#3
+  { \regex_extract_once:NnN #2 {#3} #1 } % compiled regex passed unbraced
+\cs_generate_variant:Nn \seq_set_regex_extract:NNn { c }
+\cs_new_protected:Npn \seq_set_regex_split:Nnn #1#2#3
+  { \regex_split:nnN {#2} {#3} #1 } % regex, then token list, then target
+\cs_generate_variant:Nn \seq_set_regex_split:Nnn { c }
+\cs_new_protected:Npn \seq_set_regex_split:NNn #1#2#3
+  { \regex_split:NnN #2 {#3} #1 } % compiled regex passed unbraced
+\cs_generate_variant:Nn \seq_set_regex_split:NNn { c }
+\group_begin:
+  \cs_set_protected:Npn \@@_tmp:w #1#2#3#4 % name stem, regex stem, n/N, unbracer
+    {
+      \cs_new_protected:cpe { seq_gset_regex_ #1 :N #3 n } ##1##2##3
+        {
+          \group_begin:
+            \tl_set_eq:NN \exp_not:N \l_@@_internal_a_tl ##1
+            \exp_not:c { regex_ #2 : #3 nN } % \regex_<stem>:nnN or :NnN
+              #4 {##2} {##3} \exp_not:N \l_@@_internal_a_tl
+            \seq_gset_eq:NN ##1 \exp_not:N \l_@@_internal_a_tl
+          \group_end:
+        }
+      \cs_generate_variant:cn { seq_gset_regex_ #1 :N #3 n } { c }
+    }
+  \@@_tmp:w { extract } { extract_once } n { }
+  \@@_tmp:w { extract } { extract_once } N \use:n % \use:n unbraces the regex
+  \@@_tmp:w { split } { split } n { }
+  \@@_tmp:w { split } { split } N \use:n
+\group_end:
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
% \begin{macro}{\seq_concat:NNN, \seq_concat:ccc}
% \UnitTested
% \begin{macro}{\seq_gconcat:NNN, \seq_gconcat:ccc}
diff --git a/l3kernel/l3tl.dtx b/l3kernel/l3tl.dtx
index 3d08f2354..c40b0aa66 100644
--- a/l3kernel/l3tl.dtx
+++ b/l3kernel/l3tl.dtx
@@ -376,6 +376,24 @@
% Token groups (|{|\ldots|}|) are not single tokens.
% \end{function}
%
+% \begin{function}[TF, added = 2024-11-26]
+% {
+% \tl_if_regex_match:nn, \tl_if_regex_match:Vn,
+% \tl_if_regex_match:nN, \tl_if_regex_match:VN,
+% }
+% \begin{syntax}
+% \cs{tl_if_regex_match:nnTF} \Arg{token list} \Arg{regular expression} \Arg{true code} \Arg{false code}
+% \cs{tl_if_regex_match:nNTF} \Arg{token list} \Arg{compiled regex} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Tests whether the \meta{regular expression} matches any part
+% of the \meta{token list}. For instance,
+% \begin{verbatim}
+% \tl_if_regex_match:nnTF { abecdcx } { b [cde]* } { TRUE } { FALSE }
+% \tl_if_regex_match:nnTF { example } { [b-dq-w] } { TRUE } { FALSE }
+% \end{verbatim}
+% leaves \texttt{TRUE} then \texttt{FALSE} in the input stream.
+% \end{function}
+%
% \subsection{Testing the first token}
%
% \begin{function}[updated = 2012-07-09, EXP, pTF]
@@ -1158,6 +1176,43 @@
% for an example).
% \end{function}
%
+% \begin{function}[added = 2024-11-26]
+% {
+% \tl_regex_replace_once:Nnn, \tl_regex_replace_once:cnn,
+% \tl_regex_replace_once:NNn, \tl_regex_replace_once:cNn,
+% \tl_regex_greplace_once:Nnn, \tl_regex_greplace_once:cnn,
+% \tl_regex_greplace_once:NNn, \tl_regex_greplace_once:cNn
+% }
+% \begin{syntax}
+% \cs{tl_regex_replace_once:Nnn} \meta{tl~var} \Arg{regular expression} \Arg{replacement}
+% \cs{tl_regex_replace_once:NNn} \meta{tl~var} \Arg{compiled regex} \Arg{replacement}
+% \end{syntax}
+% Searches for the \meta{regular expression} in the contents of the
+% \meta{tl~var} and replaces the first match with the
+% \meta{replacement}. In the \meta{replacement},
+% |\0| represents the full match, |\1| represents the contents of the
+% first capturing group, |\2| of the second, \emph{etc.}
+% \end{function}
+%
+% \begin{function}[added = 2024-11-26]
+% {
+% \tl_regex_replace_all:Nnn, \tl_regex_replace_all:cnn,
+% \tl_regex_replace_all:NNn, \tl_regex_replace_all:cNn,
+% \tl_regex_greplace_all:Nnn, \tl_regex_greplace_all:cnn,
+% \tl_regex_greplace_all:NNn, \tl_regex_greplace_all:cNn
+% }
+% \begin{syntax}
+% \cs{tl_regex_replace_all:Nnn} \meta{tl~var} \Arg{regular expression} \Arg{replacement}
+% \cs{tl_regex_replace_all:NNn} \meta{tl~var} \Arg{compiled regex} \Arg{replacement}
+% \end{syntax}
+% Replaces all occurrences of the \meta{regular expression} in the
+% contents of the \meta{tl~var}
+% by the \meta{replacement}, where |\0| represents
+% the full match, |\1| represents the contents of the first capturing
+% group, |\2| of the second, \emph{etc.} Every match is treated
+% independently, and matches cannot overlap.
+% \end{function}
+%
% \begin{function}[updated = 2011-08-11]
% {
% \tl_remove_once:Nn, \tl_remove_once:NV, \tl_remove_once:Ne,
@@ -2249,6 +2304,57 @@
%
% \begin{macro}
% {
+% \tl_regex_replace_once:Nnn, \tl_regex_replace_once:cnn,
+% \tl_regex_replace_once:NNn, \tl_regex_replace_once:cNn,
+% \tl_regex_greplace_once:Nnn, \tl_regex_greplace_once:cnn,
+% \tl_regex_greplace_once:NNn, \tl_regex_greplace_once:cNn
+% }
+% \begin{macro}
+% {
+% \tl_regex_replace_all:Nnn, \tl_regex_replace_all:cnn,
+% \tl_regex_replace_all:NNn, \tl_regex_replace_all:cNn,
+% \tl_regex_greplace_all:Nnn, \tl_regex_greplace_all:cnn,
+% \tl_regex_greplace_all:NNn, \tl_regex_greplace_all:cNn
+% }
+% Wrappers.
+% \begin{macrocode}
+\cs_new_protected:Npn \tl_regex_replace_once:Nnn #1#2#3
+  { \regex_replace_once:nnN {#2} {#3} #1 } % regex, replacement, tl var
+\cs_generate_variant:Nn \tl_regex_replace_once:Nnn { c }
+\cs_new_protected:Npn \tl_regex_replace_once:NNn #1#2#3
+  { \regex_replace_once:NnN #2 {#3} #1 } % N-type regex passed unbraced
+\cs_generate_variant:Nn \tl_regex_replace_once:NNn { c }
+\cs_new_protected:Npn \tl_regex_replace_all:Nnn #1#2#3
+  { \regex_replace_all:nnN {#2} {#3} #1 } % regex, replacement, tl var
+\cs_generate_variant:Nn \tl_regex_replace_all:Nnn { c }
+\cs_new_protected:Npn \tl_regex_replace_all:NNn #1#2#3
+  { \regex_replace_all:NnN #2 {#3} #1 } % N-type regex passed unbraced
+\cs_generate_variant:Nn \tl_regex_replace_all:NNn { c }
+\group_begin:
+  \cs_set_protected:Npn \@@_tmp:w #1#2#3 % once/all, n/N, regex unbracer
+    {
+      \cs_new_protected:cpe { tl_regex_greplace_ #1 :N #2 n } ##1##2##3
+        {
+          \group_begin:
+            \tl_set_eq:NN \exp_not:N \l_@@_internal_a_tl ##1
+            \exp_not:c { regex_replace_ #1 : #2 nN } % :nnN or :NnN
+              #3 {##2} {##3} \exp_not:N \l_@@_internal_a_tl
+            \tl_gset_eq:NN ##1 \exp_not:N \l_@@_internal_a_tl
+          \group_end:
+        }
+      \cs_generate_variant:cn { tl_regex_greplace_ #1 :N #2 n } { c }
+    }
+  \@@_tmp:w { once } n { }
+  \@@_tmp:w { once } N \use:n % \use:n unbraces the compiled regex
+  \@@_tmp:w { all } n { }
+  \@@_tmp:w { all } N \use:n
+\group_end:
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}
+% {
% \tl_remove_once:Nn, \tl_remove_once:NV, \tl_remove_once:Ne,
% \tl_remove_once:cn, \tl_remove_once:cV, \tl_remove_once:ce
% }
@@ -2611,6 +2717,29 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}
+% {
+% \tl_if_regex_match:nn, \tl_if_regex_match:Vn,
+% \tl_if_regex_match:nN, \tl_if_regex_match:VN,
+% }
+% \begin{macrocode}
+\prg_new_protected_conditional:Npnn \tl_if_regex_match:nn #1#2 { TF , T , F }
+  {
+    \regex_match:nnTF {#2} {#1} % regex (#2) first, then token list (#1)
+      { \prg_return_true: } { \prg_return_false: }
+  }
+\prg_generate_conditional_variant:Nnn \tl_if_regex_match:nn
+  { V } { TF , T , F }
+\prg_new_protected_conditional:Npnn \tl_if_regex_match:nN #1#2 { TF , T , F }
+  {
+    \regex_match:NnTF #2 {#1} % compiled regex (#2) first, then token list
+      { \prg_return_true: } { \prg_return_false: }
+  }
+\prg_generate_conditional_variant:Nnn \tl_if_regex_match:nN
+  { V } { TF , T , F }
+% \end{macrocode}
+% \end{macro}
+%
% \subsection{Mapping over token lists}
%
% \begin{macro}
More information about the latex3-commits
mailing list.