texlive[44483] Master/texmf-dist: latex3 (30may17)
commits+karl at tug.org
commits+karl at tug.org
Tue Jun 6 01:17:09 CEST 2017
Revision: 44483
http://tug.org/svn/texlive?view=revision&revision=44483
Author: karl
Date: 2017-06-06 01:17:08 +0200 (Tue, 06 Jun 2017)
Log Message:
-----------
latex3 (30may17)
Modified Paths:
--------------
trunk/Master/texmf-dist/doc/latex/l3build/README.md
trunk/Master/texmf-dist/doc/latex/l3build/l3build.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/README.md
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3str-convert.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3str-format.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/xcoffins/xcoffins.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/xgalley/l3galley.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/xgalley/xgalley.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/README.md
trunk/Master/texmf-dist/doc/latex/l3kernel/expl3.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/l3docstrip.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/source3.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/source3body.tex
trunk/Master/texmf-dist/doc/latex/l3packages/README.md
trunk/Master/texmf-dist/doc/latex/l3packages/l3keys2e/l3keys2e.pdf
trunk/Master/texmf-dist/doc/latex/l3packages/xfp/xfp.pdf
trunk/Master/texmf-dist/doc/latex/l3packages/xfrac/xfrac.pdf
trunk/Master/texmf-dist/doc/latex/l3packages/xparse/xparse.pdf
trunk/Master/texmf-dist/doc/latex/l3packages/xtemplate/xtemplate.pdf
trunk/Master/texmf-dist/source/latex/l3build/l3build.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-convert.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-format.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str.ins
trunk/Master/texmf-dist/source/latex/l3experimental/xcoffins/xcoffins.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/l3galley.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/xgalley.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/expl3.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3.ins
trunk/Master/texmf-dist/source/latex/l3kernel/l3alloc.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3basics.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3bootstrap.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3box.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3candidates.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3clist.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3coffins.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3color.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3deprecation.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3doc.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3docstrip.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3drivers.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3expan.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3file.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3final.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3flag.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3format.ins
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-assign.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-aux.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-basics.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-convert.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-expo.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-extended.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-logic.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-parse.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-random.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-round.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-traps.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-trig.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3fp.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3int.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3keys.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3luatex.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3msg.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3names.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3oldmodules.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3prg.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3prop.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3quark.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3seq.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3skip.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3sort.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3str.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3sys.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3tl.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3token.dtx
trunk/Master/texmf-dist/source/latex/l3packages/l3keys2e/l3keys2e.dtx
trunk/Master/texmf-dist/source/latex/l3packages/xfp/xfp.dtx
trunk/Master/texmf-dist/source/latex/l3packages/xfrac/xfrac.dtx
trunk/Master/texmf-dist/source/latex/l3packages/xparse/xparse.dtx
trunk/Master/texmf-dist/source/latex/l3packages/xtemplate/xtemplate.dtx
trunk/Master/texmf-dist/tex/latex/l3build/l3build.lua
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-convert.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-format.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/xcoffins/xcoffins.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/l3galley.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/xgalley.sty
trunk/Master/texmf-dist/tex/latex/l3kernel/expl3-code.tex
trunk/Master/texmf-dist/tex/latex/l3kernel/expl3-generic.tex
trunk/Master/texmf-dist/tex/latex/l3kernel/expl3.lua
trunk/Master/texmf-dist/tex/latex/l3kernel/expl3.sty
trunk/Master/texmf-dist/tex/latex/l3kernel/l3doc.cls
trunk/Master/texmf-dist/tex/latex/l3kernel/l3dvipdfmx.def
trunk/Master/texmf-dist/tex/latex/l3kernel/l3dvips.def
trunk/Master/texmf-dist/tex/latex/l3kernel/l3dvisvgm.def
trunk/Master/texmf-dist/tex/latex/l3kernel/l3pdfmode.def
trunk/Master/texmf-dist/tex/latex/l3kernel/l3sort.sty
trunk/Master/texmf-dist/tex/latex/l3kernel/l3xdvipdfmx.def
trunk/Master/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty
trunk/Master/texmf-dist/tex/latex/l3packages/xfp/xfp.sty
trunk/Master/texmf-dist/tex/latex/l3packages/xfrac/xfrac.sty
trunk/Master/texmf-dist/tex/latex/l3packages/xparse/xparse.sty
trunk/Master/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty
Added Paths:
-----------
trunk/Master/texmf-dist/source/latex/l3kernel/l3intarray.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3regex.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-analysis.dtx
trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-build.dtx
trunk/Master/texmf-dist/tex/latex/l3kernel/l3regex.sty
trunk/Master/texmf-dist/tex/latex/l3kernel/l3tl-analysis.sty
Removed Paths:
-------------
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3intarray.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3regex.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3tl-analysis.pdf
trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3tl-build.pdf
trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.tex
trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.tex
trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.tex
trunk/Master/texmf-dist/doc/latex/l3kernel/source3.tex
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3intarray.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3regex.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-analysis.dtx
trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-build.dtx
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3intarray.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex-trace.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-analysis.sty
trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-build.sty
Modified: trunk/Master/texmf-dist/doc/latex/l3build/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3build/README.md 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3build/README.md 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,7 +1,7 @@
l3build: a testing and building system for LaTeX3
=================================================
-Release 2017/05/19
+Release 2017/05/29
Overview
--------
Modified: trunk/Master/texmf-dist/doc/latex/l3build/l3build.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3experimental/README.md 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3experimental/README.md 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,7 +1,7 @@
Experimental LaTeX3 Concepts
============================
-Release 2017/05/13
+Release 2017/05/29
Overview
--------
@@ -32,11 +32,7 @@
category code 12 ('other'), with the exception of spaces which have the
category code 10 ('space'). The `l3str` bundle consists of two parts. The
first is `l3str` itself. This is a collection of functions to act on strings,
-including for manipulations such as UTF8 mappings in pdfTeX. The second
-part of the bundle is `l3regex`, a regular expression search-and-replace
-implementation written in TeX primitives. The regex module works on token
-lists, and is part of `l3str` (currently) for historical reasons: the team
-anticipate splitting the two in the future.
+including for manipulations such as UTF8 mappings in pdfTeX.
`xcoffins`
----------
Deleted: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3intarray.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3regex.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3str-convert.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3str-format.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3tl-analysis.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3experimental/l3str/l3tl-build.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/xcoffins/xcoffins.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/xgalley/l3galley.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3experimental/xgalley/xgalley.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/README.md 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/README.md 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,7 +1,7 @@
LaTeX3 Programming Conventions
==============================
-Release 2017/05/13
+Release 2017/05/29
Overview
--------
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/expl3.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/interface3.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,89 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: interface3.tex Copyright (C) 1990-2011 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of the
-%% LaTeX Project Public License (LPPL), either version 1.3c of this
-%% license or (at your option) any later version. The latest version
-%% of this license is in the file
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3kernel bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% The released version of this bundle is available from CTAN.
-%%
-%% -----------------------------------------------------------------------
-%%
-%% The development version of the bundle can be found at
-%%
-%% http://www.latex-project.org/svnroot/experimental/trunk/
-%%
-%% for those people who are interested.
-%%
-%%%%%%%%%%%
-%% NOTE: %%
-%%%%%%%%%%%
-%%
-%% Snapshots taken from the repository represent work in progress and may
-%% not work or may contain conflicting material! We therefore ask
-%% people _not_ to put them into distributions, archives, etc. without
-%% prior consultation with the LaTeX3 Project.
-%%
-%% -----------------------------------------------------------------------
-%
-% \fi
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%
-% This document will typeset the LaTeX3 interface descriptions as a single
-% document. This will produce quite a large file (more than 170 pages
-% currently).
-%
-% There is also a full version of the sources (source3.tex) which additionally
-% typesets the command implementations.
-%
-% Do not forget to generate the index (as explained on the terminal output
-% near the end of the run)!
-%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-\documentclass{l3doc}
-\listfiles
-
-\begin{document}
-
-\title{The \LaTeX3 Interfaces}
-\author{%
- The \LaTeX3 Project\thanks
- {%
- E-mail:
- \href{mailto:latex-team at latex-project.org}
- {latex-team at latex-project.org}%
- }%
-}
-
-\pagenumbering{roman}
-\maketitle
-
-%
-% First load all modules and typeset the documentation parts
-%
-
-\input{source3body} % all the individual modules
-
-\clearpage
-
-\begingroup
- \def\endash{--}
- \catcode`\-\active
- \def-{\futurelet\temp\indexdash}
- \def\indexdash{\ifx\temp-\endash\fi}
- \DelayPrintIndex
-\endgroup
-
-\end{document}
-
-
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/l3docstrip.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/l3styleguide.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,274 +0,0 @@
-\iffalse meta-comment
-
-File l3styleguide.tex Copyright (C) 2011,2012,2015-2017 The LaTeX3 Project
-
-It may be distributed and/or modified under the conditions of the
-LaTeX Project Public License (LPPL), either version 1.3c of this
-license or (at your option) any later version. The latest version
-of this license is in the file
-
- http://www.latex-project.org/lppl.txt
-
-This file is part of the "l3kernel bundle" (The Work in LPPL)
-and all files in that bundle must be distributed together.
-
-The released version of this bundle is available from CTAN.
-
-\fi
-
-\documentclass{l3doc}
-
-
-\title{%
- The \LaTeX3 kernel: style guide for code authors%
-}
-\author{%
- The \LaTeX3 Project\thanks
- {%
- E-mail:
- \href{mailto:latex-team at latex-project.org}%
- {latex-team at latex-project.org}%
- }%
-}
-\date{Released 2017/05/13}
-
-\begin{document}
-
-\maketitle
-
-\tableofcontents
-
-\section{Introduction}
-
-This document is intended as a style guide for authors of code and
-documentation for the \LaTeX3 kernel. It covers both aspects of coding
-style and the formatting of the sources. The aim of providing these
-guidelines is to help ensure consistency of the code and sources from
-different authors. Experience suggests that in the long-term this helps
-with maintenance. There will of course be places where there are
-exceptions to these guidelines: common sense should always be
-applied!
-
-\section{Documentation style}
-
-\LaTeX3 source and documentation should be written using the document
-class \cls{l3doc} in \file{dtx} format. This class provides a number
-of logical mark up elements, which should be used where possible.
-In the main, this is standard \LaTeX{} practice, but there are a
-few points to highlight:
-\begin{itemize}
- \item
- Where possible, use \cs{cs} to mark up control sequences
- rather than using a verbatim environment.
- \item
- Arguments which are given in braces should be marked using
- \cs{Arg} when code-level functions are discussed, but using
- \cs{marg} for document functions.
- \item
- The names \TeX{}, \LaTeX{}, \emph{etc}.\ use the normal logical mark
- up followed by an empty group (|{}|), with the exception of |\LaTeX3|,
- where the number should follow directly.
- \item
- Where in line verbatim text is used, it should be marked up
- using the \verb=|...|= construct (\emph{i.e.}~vertical bars delimit
- the verbatim text).
- \item In line quotes should be marked up using the \cs{enquote}
- function.
- \item
- Where numbers in the source have a mathematical meaning,
- they should be included in math mode. Such in-line math mode
- material should be marked up using |$...$| and \emph{not}
- |\(...\)|.
-\end{itemize}
-
-Line length in the source files should be under $80$
-characters where possible, as this helps keep everything on the screen
-when editing files. In the \file{dtx} format, documentation lines start
-with a \texttt{\%}, which is usually followed by a space to leave a
-\enquote{comment margin} at the start of each line.
-
-As with code indenting (see later), nested environments and arguments
-should be indented by (at least) two spaces to make the nature of the nesting
-clear. Thus for example a typical arrangement for the \env{function}
-environment might be
-\begin{verbatim*}
-\begin{function}{\seq_gclear:N, \seq_gclear:c}
- \begin{syntax}
- \cs{seq_gclear:N} \meta{sequence}
- \end{syntax}
- Clears all entries from the \meta{sequence} globally.
-\end{function}
-\end{verbatim*}
-The \enquote{outer} \verb*|% \begin{function}| should have the customary
-space after the |%| character at the start of the line.
-
-In general, a single \env{function} or \env{macro} environment should be
-used for a group of closely-related functions, for example argument
-specification variants. In such cases, a comma-separated list should be
-used, as shown in the preceding example.
-
-\section{Format of the code itself}
-
-The requirement for less than $80$ characters per line applies to the code
-itself as well as the surrounding documentation. A number of the general
-style principles for \LaTeX3 code apply: these are described in the following
-paragraph and an example is then given.
-
-With the exception of simple runs of parameters (|{#1}|, |#1#2|,
-\emph{etc.}), everything should be divided up using spaces to make the code
-more readable. In general, these will be single spaces, but in some
-places it makes more sense to align parts of the code to emphasise
-similarity. (Tabs should not be used for introducing white space.)
-
-Each conceptually-separate step in a function should be on a separate
-line, to make the meaning clearer. Hence the \texttt{false} branch
-in the example uses two lines for the two auxiliary function uses.
-
-Within the definition, a two-space indent should be used to show each
-\enquote{level} of code. Thus in the example \cs{tl_if_empty:nTF} is
-indented by two spaces, but the two branches are indented by four
-spaces. Within the \texttt{false} branch, the need for multiple lines
-means that an additional two-space indent should be used to show that
-these lines are all part of the brace group.
-
-The result of these lay-out conventions is code which will in general
-look like the example:
-\begin{verbatim*}
-\cs_new:Npn \module_foo:nn #1#2
- {
- \tl_if_empty:nTF {#1}
- { \module_foo_aux:n { X #2 } }
- {
- \module_foo_aux:nn {#1} {#2}
- \module_foo_aux:n { #1 #2 }
- }
- }
-\end{verbatim*}
-
-\section{Code conventions}
-
-All code-level functions should be \enquote{long} if they accept any
-arguments, even if it seems \enquote{very unlikely} that a \cs{par} token
-will be passed. Thus \cs{cs_new_nopar:Npn} and so forth should only be used
-to create interfaces at the document level (where trapping \cs{par} tokens
-may be appropriate) or where comparison to other code known not to be
-\enquote{long} is required (\emph{e.g.}~when working with mixed
-\LaTeXe{}/\pkg{expl3} situations).
-
-The expandability of each function should be well-defined. Functions which
-cannot be fully expanded must be \texttt{protected}. This means that expandable
-functions must themselves only contain expandable material. Functions which
-use any non-expandable material must be defined using \cs{cs_new_protected:Npn}
-or similar.
-
-When using \cs{cs_generate_variant:Nn}, group related variants together
-to make the pattern clearer. A common example is variants of a function
-which has an \texttt{N}-type first argument:
-\begin{verbatim}
- \cs_generate_variant:Nn \foo:Nn { NV , No }
- \cs_generate_variant:Nn \foo:Nn { c , cV , co }
-\end{verbatim}
-
-There may be cases where omitting braces from \texttt{o}-type arguments
-is desirable for performance reasons. This should only be done if the
-argument is a single token, thus for example
-\begin{verbatim}
- \tl_set:No \l_some_tl \l_some_other_tl
-\end{verbatim}
-remains clear and can be used where appropriate.
-
-\section{Private and internal functions}
-
-Private functions (those starting \cs{__}) should not be used between modules.
-The only exception is where a \enquote{family} of modules share some
-\enquote{internal} methods: this happens most obviously in the kernel itself.
-Any internal functions or variables \emph{must} be documented in the same way
-as public ones.
-
-The \pkg{l3docstrip} method should be used for internal functions in a module.
-This requires a line
-\begin{quote}
- \ttfamily
- \%<@@=\meta{module}>
-\end{quote}
-at the start of the source (\texttt{.dtx}) file, with internal functions
-then written in the form
-\begin{verbatim}
- \cs_new_protected:Npn \@@_function:nn #1#2
- ...
-\end{verbatim}
-
-\subsection{Access from other modules}
-
-There may be cases where it is useful to use an internal function from
-a third-party module (this includes cases where you are the author of both
-but they are not part of the same \enquote{family}). In these cases, you should
-\emph{copy} the definition of the internal function to your code: this avoids
-relying on non-documented interfaces. At the same time, it is strongly
-encouraged that you discuss your requirements with the author of the
-code you need to access. The best long-term solution to these cases is for
-new documented interfaces to be added to the parent module.
-
-\subsection{Access to primitives}
-
-As \pkg{expl3} is still a developing system, there are places where direct
-access to engine primitives is required. These are all marked as
-\enquote{do not use} in the code and so require special handling. Where a
-programmer is sure that they need to use a primitive (for example where the
-team have not yet covered access to an area) then a local copy of the
-primitive should be made, for example
-\begin{verbatim}
- \cs_new_eq:NN \__module_message:w \tex_message:D
- % ...
- \cs_new_protected:Npn \__module_fancy_msg:n #1
- { \__module_message:w { *** #1 *** } }
-\end{verbatim}
-This approach makes it possible for the team and others to find such
-usage (by searching for the \texttt{:D} argument type) but avoids
-multiple uses in general code.
-
-At the same time, the team ask that these use cases are raised on the
-\texttt{LaTeX-L} mailing list. The team are keen to collect use cases for
-areas that have not yet been addressed and to provide new code where the
-required interfaces become clear.
-
-Programmers using primitives should be ready to make updates to their
-code as the team develop additional interfaces.
-
-\section{Auxiliary functions}
-
-In general, the team encourages the use of descriptive names in \LaTeX3 code.
-Thus many helper functions will have names which describe briefly what they do,
-rather than simply indicating that they are auxiliary to some higher-level
-function. However, there are places where one or more \texttt{aux} functions
-are required. Where possible, these should be differentiated by signature
-\begin{verbatim}
- \cs_new_protected:Npn \@@_function:nn #1#2
- {
- ...
- }
- \cs_new_protected:Npn \@@_function_aux:nn #1#2
- {
- ...
- }
- \cs_new_protected:Npn \@@_function_aux:w #1#2 \q_stop
- {
- ...
- }
-\end{verbatim}
-Where more than one auxiliary shares the same signature, the recommended naming
-scheme is \texttt{auxi}, \texttt{auxii} and so on.
-\begin{verbatim}
- \cs_new_protected:Npn \@@_function_auxi:nn #1#2
- {
- ...
- }
- \cs_new_protected:Npn \@@_function_auxii:nn #1#2
- {
- ...
- }
-\end{verbatim}
-The use of \texttt{aux_i}, \texttt{aux_ii}, \emph{etc.}\ is discouraged as this
-conflicts with the convention used by \cs{use_i:nn} and related functions.
-
-\end{document}
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/l3syntax-changes.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,101 +0,0 @@
-\iffalse meta-comment
-
-File l3syntax-changes.tex Copyright (C) 2011-2012,2017 The LaTeX3 Project
-
-It may be distributed and/or modified under the conditions of the
-LaTeX Project Public License (LPPL), either version 1.3c of this
-license or (at your option) any later version. The latest version
-of this license is in the file
-
- http://www.latex-project.org/lppl.txt
-
-This file is part of the "l3kernel bundle" (The Work in LPPL)
-and all files in that bundle must be distributed together.
-
-The released version of this bundle is available from CTAN.
-
-\fi
-
-\documentclass{l3doc}
-
-
-\title{%
- Syntax changes in \LaTeX3 functions%
-}
-\author{%
- The \LaTeX3 Project\thanks
- {%
- E-mail:
- \href{mailto:latex-team at latex-project.org}%
- {latex-team at latex-project.org}%
- }%
-}
-\date{Released 2017/05/13}
-
-\newcommand{\TF}{\textit{(TF)}}
-
-\begin{document}
-
-\maketitle
-
-This file lists functions whose syntax has changed after August 2011,
-with an approximate date.
-
-\section{August 2011}
-
-\begin{itemize}
- \item \cs{tl_if_single:n\TF} recognized any non-zero number of
- explicit spaces as \meta{true}, and did not ignore trailing spaces.
- Now it is \meta{true} for
- \[
- \meta{optional spaces}
- \meta{normal token or brace group}
- \meta{optional spaces}.
- \]
- \item \cs{tl_reverse:n} stripped outer braces and lost unprotected spaces.
- Now it keeps spaces, leaves unbraced single tokens unbraced, and
- braced groups braced.
- \item \cs{tl_trim_spaces:n} only removed one leading and trailing space.
- Now removes recursively. Also, on the left it used to strip implicit
- and explicit spaces with any character code. Now it strips only explicit
- space characters $(32,10)$.
-\end{itemize}
-
-\section{September 2011}
-
-\begin{itemize}
-\item clist functions which receive an \texttt{n}-type comma list argument
- now trim spaces from each item in the argument.
-\end{itemize}
-
-\section{June 2012}
-
-\begin{itemize}
- \item Access to list functions now indexes from~$1$, not from~$0$.
- This applies to multiple choices in the \pkg{l3keys} module and
- the \cs{clist_item:Nn}, \cs{seq_item:Nn} and \cs{tl_item:Nn}
- functions.
- \item \cs{tl_trim_spaces:n} now requires a variable number of
- expansions to fully expand, rather than exactly two. Of course,
- \texttt{x}-type expansion still correctly evaluates this function.
-\end{itemize}
-
-\section{July 2012}
-
-\begin{itemize}
- \item The \cs{tl_if_head_eq_meaning:nN}, \cs{tl_if_head_eq_catcode:nN}
- and \cs{tl_if_head_eq_charcode:nN} conditionals now never match when
- their first argument is empty.
-\end{itemize}
-
-\section{August 2012}
-
-\begin{itemize}
- \item \cs{lua_now:x} is now a standard \texttt{x}-type expansion of
- \cs{lua_now:n}, which does no expansion. Engine-level expansion is moved
- to \cs{lua_now_x:n}, reflecting the fact that this is non-standard in the
- same way as for example \cs{str_if_eq_x:nn(TF)}.
-\end{itemize}
-
-
-\end{document}
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/source3.pdf
===================================================================
(Binary files differ)
Deleted: trunk/Master/texmf-dist/doc/latex/l3kernel/source3.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/source3.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/source3.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,106 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: source3.tex Copyright (C) 1990-2011 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of the
-%% LaTeX Project Public License (LPPL), either version 1.3c of this
-%% license or (at your option) any later version. The latest version
-%% of this license is in the file
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3kernel bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% The released version of this bundle is available from CTAN.
-%%
-%% -----------------------------------------------------------------------
-%%
-%% The development version of the bundle can be found at
-%%
-%% http://www.latex-project.org/svnroot/experimental/trunk/
-%%
-%% for those people who are interested.
-%%
-%%%%%%%%%%%
-%% NOTE: %%
-%%%%%%%%%%%
-%%
-%% Snapshots taken from the repository represent work in progress and may
-%% not work or may contain conflicting material! We therefore ask
-%% people _not_ to put them into distributions, archives, etc. without
-%% prior consultation with the LaTeX3 Project.
-%%
-%% -----------------------------------------------------------------------
-%
-% \fi
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%
-% This document will typeset the LaTeX3 sources as a single document.
-% This will produce quite a large file (more than 780 pages).
-%
-% There is also a shorter version (interface3.tex) that only typesets the
-% command interface descriptions.
-%
-% Do not forget to generate the index (as explained on the terminal output
-% near the end of the run)!
-%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\documentclass{l3doc}
-\listfiles
-
-\begin{document}
-
-\title{The \LaTeX3 Sources}
-\author{%
- The \LaTeX3 Project\thanks
- {%
- E-mail:
- \href{mailto:latex-team at latex-project.org}
- {latex-team at latex-project.org}%
- }%
-}
-
-\pagenumbering{roman}
-\maketitle
-
-%
-% First load all modules and typeset the documentation parts
-%
-
-\input{source3body} % all the individual modules
-
-%
-% Now reload all modules and typeset the implementation parts
-%
-
-\part{Implementation}
-
-\def\maketitle{}
-\EnableImplementation
-\DisableDocumentation
-\DocInputAgain
-
-\clearpage
-\pagestyle{headings}
-
-% Make TeX shut up.
-\hbadness=10000
-\newcount\hbadness
-\hfuzz=\maxdimen
-
-\PrintChanges
-\clearpage
-
-\begingroup
- \def\endash{--}
- \catcode`\-\active
- \def-{\futurelet\temp\indexdash}
- \def\indexdash{\ifx\temp-\endash\fi}
-
- \DelayPrintIndex
-\endgroup
-
-\end{document}
Modified: trunk/Master/texmf-dist/doc/latex/l3kernel/source3body.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3kernel/source3body.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3kernel/source3body.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -347,8 +347,8 @@
|\sys_if_engine_xetex:TF| \Arg{true code} \Arg{false code}
\end{syntax}
The underlining and italic of \texttt{TF} indicates that
- |\xetex_if_engine:T|, |\xetex_if_engine:F| and
- |\xetex_if_engine:TF| are all available. Usually, the illustration
+ |\sys_if_engine_xetex:T|, |\sys_if_engine_xetex:F| and
+ |\sys_if_engine_xetex:TF| are all available. Usually, the illustration
will use the \texttt{TF} variant, and so both \meta{true code}
and \meta{false code} will be shown. The two variant forms \texttt{T} and
\texttt{F} take only \meta{true code} and \meta{false code}, respectively.
@@ -419,6 +419,7 @@
\DocInput{l3str.dtx}
\DocInput{l3seq.dtx}
\DocInput{l3int.dtx}
+\DocInput{l3intarray.dtx}
\DocInput{l3flag.dtx}
\DocInput{l3quark.dtx}
\DocInput{l3prg.dtx}
@@ -452,6 +453,9 @@
\ExplSyntaxOff
\DocInput{l3sort.dtx}
+\DocInput{l3tl-analysis.dtx}
+\DocInput{l3tl-build.dtx}
+\DocInput{l3regex.dtx}
\DocInput{l3box.dtx}
\DocInput{l3coffins.dtx}
\DocInput{l3color.dtx}
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/l3packages/README.md 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/doc/latex/l3packages/README.md 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,7 +1,7 @@
LaTeX3 High-Level Concepts
==========================
-Release 2017/05/13
+Release 2017/05/29
Overview
--------
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/l3keys2e/l3keys2e.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/xfp/xfp.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/xfrac/xfrac.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/xparse/xparse.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/latex/l3packages/xtemplate/xtemplate.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/source/latex/l3build/l3build.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3build/l3build.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3build/l3build.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -202,7 +202,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/19}
+% \date{Released 2017/05/29}
%
% \maketitle
% \tableofcontents
@@ -286,7 +286,7 @@
% \item check \meta{name(s)}
% \item cmdcheck
% \item clean
-% \item doc
+% \item doc \meta{name(s)}
% \item install
% \item save \meta{name(s)}
% \item setversion
@@ -443,12 +443,15 @@
% If \var{typesetsearch} is \code{true} (default), standard \texttt{texmf} search trees are used in the typesetting compilation. If set to false, \emph{all} necessary files for compilation must be included in the |build/local| sandbox.
% \end{buildcmd}
%
+% \begin{buildcmd}{doc \meta{name(s)}}
+% Typesets only the files with the \meta{name(s)} given, which should be the
+% root name without any extension.
+% \end{buildcmd}
%
% \begin{buildcmd}{install}
% Copies all package files (defined by \var{installfiles}) into the user's home \texttt{texmf} tree in the form of the \TeX\ Directory Structure.
% \end{buildcmd}
%
-%
% \begin{buildcmd}{save \meta{name(s)}}
% This command runs through the same execution as |check| for a specific test(s) \texttt{\meta{name(s)}.lvt}.
% This command saves the output of the test to a |.tlg| file.
@@ -1067,9 +1070,9 @@
% \texttt{tex} can be used, along with custom code, to define a PDF
% typesetting pathway. The functions \texttt{biber} and \texttt{bibtex}
% take a single argument: the name of the file to work with \emph{minus}
-% any extension. The \texttt{tex} takes as an arugment the full name
+% any extension. The \texttt{tex} takes as an argument the full name
% of the file. The most complex function \texttt{makeindex} requires the
-% name, input extension, putput extension, log extension and style name.
+% name, input extension, output extension, log extension and style name.
% For example, Figure~\ref{fig:PDF} shows a simple script which might
% apply to a case where multiple \BibTeX{} runs are needed (perhaps where
% citations can appear within other references).
@@ -1082,17 +1085,17 @@
%
% module = "mymodule"
%
-% function typeset (file)
-% local name = string.match (file, "^(.*)%.") or name
+% function typeset(file)
+% local name = jobname(file)
% local errorlevel = tex (file)
% if errorlevel == 0 then
% -- Return a non-zero errorlevel if anything goes wrong
-% errorlevel = (
-% bibtex (name) +
-% tex (file) +
-% bibtex (name) +
-% tex (file) +
-% tex (file)
+% errorlevel =(
+% bibtex(name) +
+% tex(file) +
+% bibtex(name) +
+% tex(file) +
+% tex(file)
% )
% end
% return errorlevel
@@ -1154,6 +1157,14 @@
% \meta{target} directory.
% \end{function}
%
+% \begin{function}{basename()}
+% \begin{syntax}
+% |basename(|\meta{file}|)|
+% \end{syntax}
+% Returns a string comprising the full name of the file with the
+% path removed (\emph{i.e.}~from the last |/| onward).
+% \end{function}
+%
% \begin{function}{cleandir()}
% \begin{syntax}
% |cleandir(|\meta{dir}|)|
@@ -1192,8 +1203,17 @@
% all files in the \meta{path}.
% \end{function}
%
-% \begin{function}{mkidr()}
+% \begin{function}{jobname()}
% \begin{syntax}
+% |jobname(|\meta{file}|)|
+% \end{syntax}
+% Returns a string comprising the jobname of the file with the
+% path and extension removed (\emph{i.e.}~from the last |/| up to the
+% last |.|).
+% \end{function}
+%
+% \begin{function}{mkdir()}
+% \begin{syntax}
% |mkdir(|\meta{dir}|)|
% \end{syntax}
% Creates the \meta{dir}; returns an error level.
Deleted: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3intarray.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3intarray.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3intarray.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,271 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: l3intarray.dtx Copyright (C) 2017 The LaTeX3 Project
-%
-% It may be distributed and/or modified under the conditions of the
-% LaTeX Project Public License (LPPL), either version 1.3c of this
-% license or (at your option) any later version. The latest version
-% of this license is in the file
-%
-% http://www.latex-project.org/lppl.txt
-%
-% This file is part of the "l3experimental bundle" (The Work in LPPL)
-% and all files in that bundle must be distributed together.
-%
-% -----------------------------------------------------------------------
-%
-% The development version of the bundle can be found at
-%
-% https://github.com/latex3/latex3
-%
-% for those people who are interested.
-%
-%<*driver|package>
-% The version of expl3 required is tested as early as possible, as
-% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
-%<package> {}
-%<package> {%
-%<package> \PackageError{l3intarray}{Support package l3kernel too old}
-%<package> {%
-%<package> Please install an up to date version of l3kernel\MessageBreak
-%<package> using your TeX package manager or from CTAN.\MessageBreak
-%<package> \MessageBreak
-%<package> Loading l3intarray will abort!%
-%<package> }%
-%<package> \endinput
-%<package> }
-%</driver|package>
-%<*driver>
-\documentclass[full]{l3doc}
-\usepackage{amsmath}
-\begin{document}
- \DocInput{\jobname.dtx}
-\end{document}
-%</driver>
-% \fi
-%
-%
-% \title{^^A
-% The \textsf{l3intarray} package: low-level arrays of small integers^^A
-% }
-%
-% \author{^^A
-% The \LaTeX3 Project\thanks
-% {^^A
-% E-mail:
-% \href{mailto:latex-team at latex-project.org}
-% {latex-team at latex-project.org}^^A
-% }^^A
-% }
-%
-% \date{Released 2017/05/13}
-%
-% \maketitle
-%
-% \begin{documentation}
-%
-% \section{\pkg{l3intarray} documentation}
-%
-% This module provides no user function: at present it is meant for
-% kernel use only.
-%
-% It is a wrapper around the \tn{fontdimen} primitive, used to store
-% arrays of integers (with a restricted range: absolute value at most
-% $2^{30}-1$). In contrast to \pkg{l3seq} sequences the access to
-% individual entries is done in constant time rather than linear time,
-% but only integers can be stored. More precisely, the primitive
-% \tn{fontdimen} stores dimensions but the \pkg{l3intarray} package
-% transparently converts these from/to integers. Assignments are always
-% global.
-%
-% While \LuaTeX{}'s memory is extensible, other engines can
-% \enquote{only} deal with a bit less than $4\times 10^6$ entries in all
-% \tn{fontdimen} arrays combined (with default \TeX{}Live settings).
-%
-% \subsection{Internal functions}
-%
-% \begin{function}{\__intarray_new:Nn}
-% \begin{syntax}
-% \cs{__intarray_new:Nn} \meta{intarray~var} \Arg{size}
-% \end{syntax}
-% Evaluates the integer expression \meta{size} and allocates an
-% \meta{integer array variable} with that number of (zero) entries.
-% \end{function}
-%
-% \begin{function}[EXP]{\__intarray_count:N}
-% \begin{syntax}
-% \cs{__intarray_count:N} \meta{intarray~var}
-% \end{syntax}
-% Expands to the number of entries in the \meta{integer array variable}.
-% Contrary to \cs{seq_count:N}, this is performed in constant time.
-% \end{function}
-%
-% \begin{function}{\__intarray_gset:Nnn, \__intarray_gset_fast:Nnn}
-% \begin{syntax}
-% \cs{__intarray_gset:Nnn} \meta{intarray~var} \Arg{position} \Arg{value}
-% \cs{__intarray_gset_fast:Nnn} \meta{intarray~var} \Arg{position} \Arg{value}
-% \end{syntax}
-% Stores the result of evaluating the integer expression \meta{value}
-% into the \meta{integer array variable} at the (integer expression)
-% \meta{position}. While \cs{__intarray_gset:Nnn} checks that the
-% \meta{position} is between $1$ and the \cs{__intarray_count:N} and that
-% the \meta{value}'s absolute value is at most $2^{30}-1$, the
-% \enquote{fast} function performs no such bound check.
-% Assignments are always global.
-% \end{function}
-%
-% \begin{function}[EXP]{\__intarray_item:Nn, \__intarray_item_fast:Nn}
-% \begin{syntax}
-% \cs{__intarray_item:Nn} \meta{intarray~var} \Arg{position}
-% \cs{__intarray_item_fast:Nn} \meta{intarray~var} \Arg{position}
-% \end{syntax}
-% Expands to the integer entry stored at the (integer expression)
-% \meta{position} in the \meta{integer array variable}. While
-% \cs{__intarray_item:Nn} checks that the \meta{position} is between $1$
-% and the \cs{__intarray_count:N}, the \enquote{fast} function performs
-% no such bound check.
-% \end{function}
-%
-% \end{documentation}
-%
-% \begin{implementation}
-%
-% \section{\pkg{l3intarray} implementation}
-%
-% \begin{macrocode}
-%<*initex|package>
-% \end{macrocode}
-%
-% \begin{macrocode}
-%<@@=intarray>
-% \end{macrocode}
-%
-% \begin{macrocode}
-\ProvidesExplPackage{l3intarray}{2017/05/13}{}
- {L3 Experimental low-level arrays of small integers}
-% \end{macrocode}
-%
-% \subsection{Allocating arrays}
-%
-% \begin{variable}{\g_@@_font_int}
-% Used to assign one font per array.
-% \begin{macrocode}
-\int_new:N \g_@@_font_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}[int]{\@@_new:Nn}
-% Declare |#1| to be a font (arbitrarily |cmr10| at a never-used
-% size). Store the array's size as the \tn{hyphenchar} of that font
-% and make sure enough \tn{fontdimen} are allocated, by setting the
-% last one. Then clear any \tn{fontdimen} that |cmr10| starts with.
-% It seems \LuaTeX{}'s |cmr10| has an extra \tn{fontdimen} parameter
-% number $8$ compared to other engines (for a math font we would
-% replace $8$ by $22$ or some such).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_new:Nn #1#2
- {
- \__chk_if_free_cs:N #1
- \int_gincr:N \g_@@_font_int
- \tex_global:D \tex_font:D #1 = cmr10~at~ \g_@@_font_int sp \scan_stop:
- \tex_hyphenchar:D #1 = \int_eval:n {#2} \scan_stop:
- \int_compare:nNnT { \tex_hyphenchar:D #1 } > 0
- { \tex_fontdimen:D \tex_hyphenchar:D #1 #1 = 0 sp \scan_stop: }
- \int_step_inline:nnnn { 1 } { 1 } { 8 }
- { \tex_fontdimen:D ##1 #1 = 0 sp \scan_stop: }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_count:N}
-% Size of an array.
-% \begin{macrocode}
-\cs_new:Npn \@@_count:N #1 { \tex_the:D \tex_hyphenchar:D #1 }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Array items}
-%
-% \begin{macro}[int]{\@@_gset:Nnn, \@@_gset_fast:Nnn}
-% \begin{macro}[aux]{\@@_gset_aux:Nnn}
-% Set the appropriate \tn{fontdimen}. The slow version checks the
-% position and value are within bounds.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_gset_fast:Nnn #1#2#3
- { \tex_fontdimen:D \int_eval:n {#2} #1 = \int_eval:n {#3} sp \scan_stop: }
-\cs_new_protected:Npn \@@_gset:Nnn #1#2#3
- {
- \exp_args:Nff \@@_gset_aux:Nnn #1
- { \int_eval:n {#2} } { \int_eval:n {#3} }
- }
-\cs_new_protected:Npn \@@_gset_aux:Nnn #1#2#3
- {
- \int_compare:nTF { 1 <= #2 <= \@@_count:N #1 }
- {
- \int_compare:nTF { - \c_max_dim <= \int_abs:n {#3} <= \c_max_dim }
- { \@@_gset_fast:Nnn #1 {#2} {#3} }
- {
- \__msg_kernel_error:nnxxxx { intarray } { overflow }
- { \token_to_str:N #1 } {#2} {#3}
- { \int_compare:nNnT {#3} < 0 { - } \__int_value:w \c_max_dim }
- \@@_gset_fast:Nnn #1 {#2}
- { \int_compare:nNnT {#3} < 0 { - } \c_max_dim }
- }
- }
- {
- \__msg_kernel_error:nnxxx { intarray } { out-of-bounds }
- { \token_to_str:N #1 } {#2} { \@@_count:N #1 }
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[EXP]{\@@_item:Nn, \@@_item_fast:Nn}
-% \begin{macro}[aux]{\@@_item_aux:Nn}
-% Get the appropriate \tn{fontdimen} and perform bound checks if requested.
-% \begin{macrocode}
-\cs_new:Npn \@@_item_fast:Nn #1#2
- { \__int_value:w \tex_fontdimen:D \int_eval:n {#2} #1 }
-\cs_new:Npn \@@_item:Nn #1#2
- { \exp_args:Nf \@@_item_aux:Nn #1 { \int_eval:n {#2} } }
-\cs_new:Npn \@@_item_aux:Nn #1#2
- {
- \int_compare:nTF { 1 <= #2 <= \@@_count:N #1 }
- { \@@_item_fast:Nn #1 {#2} }
- {
- \__msg_kernel_expandable_error:nnnnn { intarray } { out-of-bounds }
- { \token_to_str:N #1 } {#2} { \@@_count:N #1 }
- 0
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Messages}
-%
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { intarray } { overflow }
- { Integers~larger~than~2^{30}-1~cannot~be~stored~in~arrays. }
- {
- An~attempt~was~made~to~store~#3~at~position~#2~in~the~array~'#1'.~
- The~largest~allowed~value~#4~will~be~used~instead.
- }
-\__msg_kernel_new:nnnn { intarray } { out-of-bounds }
- { Access~to~an~entry~beyond~an~array's~bounds. }
- {
- An~attempt~was~made~to~access~or~store~data~at~position~#2~of~the~
- array~'#1',~but~this~array~has~entries~at~positions~from~1~to~#3.
- }
-% \end{macrocode}
-%
-% \begin{macrocode}
-%</initex|package>
-% \end{macrocode}
-%
-% \end{implementation}
-%
-% \PrintIndex
Deleted: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3regex.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3regex.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3regex.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,6423 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: l3regex.dtx Copyright (C) 2011-2017 The LaTeX3 Project
-%
-% It may be distributed and/or modified under the conditions of the
-% LaTeX Project Public License (LPPL), either version 1.3c of this
-% license or (at your option) any later version. The latest version
-% of this license is in the file
-%
-% http://www.latex-project.org/lppl.txt
-%
-% This file is part of the "l3experimental bundle" (The Work in LPPL)
-% and all files in that bundle must be distributed together.
-%
-% -----------------------------------------------------------------------
-%
-% The development version of the bundle can be found at
-%
-% https://github.com/latex3/latex3
-%
-% for those people who are interested.
-%
-%<*driver|package>
-% The version of expl3 required is tested as early as possible, as
-% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
-%<package> {}
-%<package> {%
-%<package> \PackageError{l3regex}{Support package l3kernel too old}
-%<package> {%
-%<package> Please install an up to date version of l3kernel\MessageBreak
-%<package> using your TeX package manager or from CTAN.\MessageBreak
-%<package> \MessageBreak
-%<package> Loading l3regex will abort!%
-%<package> }%
-%<package> \endinput
-%<package> }
-%</driver|package>
-%<*driver>
-\documentclass[full]{l3doc}
-\usepackage{amsmath}
-\begin{document}
- \DocInput{\jobname.dtx}
-\end{document}
-%</driver>
-% \fi
-%
-% \title{^^A
-% The \textsf{l3regex} package: regular expressions in \TeX{}^^A
-% }
-%
-% \author{^^A
-% The \LaTeX3 Project\thanks
-% {^^A
-% E-mail:
-% \href{mailto:latex-team at latex-project.org}
-% {latex-team at latex-project.org}^^A
-% }^^A
-% }
-%
-% \date{Released 2017/05/13}
-%
-% \maketitle
-%
-% \begin{documentation}
-% \newenvironment{l3regex-syntax}
-% {\begin{itemize}\def\\{\char`\\}\def\makelabel##1{\hss\llap{\ttfamily##1}}}
-% {\end{itemize}}
-%
-% \section{\pkg{l3regex} documentation}
-%
-% The \pkg{l3regex} package provides regular expression testing,
-% extraction of submatches, splitting, and replacement, all acting
-% on token lists. The syntax of regular expressions is mostly a subset
-% of the \textsc{pcre} syntax (and very close to \textsc{posix}),
-% with some additions
-% due to the fact that \TeX{} manipulates tokens rather than characters.
-% For performance reasons, only a limited set of features are implemented.
-% Notably, back-references are not supported.
-%
-% Let us give a few examples. After
-% \begin{verbatim}
-% \tl_set:Nn \l_my_tl { That~cat. }
-% \regex_replace_once:nnN { at } { is } \l_my_tl
-% \end{verbatim}
-% the token list variable \cs{l_my_tl} holds the text
-% \enquote{\texttt{This cat.}}, where the first
-% occurrence of \enquote{\texttt{at}} was replaced
-% by \enquote{\texttt{is}}. A more complicated example is
-% a pattern to add a comma at the end of each word:
-% \begin{verbatim}
-% \regex_replace_all:nnN { \w+ } { \0 , } \l_my_tl
-% \end{verbatim}
-% The |\w| sequence represents any \enquote{word} character,
-% and |+| indicates that the |\w| sequence should be repeated
-% as many times as possible (at least once), hence matching a word in the
-% input token list. In the replacement text, |\0| denotes the full match
-% (here, a word).
-%
-% If a regular expression is to be used several times,
-% it can be compiled once, and stored in a regex
-% variable using \cs{regex_const:Nn}. For example,
-% \begin{verbatim}
-% \regex_const:Nn \c_foo_regex { \c{begin} \cB. (\c[^BE].*) \cE. }
-% \end{verbatim}
-% stores in \cs{c_foo_regex} a regular expression which matches the
-% starting marker for an environment: \cs{begin}, followed by a
-% begin-group token (|\cB.|), then any number of tokens which are
-% neither begin-group nor end-group character tokens (|\c[^BE].*|),
-% ending with an end-group token (|\cE.|). As explained in the next
-% section, the parentheses \enquote{capture} the result of |\c[^BE].*|,
-% giving us access to the name of the environment when doing
-% replacements.
-%
-% \subsection{Syntax of regular expressions}
-%
-% Most characters match exactly themselves,
-% with an arbitrary category code. Some characters are
-% special and must be escaped with a backslash (\emph{e.g.}, |\*|
-% matches a star character). Some escape sequences of
-% the form backslash--letter also have a special meaning
-% (for instance |\d| matches any digit). As a rule,
-% \begin{itemize}
-% \item every alphanumeric character (\texttt{A}--\texttt{Z},
-% \texttt{a}--\texttt{z}, \texttt{0}--\texttt{9}) matches
-% exactly itself, and should not be escaped, because
-% |\A|, |\B|, \ldots{} have special meanings;
-% \item non-alphanumeric printable ascii characters can (and should)
-% always be escaped: many of them have special meanings (\emph{e.g.},
-% use |\(|, |\)|, |\?|, |\.|);
-% \item spaces should always be escaped (even in character
-% classes);
-% \item any other character may be escaped or not, without any
-% effect: both versions will match exactly that character.
-% \end{itemize}
-% Note that these rules play nicely with the fact that many
-% non-alphanumeric characters are difficult to input into \TeX{}
-% under normal category codes. For instance, |\\abc\%|
-% matches the characters |\abc%| (with arbitrary category codes),
-% but does not match the control sequence |\abc| followed by a
-% percent character. Matching control sequences can be done
-% using the |\c|\Arg{regex} syntax (see below).
-%
-% Any special character which appears at a place where its special
-% behaviour cannot apply matches itself instead (for instance, a
-% quantifier appearing at the beginning of a string), after raising a
-% warning.
-%
-% Characters.
-% \begin{l3regex-syntax}
-% \item[\\x\{hh\ldots{}\}] Character with hex code \texttt{hh\ldots{}}.
-% \item[\\xhh] Character with hex code \texttt{hh}.
-% \item[\\a] Alarm (hex 07).
-% \item[\\e] Escape (hex 1B).
-% \item[\\f] Form-feed (hex 0C).
-% \item[\\n] New line (hex 0A).
-% \item[\\r] Carriage return (hex 0D).
-% \item[\\t] Horizontal tab (hex 09).
-% \end{l3regex-syntax}
-%
-% Character types.
-% \begin{l3regex-syntax}
-% \item[.] A single period matches any token.
-% \item[\\d] Any decimal digit.
-% \item[\\h] Any horizontal space character,
-% equivalent to |[\ \^^I]|: space and tab.
-% \item[\\s] Any space character,
-% equivalent to |[\ \^^I\^^J\^^L\^^M]|.
-% \item[\\v] Any vertical space character,
-% equivalent to |[\^^J\^^K\^^L\^^M]|. Note that |\^^K| is a vertical space,
-% but not a space, for compatibility with Perl.
-% \item[\\w] Any word character, \emph{i.e.},
-% alpha-numerics and underscore, equivalent to |[A-Za-z0-9\_]|.
-% \item[\\D] Any token not matched by |\d|.
-% \item[\\H] Any token not matched by |\h|.
-% \item[\\N] Any token other than the |\n| character (hex 0A).
-% \item[\\S] Any token not matched by |\s|.
-% \item[\\V] Any token not matched by |\v|.
-% \item[\\W] Any token not matched by |\w|.
-% \end{l3regex-syntax}
-% Of those, |.|, |\D|, |\H|, |\N|, |\S|, |\V|, and |\W| will match arbitrary
-% control sequences.
-%
-% Character classes match exactly one token in the subject.
-% \begin{l3regex-syntax}
-% \item[{[\ldots{}]}] Positive character class.
-% Matches any of the specified tokens.
-% \item[{[\char`\^\ldots{}]}] Negative character class.
-% Matches any token other than the specified characters.
-% \item[{x-y}] Within a character class, this denotes a range (can be
-% used with escaped characters).
-% \item[{[:\meta{name}:]}] Within a character class (one more set of
-% brackets), this denotes the \textsc{posix} character class
-% \meta{name}, which can be \texttt{alnum}, \texttt{alpha},
-% \texttt{ascii}, \texttt{blank}, \texttt{cntrl}, \texttt{digit},
-% \texttt{graph}, \texttt{lower}, \texttt{print}, \texttt{punct},
-% \texttt{space}, \texttt{upper}, \texttt{word}, or \texttt{xdigit}.
-% \item[{[:\char`\^\meta{name}:]}] Negative \textsc{posix} character class.
-% \end{l3regex-syntax}
-% For instance, |[a-oq-z\cC.]| matches any lowercase latin letter
-% except |p|, as well as control sequences (see below for a description
-% of |\c|).
-%
-% Quantifiers (repetition).
-% \begin{l3regex-syntax}
-% \item[?] $0$ or $1$, greedy.
-% \item[??] $0$ or $1$, lazy.
-% \item[*] $0$ or more, greedy.
-% \item[*?] $0$ or more, lazy.
-% \item[+] $1$ or more, greedy.
-% \item[+?] $1$ or more, lazy.
-% \item[\{$n$\}] Exactly $n$.
-% \item[\{$n,$\}] $n$ or more, greedy.
-% \item[\{$n,$\}?] $n$ or more, lazy.
-% \item[\{$n,m$\}] At least $n$, no more than $m$, greedy.
-% \item[\{$n,m$\}?] At least $n$, no more than $m$, lazy.
-% \end{l3regex-syntax}
-%
-% Anchors and simple assertions.
-% \begin{l3regex-syntax}
-% \item[\\b] Word boundary: either the previous token is matched by
-% |\w| and the next by |\W|, or the opposite. For this purpose,
-% the ends of the token list are considered as |\W|.
-% \item[\\B] Not a word boundary: between two |\w| tokens
-% or two |\W| tokens (including the boundary).
-% \item[\char`^ \textrm{or} \\A]
-% Start of the subject token list.
-% \item[\char`$\textrm{,} \\Z \textrm{or} \\z]
-% End of the subject token list.
-% \item[\\G] Start of the current match. This is only different from |^|
-% in the case of multiple matches: for instance
-% |\regex_count:nnN { \G a } { aaba } \l_tmpa_int| yields $2$, but
-% replacing |\G| by |^| would result in \cs{l_tmpa_int} holding the
-% value $1$.
-% \end{l3regex-syntax}
-%
-% Alternation and capturing groups.
-% \begin{l3regex-syntax}
-% \item[A\char`|B\char`|C] Either one of \texttt{A}, \texttt{B},
-% or \texttt{C}.
-% \item[(\ldots{})] Capturing group.
-% \item[(?:\ldots{})] Non-capturing group.
-% \item[(?\char`|\ldots{})] Non-capturing group which resets
-% the group number for capturing groups in each alternative.
-% The following group will be numbered with the first unused
-% group number.
-% \end{l3regex-syntax}
-%
-% The |\c| escape sequence allows testing the category code of tokens,
-% and matching control sequences. Each character category is represented
-% by a single uppercase letter:
-% \begin{itemize}
-% \item |C| for control sequences;
-% \item |B| for begin-group tokens;
-% \item |E| for end-group tokens;
-% \item |M| for math shift;
-% \item |T| for alignment tab tokens;
-% \item |P| for macro parameter tokens;
-% \item |U| for superscript tokens (up);
-% \item |D| for subscript tokens (down);
-% \item |S| for spaces;
-% \item |L| for letters;
-% \item |O| for others; and
-% \item |A| for active characters.
-% \end{itemize}
-% The |\c| escape sequence is used as follows.
-% \begin{l3regex-syntax}
-% \item[\\c\Arg{regex}] A control sequence whose csname matches the
-% \meta{regex}, anchored at the beginning and end, so that |\c{begin}|
-% matches exactly \cs{begin}, and nothing else.
-% \item[\\cX] Applies to the next object, which can be a character,
-% character property, class, or group, and forces this object to
-% only match tokens with category |X| (any of |CBEMTPUDSLOA|). For
-% instance, |\cL[A-Z\d]| matches uppercase letters and digits of
-% category code letter, |\cC.| matches any control sequence, and
-% |\cO(abc)| matches |abc| where each character has category other.
-% \item[{\\c[XYZ]}] Applies to the next object, and forces it to only
-% match tokens with category |X|, |Y|, or |Z| (each being any of
-% |CBEMTPUDSLOA|). For instance, |\c[LSO](..)| matches two tokens of
-% category letter, space, or other.
-% \item[{\\c[\char`\^XYZ]}] Applies to the next object and prevents it
-% from matching any token with category |X|, |Y|, or |Z| (each being
-% any of |CBEMTPUDSLOA|). For instance, |\c[^O]\d| matches digits
-% which have any category different from other.
-% \end{l3regex-syntax}
-% The category code tests can be used inside classes; for instance,
-% |[\cO\d \c[LO][A-F]]| matches what \TeX{} considers as hexadecimal
-% digits, namely digits with category other, or uppercase letters from
-% |A| to |F| with category either letter or other. Within a group
-% affected by a category code test, the outer test can be overridden by
-% a nested test: for instance, |\cL(ab\cO\*cd)| matches |ab*cd| where
-% all characters are of category letter, except |*| which has category
-% other.
-%
-% The |\u| escape sequence allows inserting the contents of a token list
-% directly into a regular expression or a replacement, avoiding the need
-% to escape special characters. Namely, |\u|\Arg{tl~var~name} matches
-% the exact contents of the token list \meta{tl~var}. Within a |\c{...}|
-% control sequence matching, the |\u| escape sequence only expands its
-% argument once, in effect performing \cs{tl_to_str:v}. Quantifiers are
-% not supported directly: use a group.
-%
-% The option |(?i)| makes the match case insensitive (identifying
-% \texttt{A}--\texttt{Z} with \texttt{a}--\texttt{z}; no Unicode support
-% yet). This applies until the end of the group in which it appears, and
-% can be reverted using |(?-i)|. For instance, in
-% \verb"(?i)(a(?-i)b|c)d", the letters |a| and |d| are affected by the
-% |i| option. Characters within ranges and classes are affected
-% individually: |(?i)[Y-\\]| is equivalent to |[YZ\[\\yz]|, and
-% |(?i)[^aeiou]| matches any character which is not a vowel. Neither
-% character properties, nor |\c{...}| nor |\u{...}| are affected by the
-% |i| option.
-% ^^A \]
-%
-% In character classes, only |[|, |^|, |-|, |]|, |\| and spaces are
-% special, and should be escaped. Other non-alphanumeric characters can
-% still be escaped without harm. Any escape sequence which matches a
-% single character (|\d|, |\D|, \emph{etc.}) is supported in character
-% classes. If the first character is |^|, then
-% the meaning of the character class is inverted; |^| appearing anywhere
-% else in the range is not special. If the first character (possibly
-% following a leading |^|) is |]| then it does not need to be escaped
-% since ending the range there would make it empty.
-% Ranges of characters
-% can be expressed using |-|, for instance, |[\D 0-5]| and |[^6-9]| are
-% equivalent.
-%
-% Capturing groups are a means of extracting information about the
-% match. Parenthesized groups are labelled in the order of their
-% opening parenthesis, starting at $1$. The contents of those groups
-% corresponding to the \enquote{best} match (leftmost longest)
-% can be extracted and stored in a sequence of token lists using for
-% instance \cs{regex_extract_once:nnNTF}.
-%
-% The |\K| escape sequence resets the beginning of the match to the
-% current position in the token list. This only affects what is reported
-% as the full match. For instance,
-% \begin{verbatim}
-% \regex_extract_all:nnN { a \K . } { a123aaxyz } \l_foo_seq
-% \end{verbatim}
-% results in \cs{l_foo_seq} containing the items |{1}| and |{a}|: the
-% true matches are |{a1}| and |{aa}|, but they are trimmed by the use of
-% |\K|. The |\K| command does not affect capturing groups: for instance,
-% \begin{verbatim}
-% \regex_extract_once:nnN { (. \K c)+ \d } { acbc3 } \l_foo_seq
-% \end{verbatim}
-% results in \cs{l_foo_seq} containing the items |{c3}| and |{bc}|: the
-% true match is |{acbc3}|, with first submatch |{bc}|, but |\K| resets
-% the beginning of the match to the last position where it appears.
-%
-% \subsection{Syntax of the replacement text}
-%
-% Most of the features described in regular expressions do not make
-% sense within the replacement text. Backslash introduces various
-% special constructions:
-% \begin{itemize}
-% \item |\0| is the whole match;
-% \item |\1|, |\2|, \ldots{}, |\9| or |\g{|\meta{number}|}| are the
-% submatches (empty if there are fewer than \meta{number} capturing
-% groups);
-% \item \verb*|\ | inserts a space (spaces are ignored when not
-% escaped);
-% \item |\a|, |\e|, |\f|, |\n|, |\r|, |\t|, |\xhh|, |\x{hhh}|
-% correspond to single characters as in regular expressions;
-% \item |\c|\Arg{cs~name} inserts a control sequence;
-% \item |\c|\meta{category}\meta{character} (see below);
-% \item |\u|\Arg{tl~var~name} inserts the contents of the
-% \meta{tl~var} (see below).
-% \end{itemize}
-% Characters other than backslash and space are simply inserted in the
-% result (but since the replacement text is first converted to a string,
-% one should also escape characters that are special for \TeX{}, for
-% instance use~|\#|). Non-alphanumeric characters can always be safely
-% escaped with a backslash.
-%
-% For instance,
-% \begin{verbatim}
-% \tl_set:Nn \l_my_tl { Hello,~world! }
-% \regex_replace_all:nnN { ([er]?l|o) . } { (\0--\1) } \l_my_tl
-% \end{verbatim}
-% results in \cs{l_my_tl} holding |H(ell--el)(o,--o) w(or--o)(ld--l)!|.
-%
-% Submatches always keep the same category codes as in the original
-% token list.
-% The characters inserted by the replacement have category code $12$
-% (other) by default, with the exception of space characters. Spaces
-% inserted through \verb*|\ | have category code $10$, while spaces
-% inserted through |\x20| or |\x{20}| have category code $12$.
-% The escape sequence |\c| allows one to insert characters
-% with arbitrary category codes, as well as control sequences.
-% \begin{l3regex-syntax}
-% \item[\\cX(\ldots{})] Produces the characters \enquote{\ldots{}} with
-% category~|X|, which must be one of |CBEMTPUDSLOA| as in regular
-% expressions. Parentheses are optional for a single character (which
-% can be an escape sequence). This can be nested, for instance
-% |\cL(Hello\cS\ world)!|
-% \item[\\c\Arg{text}] Produces the control sequence with csname
-% \meta{text}. The \meta{text} may contain references to the
-% submatches |\0|, |\1|, and so on, as in the example for |\u| below.
-% \end{l3regex-syntax}
-%
-% The escape sequence |\u|\Arg{tl~var~name} allows one to insert the
-% contents of the token list with name \meta{tl~var~name} directly into
-% the replacement, giving an easier control of category codes.
-% Within |\c{|\ldots{}|}| and |\u{|\ldots{}|}| constructions, the |\u|
-% and |\c|~escape sequences perform \cs{tl_to_str:v}, namely extract the
-% value of the control sequence and turn it into a string.
-%
-% Matches can be used within the arguments of |\c| and |\u|. For
-% instance,
-% \begin{verbatim}
-% \tl_set:Nn \l_my_one_tl { first }
-% \tl_set:Nn \l_my_two_tl { \emph{second} }
-% \tl_set:Nn \l_my_tl { one , two , one , one }
-% \regex_replace_all:nnN { [^,]+ } { \u{l_my_\0_tl} } \l_my_tl
-% \end{verbatim}
-% results in \cs{l_my_tl} holding |first,\emph{second},first,first|.
-%
-% \subsection{Pre-compiling regular expressions}
-%
-% If a regular expression is to be used several times,
-% it is better to compile it once rather than doing it
-% each time the regular expression is used. The compiled
-% regular expression is stored in a variable. All
-% of the \pkg{l3regex} module's functions can be given their
-% regular expression argument either as an explicit string
-% or as a compiled regular expression.
-%
-% \begin{function}{\regex_new:N}
-% \begin{syntax}
-% \cs{regex_new:N} \meta{regex~var}
-% \end{syntax}
-% Creates a new \meta{regex~var} or raises an error if the
-% name is already taken. The declaration is global. The
-% \meta{regex~var} will initially be such that it never matches.
-% \end{function}
-%
-% \begin{function}{\regex_set:Nn, \regex_gset:Nn, \regex_const:Nn}
-% \begin{syntax}
-% \cs{regex_set:Nn} \meta{regex~var} \Arg{regex}
-% \end{syntax}
-% Stores a compiled version of the \meta{regular expression}
-% in the \meta{regex~var}. For instance, this function can be used
-% as
-% \begin{verbatim}
-% \regex_new:N \l_my_regex
-% \regex_set:Nn \l_my_regex { my\ (simple\ )? reg(ex|ular\ expression) }
-% \end{verbatim}
-% The assignment is local for \cs{regex_set:Nn} and global for
-% \cs{regex_gset:Nn}. Use \cs{regex_const:Nn} for compiled expressions
-% which will never change.
-% \end{function}
-%
-% \begin{function}{\regex_show:n, \regex_show:N}
-% \begin{syntax}
-% \cs{regex_show:n} \Arg{regex}
-% \end{syntax}
-% Shows how \pkg{l3regex} interprets the \meta{regex}. For instance,
-% \cs{regex_show:n} \verb+{\A X|Y}+ shows
-% \begin{verbatim}
-% +-branch
-% anchor at start (\A)
-% char code 88
-% +-branch
-% char code 89
-% \end{verbatim}
-% indicating that the anchor |\A| only applies to the first branch:
-% the second branch is not anchored to the beginning of the match.
-% \end{function}
-%
-% \subsection{Matching}
-%
-% All regular expression functions are available in both |:n| and |:N|
-% variants. The former require a \enquote{standard} regular expression,
-% while the latter require a compiled expression as generated by
-% \cs{regex_(g)set:Nn}.
-%
-% \begin{function}[TF]{\regex_match:nn, \regex_match:Nn}
-% \begin{syntax}
-% \cs{regex_match:nnTF} \Arg{regex} \Arg{token list} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Tests whether the \meta{regular expression} matches any part
-% of the \meta{token list}. For instance,
-% \begin{verbatim}
-% \regex_match:nnTF { b [cde]* } { abecdcx } { TRUE } { FALSE }
-% \regex_match:nnTF { [b-dq-w] } { example } { TRUE } { FALSE }
-% \end{verbatim}
-% leaves \texttt{TRUE} then \texttt{FALSE} in the input stream.
-% \end{function}
-%
-% \begin{function}{\regex_count:nnN, \regex_count:NnN}
-% \begin{syntax}
-% \cs{regex_count:nnN} \Arg{regex} \Arg{token list} \meta{int var}
-% \end{syntax}
-% Sets \meta{int var} within the current \TeX{} group level
-% equal to the number of times
-% \meta{regular expression} appears in \meta{token list}.
-% The search starts by finding the left-most longest match,
-% respecting greedy and ungreedy operators. Then the search
-% starts again from the character following the last character
-% of the previous match, until reaching the end of the token list.
-% Infinite loops are prevented in the case where the regular expression
-% can match an empty token list: then we count one match between each
-% pair of characters.
-% For instance,
-% \begin{verbatim}
-% \int_new:N \l_foo_int
-% \regex_count:nnN { (b+|c) } { abbababcbb } \l_foo_int
-% \end{verbatim}
-% results in \cs{l_foo_int} taking the value $5$.
-% \end{function}
-%
-% \subsection{Submatch extraction}
-%
-% \begin{function}[TF]{\regex_extract_once:nnN, \regex_extract_once:NnN}
-% \begin{syntax}
-% \cs{regex_extract_once:nnN} \Arg{regex} \Arg{token list} \meta{seq~var}
-% \cs{regex_extract_once:nnNTF} \Arg{regex} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Finds the first match of the \meta{regular expression}
-% in the \meta{token list}. If it exists, the match is stored
-% as the zeroeth item of the \meta{seq~var}, and further
-% items are the contents of capturing groups, in the order
-% of their opening parenthesis. The \meta{seq~var}
-% is assigned locally. If there is no match,
-% the \meta{seq~var} is cleared.
-% The testing versions insert the \meta{true code} into the input
-% stream if a match was found, and the \meta{false code} otherwise.
-% For instance, assume that you type
-% \begin{verbatim}
-% \regex_extract_once:nnNTF { \A(La)?TeX(!*)\Z } { LaTeX!!! } \l_foo_seq
-% { true } { false }
-% \end{verbatim}
-% Then the regular expression (anchored at the start with |\A| and
-% at the end with |\Z|) will match the whole token list. The first
-% capturing group, |(La)?|, matches |La|, and the second capturing
-% group, |(!*)|, matches |!!!|. Thus, |\l_foo_seq| will contain
-% the items |{LaTeX!!!}|, |{La}|, and |{!!!}|, and the \texttt{true}
-% branch is left in the input stream.
-% \end{function}
-%
-% \begin{function}[TF]{\regex_extract_all:nnN, \regex_extract_all:NnN}
-% \begin{syntax}
-% \cs{regex_extract_all:nnN} \Arg{regex} \Arg{token list} \meta{seq~var}
-% \cs{regex_extract_all:nnNTF} \Arg{regex} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Finds all matches of the \meta{regular expression}
-% in the \meta{token list}, and stores all the submatch information
-% in a single sequence (concatenating the results of
-% multiple \cs{regex_extract_once:nnN} calls).
-% The \meta{seq~var} is assigned locally. If there is no match,
-% the \meta{seq~var} is cleared.
-% The testing versions insert the \meta{true code} into the input
-% stream if a match was found, and the \meta{false code} otherwise.
-% For instance, assume that you type
-% \begin{verbatim}
-% \regex_extract_all:nnNTF { \w+ } { Hello,~world! } \l_foo_seq
-% { true } { false }
-% \end{verbatim}
-% Then the regular expression will match twice, and the resulting
-% sequence contains the two items |{Hello}| and |{world}|,
-% and the \texttt{true} branch is left in the input stream.
-% \end{function}
-%
-% \begin{function}[TF]{\regex_split:nnN, \regex_split:NnN}
-% \begin{syntax}
-% \cs{regex_split:nnN} \Arg{regular expression} \Arg{token list} \meta{seq~var}
-% \cs{regex_split:nnNTF} \Arg{regular expression} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Splits the \meta{token list} into a sequence of parts, delimited by
-% matches of the \meta{regular expression}. If the \meta{regular expression}
-% has capturing groups, then the token lists that they match are stored as
-% items of the sequence as well. The assignment to \meta{seq~var} is local.
-% If no match is found the resulting \meta{seq~var} has the
-% \meta{token list} as its sole item. If the \meta{regular expression}
-% matches the empty token list, then the \meta{token list} is split
-% into single tokens.
-% The testing versions insert the \meta{true code} into the input
-% stream if a match was found, and the \meta{false code} otherwise.
-% For example, after
-% \begin{verbatim}
-% \seq_new:N \l_path_seq
-% \regex_split:nnNTF { / } { the/path/for/this/file.tex } \l_path_seq
-% { true } { false }
-% \end{verbatim}
-% the sequence |\l_path_seq| contains the items |{the}|, |{path}|,
-% |{for}|, |{this}|, and |{file.tex}|, and the \texttt{true} branch
-% is left in the input stream.
-% \end{function}
-%
-% \subsection{Replacement}
-%
-% \begin{function}[TF]{\regex_replace_once:nnN,\regex_replace_once:NnN}
-% \begin{syntax}
-% \cs{regex_replace_once:nnN} \Arg{regular expression} \Arg{replacement} \meta{tl~var}
-% \cs{regex_replace_once:nnNTF} \Arg{regular expression} \Arg{replacement} \meta{tl~var} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Searches for the \meta{regular expression} in the contents of the
-% \meta{tl~var} and replaces the first match with the \meta{replacement}.
-% The result is assigned locally to \meta{tl~var}. In the \meta{replacement},
-% |\0| represents the full match, |\1| represents the contents of the
-% first capturing group, |\2| of the second, \emph{etc.}
-% \end{function}
-%
-% \begin{function}[TF]{\regex_replace_all:nnN, \regex_replace_all:NnN}
-% \begin{syntax}
-% \cs{regex_replace_all:nnN} \Arg{regular expression} \Arg{replacement} \meta{tl~var}
-% \cs{regex_replace_all:nnNTF} \Arg{regular expression} \Arg{replacement} \meta{tl~var} \Arg{true code} \Arg{false code}
-% \end{syntax}
-% Replaces all occurrences of the \meta{regular expression} in the
-% contents of the \meta{tl~var} by the \meta{replacement}, where |\0|
-% represents the full match, |\1| represents the contents of the first
-% capturing group, |\2| of the second, \emph{etc.} Every match is treated
-% independently, and matches cannot overlap. The result is assigned
-% locally to \meta{tl~var}.
-% \end{function}
-%
-% \subsection{Bugs, misfeatures, future work, and other possibilities}
-%
-% The following need to be done now.
-% \begin{itemize}
-% \item Change user function names!
-% \item Clean up the use of messages.
-% \item Rewrite the documentation in a more ordered way, perhaps add a
-% \textsc{bnf}?
-% \end{itemize}
-%
-% Additional error-checking to come.
-% \begin{itemize}
-% \item Currently, |a{\x34}| is recognized as |a{4}|.
-% \item Cleaner error reporting in the replacement phase.
-% \item Add tracing information.
-% \item Detect attempts to use back-references and other
-% non-implemented syntax.
-% \item Test for the maximum register \cs{c_max_register_int}.
-% \item Find out whether the fact that |\W| and friends match the
-% end-marker leads to bugs. Possibly update \cs{__regex_item_reverse:n}.
-% \item Enforce that |\cC| can only be followed by a match-all dot.
-% \item The empty cs should be matched by |\c{}|, not by
-% |\c{csname.?endcsname\s?}|.
-% \end{itemize}
-%
-% Code improvements to come.
-% \begin{itemize}
-% \item Shift arrays so that the useful information starts at
-% position~$1$.
-% \item Only build |\c{...}| once.
-% \item Use arrays for the left and right state stacks when
-% compiling a regex.
-% \item Should \cs{__regex_action_free_group:n} only be used for greedy
-% |{n,}| quantifier? (I think not.)
-% \item Quantifiers for |\u| and assertions.
-% \item When matching, keep track of an explicit stack of
-% \texttt{current_state} and \texttt{current_submatches}.
-% \item If possible, when a state is reused by the same thread, kill
-% other subthreads.
-% \item Use an array rather than \cs{l__regex_balance_tl}
-% to build \cs{__regex_replacement_balance_one_match:n}.
-% \item Reduce the number of epsilon-transitions in alternatives.
-% \item Optimize simple strings: use fewer states (|abcade| should give
-% two states, for |abc| and |ade|). [Does that really make sense?]
-% \item Optimize groups with no alternative.
-% \item Optimize states with a single \cs{__regex_action_free:n}.
-% \item Optimize the use of \cs{__regex_action_success:} by inserting it
-% in state $2$ directly instead of having an extra transition.
-% \item Optimize the use of \cs{int_step_...} functions.
-% \item Groups don't capture within regexes for csnames; optimize and
-% document.
-% \item Better \enquote{show} for anchors, properties, and catcode tests.
-% \item Does |\K| really need a new state for itself?
-% \item When compiling, use a boolean \texttt{in_cs} and fewer magic
-% numbers.
-% \item Instead of checking whether the character is special or
-% alphanumeric using its character code, check if it is special in
-% regexes with \cs{cs_if_exist} tests.
-% \end{itemize}
-%
-% The following features are likely to be implemented at some point
-% in the future.
-% \begin{itemize}
-% \item General look-ahead/behind assertions.
-% \item Regex matching on external files.
-% \item Conditional subpatterns with look ahead/behind: \enquote{if
-% what follows is [\ldots{}], then [\ldots{}]}.
-% \item |(*..)| and |(?..)| sequences to set some options.
-% \item UTF-8 mode for pdf\TeX{}.
-% \item Newline conventions are not done.
-% In particular, we should have an option for |.| not to match newlines.
-% Also, |\A| should differ from |^|, and |\Z|, |\z| and |$| should
-% differ.
-% \item Unicode properties: |\p{..}| and |\P{..}|;
-% |\X| which should match any \enquote{extended} Unicode sequence.
-% This requires manipulating a lot of data, probably using tree-boxes.
-% \end{itemize}
-%
-% The following features of \textsc{pcre} or Perl may or may not be
-% implemented.
-% \begin{itemize}
-% \item |\ddd|, matching the character with octal code \texttt{ddd};
-% \item Callout with |(?C...)|;
-% \item Conditional subpatterns (other than with a look-ahead or
-% look-behind condition): this is non-regular, isn't it?
-% \item Named subpatterns: \TeX{} programmers have lived so far
-% without any need for named macro parameters.
-% \end{itemize}
-%
-% The following features of \textsc{pcre} or Perl will definitely not be
-% implemented.
-% \begin{itemize}
-% \item |\cx|, similar to \TeX{}'s own |\^^x|;
-% \item Comments: \TeX{} already has its own system for comments.
-% \item |\Q...\E| escaping: this would require reading the argument
-% verbatim, which is not in the scope of this module.
-% \item Atomic grouping, possessive quantifiers: those tools, mostly
-% meant to fix catastrophic backtracking, are unnecessary in a
-% non-backtracking algorithm, and difficult to implement.
-% \item Subroutine calls: this syntactic sugar is difficult to include
-% in a non-backtracking algorithm, in particular because the
-% corresponding group should be treated as atomic.
-% \item Recursion: this is a non-regular feature.
-% \item Back-references: non-regular feature, this requires
-% backtracking, which is prohibitively slow.
-% \item Backtracking control verbs: intrinsically tied to
-% backtracking.
-% \item |\C| single byte in UTF-8 mode: Xe\TeX{} and Lua\TeX{} serve
-% us characters directly, and splitting those into bytes is tricky,
-% encoding dependent, and most likely not useful anyways.
-% \end{itemize}
-%
-% \end{documentation}
-%
-% \begin{implementation}
-%
-% \section{\pkg{l3regex} implementation}
-%
-% \begin{macrocode}
-%<*initex|package>
-% \end{macrocode}
-%
-% \begin{macrocode}
-%<@@=regex>
-% \end{macrocode}
-%
-% \begin{macrocode}
-%<*package>
-\ProvidesExplPackage{l3regex}{2017/05/13}{}
- {L3 Experimental regular expressions}
-\RequirePackage{l3tl-build, l3tl-analysis, l3intarray}
-%</package>
-% \end{macrocode}
-%
-% \subsection{Plan of attack}
-%
-% Most regex engines use backtracking. This allows them to provide very
-% powerful features (back-references come to mind first), but it is
-% costly, and raises the problem of catastrophic backtracking. Since
-% \TeX{} is not first and foremost a programming language, complicated
-% code tends to run slowly, and we must use faster, albeit slightly more
-% restrictive, techniques, coming from automata theory.
-%
-% Given a regular expression of $n$ characters, we do the following:
-% \begin{itemize}
-% \item (Compiling.) Analyse the regex, finding invalid input, and
-% convert it to an internal representation.
-% \item (Building.) Convert the compiled regex to a non-deterministic
-% finite automaton (\textsc{nfa}) with $O(n)$ states which
-% accepts precisely token lists matching that regex.
-% \item (Matching.) Loop through the query token list one token (one
-% \enquote{position}) at a time, exploring in parallel every
-% possible path (\enquote{active thread}) through the \textsc{nfa},
-% considering active threads in an order determined by the
-% quantifiers' greediness.
-% \end{itemize}
-%
-% We use the following vocabulary in the code comments (and in variable
-% names).
-% \begin{itemize}
-% \item \emph{Group}: index of the capturing group, $-1$ for
-% non-capturing groups.
-% \item \emph{Position}: each token in the query is labelled by an
-% integer \meta{position}, with $\texttt{min_pos} - 1 \leq
-% \meta{position} \leq \texttt{max_pos}$. The lowest and highest
-% positions correspond to imaginary begin and end markers (with
-% inaccessible category code and character code).
-% \item \emph{Query}: the token list to which we apply the regular
-% expression.
-% \item \emph{State}: each state of the \textsc{nfa} is labelled by an
-% integer \meta{state} with $\texttt{min_state} \leq \meta{state} <
-% \texttt{max_state}$.
-% \item \emph{Active thread}: state of the \textsc{nfa} that is reached
-% when reading the query token list for the matching. Those threads
-% are ordered according to the greediness of quantifiers.
-% \item \emph{Step}: used when matching, starts at $0$, incremented
-% every time a character is read, and is not reset when searching
-% for repeated matches. The integer \cs{l_@@_step_int} is a
-% unique id for all the steps of the matching algorithm.
-% \end{itemize}
-%
-% We use \pkg{l3intarray} to manipulate arrays of integers (stored into
-% some dimension registers in scaled points). We also abuse \TeX{}'s
-% \tn{toks} registers, by accessing them directly by number rather than
-% tying them to a control sequence using the \tn{newtoks} allocation
-% functions. Specifically, these arrays and \tn{toks} are used as
-% follows. When compiling, \tn{toks} registers are used under the hood
-% by functions from the \pkg{l3tl-build} module. When building,
-% \tn{toks}\meta{state} holds the tests and actions to perform in the
-% \meta{state} of the \textsc{nfa}. When matching,
-% \begin{itemize}
-% \item \cs{g_@@_state_active_intarray} holds the last \meta{step} in
-% which each \meta{state} was active.
-% \item \cs{g_@@_thread_state_intarray} maps each \meta{thread} (with
-% $\texttt{min_active} \leq \meta{thread} < \texttt{max_active}$) to
-% the \meta{state} in which the \meta{thread} currently is. The
-% \meta{threads} are ordered starting from the best to the least
-% preferred.
-% \item \tn{toks}\meta{thread} holds the submatch information for the
-% \meta{thread}, as the contents of a property list.
-% \item \cs{g_@@_charcode_intarray} and \cs{g_@@_catcode_intarray} hold the
-% character codes and category codes of tokens at each
-% \meta{position} in the query.
-% \item \cs{g_@@_balance_intarray} holds the balance of begin-group and
-% end-group character tokens which appear before that point in the
-% token list.
-% \item \tn{toks}\meta{position} holds \meta{tokens} which \texttt{o}-
-% and \texttt{x}-expand to the \meta{position}-th token in the query.
-% \item \cs{g_@@_submatch_prev_intarray}, \cs{g_@@_submatch_begin_intarray}
-% and \cs{g_@@_submatch_end_intarray} hold, for each submatch (as would
-% be extracted by \cs{regex_extract_all:nnN}), the place where the
-% submatch started to be looked for and its two end-points. For
-% historical reasons, the minimum index is twice \texttt{max_state},
-% and the used registers go up to \cs{l_@@_submatch_int}. They are
-% organized in blocks of \cs{l_@@_capturing_group_int} entries, each
-% block corresponding to one match with all its submatches stored in
-% consecutive entries.
-% \end{itemize}
-% \tn{count} registers are not abused, which means that we can safely
-% use named integers in this module. Note that \tn{box} registers are
-% not abused either; maybe we could leverage those for some purpose.
-%
-% The code is structured as follows. Variables are introduced in the
-% relevant section. First we present some generic helper functions. Then
-% comes the code for compiling a regular expression, and for showing the
-% result of the compilation. The building phase converts a compiled
-% regex to \textsc{nfa} states, and the automaton is run by the code in
-% the following section. The only remaining brick is parsing the
-% replacement text and performing the replacement. We are then ready for
-% all the user functions. Finally, messages, and a little bit of tracing
-% code.
-%
-% \subsection{Helpers}
-%
-% \begin{macro}[aux]{\tl_to_str:V}
-% The \texttt{V}-type variant of \cs{tl_to_str:n}, generated here because
-% it is needed for the |\u| escape in the replacement text.
-% \begin{macrocode}
-\cs_generate_variant:Nn \tl_to_str:n { V }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_standard_escapechar:}
-% Make the \tn{escapechar} into the standard backslash: the integer
-% parameter is set, through \cs{int_set:Nn}, to the character code
-% of~|\|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_standard_escapechar:
- { \int_set:Nn \tex_escapechar:D { `\\ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_toks_use:w}
-% Unpack a \tn{toks} given its number. The function expands to
-% \tn{the}\tn{toks} and takes no argument of its own: the register
-% number must follow it in the input stream.
-% \begin{macrocode}
-\cs_new:Npn \@@_toks_use:w { \tex_the:D \tex_toks:D }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_toks_clear:N, \@@_toks_set:Nn, \@@_toks_set:No}
-% Empty a \tn{toks} or set it to a value, given its number.
-% \cs{@@_toks_clear:N} assigns an empty brace group to the register.
-% \cs{@@_toks_set:Nn} is a plain copy of the \tn{toks} primitive, so the
-% register number and the braced value are read by \TeX{} itself. The
-% \texttt{No} version inserts \cs{exp_after:wN} between the register
-% number and the braced value, which expands the first token of the
-% value once before the assignment is performed.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_toks_clear:N #1
- { \tex_toks:D #1 { } }
-\cs_new_eq:NN \@@_toks_set:Nn \tex_toks:D
-\cs_new_protected:Npn \@@_toks_set:No #1
- { \@@_toks_set:Nn #1 \exp_after:wN }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_toks_memcpy:NNn}
-% Copy |#3| \tn{toks} registers from |#2| onwards to |#1| onwards,
-% like |C|'s |memcpy|. Here |#1| and |#2| are integer variables holding
-% the destination and source register numbers. Both are incremented
-% once for every register copied, hence they are modified by the call:
-% afterwards each has advanced by |#3|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_toks_memcpy:NNn #1#2#3
- {
- \prg_replicate:nn {#3}
- {
- \tex_toks:D #1 = \tex_toks:D #2
- \int_incr:N #1
- \int_incr:N #2
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_toks_put_left:Nx}
-% \begin{macro}[int]{\@@_toks_put_right:Nx, \@@_toks_put_right:Nn}
-% During the building phase we wish to add \texttt{x}-expanded
-% material to \tn{toks}, either to the left or to the right. The
-% expansion is done \enquote{by hand} for optimization (these
-% operations are used quite a lot). The \texttt{Nn} version of
-% \cs{@@_toks_put_right:Nx} is provided because it is more
-% efficient than \texttt{x}-expanding with \cs{exp_not:n}.
-%
-% Both \texttt{x}-type functions first store the expansion of |#2| in
-% \cs{@@_tmp:w} (which is thus overwritten), then rebuild the register
-% from a single brace group. For the left version, the old contents
-% are unpacked first and \cs{@@_tmp:w} is then expanded once in front
-% of them. For the right version, the \cs{exp_after:wN} preceding |#1|
-% expands \cs{@@_tmp:w} before \tn{the} has finished reading the
-% register number, so that the new material ends up after the old
-% contents.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_toks_put_left:Nx #1#2
- {
- \cs_set:Npx \@@_tmp:w { #2 }
- \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN
- { \exp_after:wN \@@_tmp:w \tex_the:D \tex_toks:D #1 }
- }
-\cs_new_protected:Npn \@@_toks_put_right:Nx #1#2
- {
- \cs_set:Npx \@@_tmp:w {#2}
- \tex_toks:D #1 \exp_after:wN
- { \tex_the:D \tex_toks:D \exp_after:wN #1 \@@_tmp:w }
- }
-\cs_new_protected:Npn \@@_toks_put_right:Nn #1#2
- { \tex_toks:D #1 \exp_after:wN { \tex_the:D \tex_toks:D #1 #2 } }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int, rEXP]{\@@_current_cs_to_str:}
-% Expands to the string representation of the token (known to be a
-% control sequence) at the current position \cs{l_@@_current_pos_int}.
-% It should only be used in \texttt{x}-expansion to avoid losing a
-% leading space. The \tn{toks} register numbered
-% \cs{l_@@_current_pos_int} is unpacked, its contents are expanded
-% once more, and only then is \cs{cs_to_str:N} applied to the result.
-% \begin{macrocode}
-\cs_new:Npn \@@_current_cs_to_str:
- {
- \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N
- \tex_the:D \tex_toks:D \l_@@_current_pos_int
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Constants and variables}
-%
-% \begin{macro}[aux]{\@@_tmp:w}
-% Temporary function used for various short-term purposes. It is
-% declared here with an empty replacement text, and is redefined with
-% \cs{cs_set:Npx} by the functions which need it, for instance
-% \cs{@@_toks_put_left:Nx} and \cs{@@_toks_put_right:Nx}.
-% \begin{macrocode}
-\cs_new:Npn \@@_tmp:w { }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{variable}
-% {
-% \l_@@_internal_a_tl, \l_@@_internal_b_tl,
-% \l_@@_internal_a_int, \l_@@_internal_b_int,
-% \l_@@_internal_c_int, \l_@@_internal_bool,
-% \l_@@_internal_seq, \g_@@_internal_tl,
-% }
-% Temporary variables used for various purposes: two local token
-% lists, three local integers, a local boolean, a local sequence, and
-% one global token list.
-% \begin{macrocode}
-\tl_new:N \l_@@_internal_a_tl
-\tl_new:N \l_@@_internal_b_tl
-\int_new:N \l_@@_internal_a_int
-\int_new:N \l_@@_internal_b_int
-\int_new:N \l_@@_internal_c_int
-\bool_new:N \l_@@_internal_bool
-\seq_new:N \l_@@_internal_seq
-\tl_new:N \g_@@_internal_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\c_@@_no_match_regex}
-% This regular expression matches nothing, but is still a valid
-% regular expression. We could use a failing assertion, but I went for
-% an empty class. It is used as the initial value for regular
-% expressions declared using \cs{regex_new:N}. In compiled form it is
-% a single \cs{@@_branch:n} containing one \cs{@@_class:NnnnN} whose
-% list of items (the first braced argument) is empty.
-% \begin{macrocode}
-\tl_const:Nn \c_@@_no_match_regex
- {
- \@@_branch:n
- { \@@_class:NnnnN \c_true_bool { } { 1 } { 0 } \c_true_bool }
- }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\g_@@_charcode_intarray, \g_@@_catcode_intarray, \g_@@_balance_intarray}
-% The first thing we do when matching is to go once through the query
-% token list and store the information for each token into
-% \cs{g_@@_charcode_intarray}, \cs{g_@@_catcode_intarray} and \tn{toks}
-% registers. We also store the balance of begin-group/end-group
-% characters into \cs{g_@@_balance_intarray}. Each of the three arrays
-% is created with $65536$ entries.
-% \begin{macrocode}
-\__intarray_new:Nn \g_@@_charcode_intarray { 65536 }
-\__intarray_new:Nn \g_@@_catcode_intarray { 65536 }
-\__intarray_new:Nn \g_@@_balance_intarray { 65536 }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_balance_int}
-% While going through the query token list, \cs{l_@@_balance_int}
-% counts the balance of begin-group and end-group character tokens
-% which appear before a given point in the token list. This variable
-% is also used to keep track of the balance in the replacement text.
-% \begin{macrocode}
-\int_new:N \l_@@_balance_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_cs_name_tl}
-% Token list used by \cs{@@_item_cs:n}: when the regex contains a
-% sub-regex for testing csnames, it stores the csname of the token
-% that is currently being tested.
-% \begin{macrocode}
-\tl_new:N \l_@@_cs_name_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \subsubsection{Testing characters}
-%
-% \begin{variable}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
-% Integer constants with values $0$, $31$ and $127$. As their names
-% indicate, they delimit the \textsc{ascii} range and, within it, the
-% range of control characters.
-% \begin{macrocode}
-\int_const:Nn \c_@@_ascii_min_int { 0 }
-\int_const:Nn \c_@@_ascii_max_control_int { 31 }
-\int_const:Nn \c_@@_ascii_max_int { 127 }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\c_@@_ascii_lower_int}
-% The difference between the character codes of |a| and |A|, that is,
-% the offset which must be added to the character code of an uppercase
-% \textsc{ascii} letter to obtain that of the lowercase letter.
-% \begin{macrocode}
-\int_const:Nn \c_@@_ascii_lower_int { `a - `A }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}[int]{\@@_break_point:TF}
-% \begin{macro}[int]{\@@_break_true:w}
-% When testing whether a character of the query token list matches
-% a given character class in the regular expression, we often
-% have to test it against several ranges of characters, checking
-% if any one of those matches. This is done with a structure like
-% \begin{quote}
-% \meta{test1} \ldots{} \meta{test$\sb{n}$} \\
-% \cs{@@_break_point:TF} \Arg{true code} \Arg{false code}
-% \end{quote}
-% If any of the tests succeeds, it calls \cs{@@_break_true:w},
-% which cleans up and leaves \meta{true code} in the input stream.
-% Otherwise, \cs{@@_break_point:TF} leaves the \meta{false code}
-% in the input stream.
-%
-% The argument of \cs{@@_break_true:w} is delimited by
-% \cs{@@_break_point:TF}: every token up to and including that marker
-% (in particular all remaining tests) is discarded, then the two
-% braced arguments are grabbed and only the first one is kept. When
-% no test breaks, \cs{@@_break_point:TF} is executed as an ordinary
-% function which keeps only its second argument.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_break_true:w
- #1 \@@_break_point:TF #2 #3 {#2}
-\cs_new_protected:Npn \@@_break_point:TF #1 #2 { #2 }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_item_reverse:n}
-% This function makes showing regular expressions easier, and lets us
-% define |\D| in terms of |\d| for instance. There is a subtlety: the
-% end of the query is marked by $-2$, and will thus match |\D| and
-% other negated properties; this case is caught by another part of
-% the code.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_reverse:n #1
- {
- #1 % if a test in #1 succeeds, it breaks to the next line and leaves its empty true code
- \@@_break_point:TF { } \@@_break_true:w % if none succeeds, the false code breaks out to the enclosing break point
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]
-% {\@@_item_caseful_equal:n, \@@_item_caseful_range:nn}
-% Simple comparisons triggering \cs{@@_break_true:w} when true.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_caseful_equal:n #1
- {
- \if_int_compare:w #1 = \l_@@_current_char_int
- \exp_after:wN \@@_break_true:w % expand \fi: before breaking out
- \fi:
- }
-\cs_new_protected:Npn \@@_item_caseful_range:nn #1 #2
- {
- \reverse_if:N \if_int_compare:w #1 > \l_@@_current_char_int % that is, #1 <= current char
- \reverse_if:N \if_int_compare:w #2 < \l_@@_current_char_int % that is, current char <= #2
- \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w % expand both \fi: before breaking out
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]
-% {\@@_item_caseless_equal:n, \@@_item_caseless_range:nn}
-% For caseless matching, we perform the test both on the
-% \texttt{current_char} and on the \texttt{case_changed_char}. Before
-% doing the second set of tests, we make sure that
-% \texttt{case_changed_char} has been computed.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_caseless_equal:n #1
- {
- \if_int_compare:w #1 = \l_@@_current_char_int % first try the character as it is
- \exp_after:wN \@@_break_true:w
- \fi:
- \if_int_compare:w \l_@@_case_changed_char_int = \c_max_int % marker value: not computed yet
- \@@_compute_case_changed_char:
- \fi:
- \if_int_compare:w #1 = \l_@@_case_changed_char_int % then try the other case
- \exp_after:wN \@@_break_true:w
- \fi:
- }
-\cs_new_protected:Npn \@@_item_caseless_range:nn #1 #2
- {
- \reverse_if:N \if_int_compare:w #1 > \l_@@_current_char_int % #1 <= current char
- \reverse_if:N \if_int_compare:w #2 < \l_@@_current_char_int % current char <= #2
- \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w
- \fi:
- \fi:
- \if_int_compare:w \l_@@_case_changed_char_int = \c_max_int % marker value: not computed yet
- \@@_compute_case_changed_char:
- \fi:
- \reverse_if:N \if_int_compare:w #1 > \l_@@_case_changed_char_int % same range test on the other case
- \reverse_if:N \if_int_compare:w #2 < \l_@@_case_changed_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_compute_case_changed_char:}
-% This function is called when \cs{l_@@_case_changed_char_int} has
-% not yet been computed (or rather, when it is set to the marker value
-% \cs{c_max_int}). If the current character code is in the range
-% $[65,90]$ (upper-case), then add $32$, making it lowercase. If it is
-% in the lower-case letter range $[97,122]$, subtract $32$.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compute_case_changed_char:
- {
- \int_set_eq:NN \l_@@_case_changed_char_int \l_@@_current_char_int % default: character unchanged
- \if_int_compare:w \l_@@_current_char_int > `Z \exp_stop_f:
- \if_int_compare:w \l_@@_current_char_int > `z \exp_stop_f: \else:
- \if_int_compare:w \l_@@_current_char_int < `a \exp_stop_f: \else:
- \int_sub:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int } % a-z: switch to upper case
- \fi:
- \fi:
- \else:
- \if_int_compare:w \l_@@_current_char_int < `A \exp_stop_f: \else:
- \int_add:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int } % A-Z: switch to lower case
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_item_equal:n, \@@_item_range:nn}
-% Those must always be defined to expand to a \texttt{caseful}
-% (default) or \texttt{caseless} version, and not be protected: they
-% must expand when compiling, to hard-code which tests are caseless or
-% caseful.
-% \begin{macrocode}
-\cs_new_eq:NN \@@_item_equal:n ? % placeholder meaning, to be replaced by a caseful or caseless variant
-\cs_new_eq:NN \@@_item_range:nn ? % placeholder meaning, to be replaced by a caseful or caseless variant
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_item_catcode:nT, \@@_item_catcode_reverse:nT}
-% \begin{macro}[aux]{\@@_item_catcode:}
-% The argument is a sum of powers of $4$ with exponents given by the
-% allowed category codes (between $0$ and $13$). Dividing by a given
-% power of $4$ gives an odd result if and only if that category code
-% is allowed. If the catcode does not match, then skip the character
-% code tests which follow.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_catcode:
- {
- " % hexadecimal prefix: the digits selected below then denote 4^catcode
- \if_case:w \l_@@_current_catcode_int
- 1 \or: 4 \or: 10 \or: 40 % catcodes 0 to 3
- \or: 100 \or: \or: 1000 \or: 4000 % catcodes 4 to 7, with no entry for 5
- \or: 10000 \or: \or: 100000 \or: 400000 % catcodes 8 to 11, with no entry for 9
- \or: 1000000 \or: 4000000 \else: 1*0 % catcodes 12 and 13; otherwise 1*0 makes the caller's quotient zero
- \fi:
- }
-\cs_new_protected:Npn \@@_item_catcode:nT #1
- {
- \if_int_odd:w \__int_eval:w #1 / \@@_item_catcode: \__int_eval_end: % odd exactly when the bitmap #1 allows the current catcode
- \exp_after:wN \use:n % perform the character code tests that follow
- \else:
- \exp_after:wN \use_none:n % skip the character code tests that follow
- \fi:
- }
-\cs_new_protected:Npn \@@_item_catcode_reverse:nT #1#2
- { \@@_item_catcode:nT {#1} { \@@_item_reverse:n {#2} } } % same catcode filter, with the tests #2 negated
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_item_exact:nn, \@@_item_exact_cs:n}
-% This matches an exact \meta{category}-\meta{character code} pair, or
-% an exact control sequence, more precisely one of several possible control sequences.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_exact:nn #1#2
- {
- \if_int_compare:w #1 = \l_@@_current_catcode_int % the category code must agree
- \if_int_compare:w #2 = \l_@@_current_char_int % and so must the character code
- \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \@@_item_exact_cs:n #1
- {
- \int_compare:nNnTF \l_@@_current_catcode_int = 0 % only tested when the catcode is 0
- {
- \tl_set:Nx \l_@@_internal_a_tl
- { \scan_stop: \@@_current_cs_to_str: \scan_stop: } % current csname, delimited on both sides
- \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l_@@_internal_a_tl % look for the delimited name in the list #1
- { \@@_break_true:w } { }
- }
- { }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_item_cs:n}
-% Match a control sequence (the argument is a compiled regex).
-% First test the catcode of the current token to be zero.
-% Then perform the matching test, and break if the csname
-% indeed matches. The three \cs{exp_after:wN} expand the contents
-% of the \tn{toks}\meta{current position} (of the form \cs{exp_not:n}
-% \Arg{control sequence}) to \meta{control sequence}.
-% We store the cs name before building states for the cs, as those
-% states may overlap with toks registers storing the user's input.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_item_cs:n #1
- {
- \int_compare:nNnT \l_@@_current_catcode_int = 0 % only tested when the catcode is 0
- {
- \group_begin: % keep the nested match local
- \tl_set:Nx \l_@@_cs_name_tl { \@@_current_cs_to_str: } % store the name before building states
- \@@_single_match:
- \@@_disable_submatches:
- \@@_build_for_cs:n {#1}
- \bool_set_eq:NN \l_@@_saved_success_bool \g_@@_success_bool % save the global flag of the outer match
- \exp_args:NV \@@_match:n \l_@@_cs_name_tl % match the compiled regex #1 against the csname
- \if_meaning:w \c_true_bool \g_@@_success_bool
- \group_insert_after:N \@@_break_true:w % break only once the group has ended
- \fi:
- \bool_gset_eq:NN \g_@@_success_bool \l_@@_saved_success_bool % restore the global flag
- \group_end:
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Character property tests}
-%
-% \begin{macro}[aux]
-% {
-% \@@_prop_d:, \@@_prop_h:, \@@_prop_s:,
-% \@@_prop_v:, \@@_prop_w:, \@@_prop_N:
-% }
-% Character property tests for |\d|, |\W|, \emph{etc.} These character
-% properties are not affected by the |(?i)| option. The characters
-% recognized by each one are as follows: |\d=[0-9]|,
-% |\w=[0-9A-Z_a-z]|, \verb*+\s=[\ \^^I\^^J\^^L\^^M]+,
-% \verb*+\h=[\ \^^I]+, |\v=[\^^J-\^^M]|, and the upper case
-% counterparts match anything that the lower case does not match. The
-% order in which the various tests appear is optimized for typical
-% text, which consists mostly of lower-case letters.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_prop_d:
- { \@@_item_caseful_range:nn { `0 } { `9 } } % decimal digits
-\cs_new_protected:Npn \@@_prop_h:
- {
- \@@_item_caseful_equal:n { `\ } % space
- \@@_item_caseful_equal:n { `\^^I } % tab
- }
-\cs_new_protected:Npn \@@_prop_s:
- {
- \@@_item_caseful_equal:n { `\ } % space
- \@@_item_caseful_equal:n { `\^^I } % tab
- \@@_item_caseful_equal:n { `\^^J } % lf
- \@@_item_caseful_equal:n { `\^^L } % ff
- \@@_item_caseful_equal:n { `\^^M } % cr
- }
-\cs_new_protected:Npn \@@_prop_v:
- { \@@_item_caseful_range:nn { `\^^J } { `\^^M } } % lf, vtab, ff, cr
-\cs_new_protected:Npn \@@_prop_w:
- {
- \@@_item_caseful_range:nn { `a } { `z } % lower case is tested first
- \@@_item_caseful_range:nn { `A } { `Z }
- \@@_item_caseful_range:nn { `0 } { `9 }
- \@@_item_caseful_equal:n { `_ }
- }
-\cs_new_protected:Npn \@@_prop_N:
- {
- \@@_item_reverse:n % anything but lf
- { \@@_item_caseful_equal:n { `\^^J } }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_posix_alnum:, \@@_posix_alpha:, \@@_posix_ascii:,
-% \@@_posix_blank:, \@@_posix_cntrl:, \@@_posix_digit:,
-% \@@_posix_graph:, \@@_posix_lower:, \@@_posix_print:,
-% \@@_posix_punct:, \@@_posix_space:, \@@_posix_upper:,
-% \@@_posix_word: , \@@_posix_xdigit:
-% }
-% \textsc{posix} properties. No surprise.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_posix_alnum:
- { \@@_posix_alpha: \@@_posix_digit: } % letters, then digits
-\cs_new_protected:Npn \@@_posix_alpha:
- { \@@_posix_lower: \@@_posix_upper: } % lower case is tested first
-\cs_new_protected:Npn \@@_posix_ascii:
- {
- \@@_item_caseful_range:nn % codes 0 to 127
- \c_@@_ascii_min_int
- \c_@@_ascii_max_int
- }
-\cs_new_eq:NN \@@_posix_blank: \@@_prop_h: % space and tab
-\cs_new_protected:Npn \@@_posix_cntrl:
- {
- \@@_item_caseful_range:nn % codes 0 to 31
- \c_@@_ascii_min_int
- \c_@@_ascii_max_control_int
- \@@_item_caseful_equal:n \c_@@_ascii_max_int % and code 127
- }
-\cs_new_eq:NN \@@_posix_digit: \@@_prop_d: % 0-9
-\cs_new_protected:Npn \@@_posix_graph:
- { \@@_item_caseful_range:nn { `! } { `\~ } } % codes 33 to 126
-\cs_new_protected:Npn \@@_posix_lower:
- { \@@_item_caseful_range:nn { `a } { `z } }
-\cs_new_protected:Npn \@@_posix_print:
- { \@@_item_caseful_range:nn { `\ } { `\~ } } % codes 32 to 126
-\cs_new_protected:Npn \@@_posix_punct:
- {
- \@@_item_caseful_range:nn { `! } { `/ } % codes 33 to 47
- \@@_item_caseful_range:nn { `: } { `@ } % codes 58 to 64
- \@@_item_caseful_range:nn { `[ } { `` } % codes 91 to 96
- \@@_item_caseful_range:nn { `\{ } { `\~ } % codes 123 to 126
- }
-\cs_new_protected:Npn \@@_posix_space:
- {
- \@@_item_caseful_equal:n { `\ } % space
- \@@_item_caseful_range:nn { `\^^I } { `\^^M } % codes 9 to 13
- }
-\cs_new_protected:Npn \@@_posix_upper:
- { \@@_item_caseful_range:nn { `A } { `Z } }
-\cs_new_eq:NN \@@_posix_word: \@@_prop_w: % same set as \w
-\cs_new_protected:Npn \@@_posix_xdigit:
- {
- \@@_posix_digit:
- \@@_item_caseful_range:nn { `A } { `F } % both cases are accepted
- \@@_item_caseful_range:nn { `a } { `f }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Simple character escape}
-%
-% Before actually parsing the regular expression or the replacement
-% text, we go through them once, converting |\n| to the character $10$,
-% \emph{etc.} In this pass, we also convert any special character
-% (\texttt{*}, \texttt{?}, \texttt{\{}, etc.) or escaped alphanumeric
-% character into a marker indicating that this was a special sequence,
-% and replace escaped special characters and non-escaped alphanumeric
-% characters by markers indicating that those were \enquote{raw}
-% characters. The rest of the code can then avoid caring about escaping
-% issues (those can become quite complex to handle in combination with
-% ranges in character classes).
-%
-% Usage: \cs{@@_escape_use:nnnn} \meta{inline~1} \meta{inline~2}
-% \meta{inline~3} \Arg{token list}. The \meta{token list} is converted to
-% a string, then read from left to right, interpreting backslashes as
-% escaping the next character. Unescaped characters are fed to the
-% function \meta{inline~1}, and escaped characters are fed to the function
-% \meta{inline~2} within an \texttt{x}-expansion context (typically those
-% functions perform some tests on their argument to decide how to output
-% them). The escape sequences |\a|, |\e|, |\f|, |\n|, |\r|, |\t| and
-% |\x| are recognized, and those are replaced by the corresponding
-% character, then fed to \meta{inline~3}. The result is then left in the
-% input stream. Spaces are ignored unless escaped.
-%
-% The conversion is mostly done within an \texttt{x}-expanding
-% assignment, except for the |\x| escape sequence, which is not amenable
-% to that in general. For this, we use the general framework of
-% \cs{__tl_build:Nw}.
-%
-% \begin{macro}[int]{\@@_escape_use:nnnn}
-% The result is built in \cs{l_@@_internal_a_tl}, which is then
-% left in the input stream. Go through |#4| once, applying |#1|,
-% |#2|, or |#3| as relevant to each character (after de-escaping
-% it). Note that we cannot replace \cs{tl_set:Nx} and
-% \cs{__tl_build_one:o} by a single call to \cs{__tl_build_one:x}, because
-% the \texttt{x}-expanding assignment may be interrupted by |\x|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_escape_use:nnnn #1#2#3#4
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_escape_use:nnnn }
- \__tl_build:Nw \l_@@_internal_a_tl % the result is accumulated in this variable
- \cs_set:Npn \@@_escape_unescaped:N ##1 { #1 } % action for unescaped characters
- \cs_set:Npn \@@_escape_escaped:N ##1 { #2 } % action for escaped characters
- \cs_set:Npn \@@_escape_raw:N ##1 { #3 } % action for characters produced by escape sequences
- \@@_standard_escapechar:
- \tl_gset:Nx \g_@@_internal_tl { \__str_to_other_fast:n {#4} } % #4 converted to a string
- \tl_set:Nx \l_@@_internal_b_tl
- {
- \exp_after:wN \@@_escape_loop:N \g_@@_internal_tl % loop over the characters
- { break } \__prg_break_point: % end-marker seen by the loop
- }
- \__tl_build_one:o \l_@@_internal_b_tl % append the last x-expanded piece
- \__tl_build_end:
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_escape_use:nnnn }
- \l_@@_internal_a_tl % leave the result in the input stream
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_escape_loop:N}
-% \begin{macro}[aux]+\@@_escape_\:w+
-% \cs{@@_escape_loop:N} reads one character: if it is special
-% (space, backslash, or end-marker), perform the associated action,
-% otherwise it is simply an unescaped character. After a backslash,
-% the same is done, but unknown characters are \enquote{escaped}.
-% \begin{macrocode}
-\cs_new:Npn \@@_escape_loop:N #1
- {
- \cs_if_exist_use:cF { @@_escape_\token_to_str:N #1:w } % use the dedicated handler for #1 if there is one
- { \@@_escape_unescaped:N #1 } % otherwise #1 is an ordinary unescaped character
- \@@_escape_loop:N
- }
-\cs_new:cpn { @@_escape_ \c_backslash_str :w }
- \@@_escape_loop:N #1 % remove the pending loop call and grab the escaped character
- {
- \cs_if_exist_use:cF { @@_escape_/\token_to_str:N #1:w } % handlers for escapes have a slash in their name
- { \@@_escape_escaped:N #1 } % unknown escape: an escaped character
- \@@_escape_loop:N
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {\@@_escape_unescaped:N, \@@_escape_escaped:N, \@@_escape_raw:N}
-% Those functions are never called before being given a new meaning,
-% so their definitions here don't matter.
-% \begin{macrocode}
-\cs_new_eq:NN \@@_escape_unescaped:N ? % placeholder, redefined by \@@_escape_use:nnnn
-\cs_new_eq:NN \@@_escape_escaped:N ? % placeholder, redefined by \@@_escape_use:nnnn
-\cs_new_eq:NN \@@_escape_raw:N ? % placeholder, redefined by \@@_escape_use:nnnn
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_escape_break:w, \@@_escape_/break:w,
-% \@@_escape_/a:w, \@@_escape_/e:w, \@@_escape_/f:w,
-% \@@_escape_/n:w, \@@_escape_/r:w, \@@_escape_/t:w
-% }
-% \begin{macro}[aux]+\@@_escape_ :w+
-% The loop is ended upon seeing the end-marker
-% \enquote{\texttt{break}}, with an error if the string ended in a
-% backslash. Spaces are ignored, and |\a|, |\e|, |\f|, |\n|, |\r|,
-% |\t| take their meaning here.
-% \begin{macrocode}
-\cs_new_eq:NN \@@_escape_break:w \__prg_break: % end-marker reached: leave the loop
-\cs_new:cpn { @@_escape_/break:w } % end-marker reached right after a backslash
- {
- \if_false: { \fi: } % close the brace of the x-expanding assignment
- \__msg_kernel_error:nn { regex } { trailing-backslash }
- \exp_after:wN \use_none:n \exp_after:wN { \if_false: } \fi: % discard what remains up to the original closing brace
- }
-\cs_new:cpn { @@_escape_~:w } { } % unescaped spaces are ignored
-\cs_new:cpx { @@_escape_/a:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^G } % code 7
-\cs_new:cpx { @@_escape_/t:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^I } % code 9
-\cs_new:cpx { @@_escape_/n:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^J } % code 10
-\cs_new:cpx { @@_escape_/f:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^L } % code 12
-\cs_new:cpx { @@_escape_/r:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^M } % code 13
-\cs_new:cpx { @@_escape_/e:w }
- { \exp_not:N \@@_escape_raw:N \iow_char:N \^^[ } % code 27
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_escape_/x:w}
-% \begin{macro}[aux]{\@@_escape_x_end:w, \@@_escape_x_large:n}
-% When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible for
-% grabbing some hexadecimal digits, and feeding the result to
-% \cs{@@_escape_x_end:w}. If the number is too big interrupt the
-% assignment and produce an error, otherwise call \cs{@@_escape_raw:N}
-% on the corresponding character token.
-% \begin{macrocode}
-\cs_new:cpn { @@_escape_/x:w } \@@_escape_loop:N
- {
- \exp_after:wN \@@_escape_x_end:w
- \__int_value:w "0 \@@_escape_x_test:N % hexadecimal number: a leading 0, then the digits found
- }
-\cs_new:Npn \@@_escape_x_end:w #1 ; % the semicolon is left by the digit-grabbing functions
- {
- \int_compare:nNnTF {#1} > \c_max_char_int
- {
- \if_false: { \fi: } % close the brace of the x-expanding assignment
- \__tl_build_one:o \l_@@_internal_b_tl % store what that assignment has produced
- \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
- \tl_set:Nx \l_@@_internal_b_tl % start a new x-expanding assignment
- { \if_false: } \fi:
- }
- {
- \exp_last_unbraced:Nf \@@_escape_raw:N
- { \char_generate:nn {#1} { 12 } } % character with code #1 and category code 12
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_escape_x_test:N, \@@_escape_x_testii:N}
-% Find out whether the first character is a left brace (allowing any
-% number of hexadecimal digits), or not (allowing up to two
-% hexadecimal digits). We need to check for the end-of-string marker.
-% Eventually, call either \cs{@@_escape_x_loop:N} or
-% \cs{@@_escape_x:N}.
-% \begin{macrocode}
-\cs_new:Npn \@@_escape_x_test:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; } % end-marker: end the number
- {
- \if_charcode:w \c_space_token #1
- \exp_after:wN \@@_escape_x_test:N % skip a space and look again
- \else:
- \exp_after:wN \@@_escape_x_testii:N
- \exp_after:wN #1
- \fi:
- }
- }
-\cs_new:Npn \@@_escape_x_testii:N #1
- {
- \if_charcode:w \c_left_brace_str #1
- \exp_after:wN \@@_escape_x_loop:N % braced form
- \else:
- \@@_hexadecimal_use:NTF #1
- { \exp_after:wN \@@_escape_x:N } % first digit found: look for a second one
- { ; \exp_after:wN \@@_escape_loop:N \exp_after:wN #1 } % no digit: end the number, hand #1 to the main loop
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_escape_x:N}
-% This looks for the second digit in the unbraced case.
-% \begin{macrocode}
-\cs_new:Npn \@@_escape_x:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; } % end-marker: end the number
- {
- \@@_hexadecimal_use:NTF #1
- { ; \@@_escape_loop:N } % second digit used: end the number and resume the main loop
- { ; \@@_escape_loop:N #1 } % not a digit: end the number, hand #1 to the main loop
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_escape_x_loop:N, \@@_escape_x_loop_error:n}
-% Grab hexadecimal digits, skip spaces, and at the end, check that
-% there is a right brace, otherwise raise an error outside the
-% assignment.
-% \begin{macrocode}
-\cs_new:Npn \@@_escape_x_loop:N #1
- {
- \str_if_eq_x:nnTF {#1} { break }
- { ; \@@_escape_x_loop_error:n { } {#1} } % string ended: empty error argument, the braced marker goes back to the loop
- {
- \@@_hexadecimal_use:NTF #1
- { \@@_escape_x_loop:N } % digit used: keep going
- {
- \token_if_eq_charcode:NNTF \c_space_token #1
- { \@@_escape_x_loop:N } % spaces are skipped
- {
- ; % any other character ends the number
- \exp_after:wN
- \token_if_eq_charcode:NNTF \c_right_brace_str #1
- { \@@_escape_loop:N } % right brace found: resume the main loop
- { \@@_escape_x_loop_error:n {#1} }
- }
- }
- }
- }
-\cs_new:Npn \@@_escape_x_loop_error:n #1
- {
- \if_false: { \fi: } % close the brace of the x-expanding assignment
- \__tl_build_one:o \l_@@_internal_b_tl % store what that assignment has produced
- \__msg_kernel_error:nnx { regex } { x-missing-rbrace } {#1}
- \tl_set:Nx \l_@@_internal_b_tl % start a new x-expanding assignment
- { \if_false: } \fi: \@@_escape_loop:N #1 % and resume the main loop at #1
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_hexadecimal_use:NTF}
-% \TeX{} detects uppercase hexadecimal digits for us but not the
-% lowercase letters, which we need to detect and replace by their
-% uppercase counterpart.
-% \begin{macrocode}
-\prg_new_conditional:Npnn \@@_hexadecimal_use:N #1 { TF }
- {
- \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f: % true when #1 is one of 0-9, A-F
- #1 \prg_return_true: % leave the digit itself in the input
- \else:
- \if_case:w \__int_eval:w
- \exp_after:wN ` \token_to_str:N #1 - `a % distance of #1 from lower-case a
- \__int_eval_end:
- A
- \or: B
- \or: C
- \or: D
- \or: E
- \or: F
- \else:
- \prg_return_false:
- \exp_after:wN \use_none:n % drop the \prg_return_true: below
- \fi:
- \prg_return_true:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[EXP, aux]
-% {\@@_char_if_alphanumeric:NTF, \@@_char_if_special:NTF}
-% These two tests are used in the first pass when parsing a regular
-% expression. That pass is responsible for finding escaped and
-% non-escaped characters, and recognizing which ones have special
-% meanings and which should be interpreted as \enquote{raw}
-% characters. Namely,
-% \begin{itemize}
-% \item alphanumerics are \enquote{raw} if they are not escaped, and
-% may have a special meaning when escaped;
-% \item non-alphanumeric printable ascii characters are
-% \enquote{raw} if they are escaped, and may have a special
-% meaning when not escaped;
-% \item characters other than printable ascii are always
-% \enquote{raw}.
-% \end{itemize}
-% The code is ugly, and highly based on magic numbers and the ascii
-% codes of characters. This is mostly unavoidable for performance
-% reasons. Maybe the tests can be optimized a little bit more.
-% Here, \enquote{alphanumeric} means \texttt{0}--\texttt{9},
-% \texttt{A}--\texttt{Z}, \texttt{a}--\texttt{z};
-% \enquote{special} character means non-alphanumeric
-% but printable ascii, from space (hex \texttt{20}) to
-% tilde (hex \texttt{7E}); \texttt{del} (hex \texttt{7F}) is excluded.
-% \begin{macrocode}
-\prg_new_conditional:Npnn \@@_char_if_special:N #1 { TF }
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \if_int_compare:w `#1 < \c_@@_ascii_max_int % codes 123 to 126 are special
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f: % codes 91 to 96 are special, a-z are not
- \prg_return_true: \else: \prg_return_false: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f: % codes 58 to 64 are special, A-Z are not
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f:
- \if_int_compare:w `#1 < `\ \exp_stop_f: % below space: not special; codes 32 to 47 are special
- \prg_return_false: \else: \prg_return_true: \fi:
- \else: \prg_return_false: \fi: % digits are not special
- \fi:
- \fi:
- }
-\prg_new_conditional:Npnn \@@_char_if_alphanumeric:N #1 { TF }
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \prg_return_false: % above z
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f: % between Z and a: false; a-z: true
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f: % between 9 and A: false; A-Z: true
- \prg_return_false: \else: \prg_return_true: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f: % below 0: false; digits: true
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Compiling}
-%
-% A regular expression starts its life as a string of characters. In
-% this section, we convert it to internal instructions, resulting in a
-% \enquote{compiled} regular expression. This compiled expression is
-% then turned into states of an automaton in the building
-% phase. Compiled regular expressions consist of the following:
-% \begin{itemize}
-% \item \cs{@@_class:NnnnN} \meta{boolean} \Arg{tests} \Arg{min}
-% \Arg{more} \meta{laziness}
-% \item \cs{@@_group:nnnN} \Arg{branches} \Arg{min} \Arg{more}
-% \meta{laziness}, also \cs{@@_group_no_capture:nnnN} and
-% \cs{@@_group_resetting:nnnN} with the same syntax.
-% \item \cs{@@_branch:n} \Arg{contents}
-% \item \cs{@@_command_K:}
-% \item \cs{@@_assertion:Nn} \meta{boolean} \Arg{assertion test},
-% where the \meta{assertion test} is \cs{@@_b_test:} or
-% |{|\cs{@@_anchor:N} \meta{integer}|}|
-% \end{itemize}
-% Tests can be the following:
-% \begin{itemize}
-% \item \cs{@@_item_caseful_equal:n} \Arg{char code}
-% \item \cs{@@_item_caseless_equal:n} \Arg{char code}
-% \item \cs{@@_item_caseful_range:nn} \Arg{min} \Arg{max}
-% \item \cs{@@_item_caseless_range:nn} \Arg{min} \Arg{max}
-% \item \cs{@@_item_catcode:nT} \Arg{catcode bitmap} \Arg{tests}
-% \item \cs{@@_item_catcode_reverse:nT} \Arg{catcode bitmap} \Arg{tests}
-% \item \cs{@@_item_reverse:n} \Arg{tests}
-% \item \cs{@@_item_exact:nn} \Arg{catcode} \Arg{char code}
-% \item \cs{@@_item_exact_cs:n} \Arg{csnames}, more precisely given as
-% \meta{csname} \cs{scan_stop:} \meta{csname} \cs{scan_stop:}
-% \meta{csname} and so on in a brace group.
-% \item \cs{@@_item_cs:n} \Arg{compiled regex}
-% \end{itemize}
-%
-% \subsubsection{Variables used when compiling}
-%
-% \begin{variable}{\l_@@_group_level_int}
-% We make sure to open the same number of groups as we close.
-% \begin{macrocode}
-\int_new:N \l_@@_group_level_int % presumably the number of groups currently open; confirm against the compile code
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_mode_int}
-% \begin{variable}
-% {
-% \c_@@_cs_in_class_mode_int,
-% \c_@@_cs_mode_int,
-% \c_@@_outer_mode_int,
-% \c_@@_catcode_mode_int,
-% \c_@@_class_mode_int,
-% \c_@@_catcode_in_class_mode_int
-% }
-% While compiling, ten modes are recognized, labelled $-63$, $-23$,
-% $-6$, $-2$, $0$, $2$, $3$, $6$, $23$, $63$. See
-% section~\ref{sec:regex-modes}. We only define some of these as
-% constants.
-% \begin{macrocode}
-\int_new:N \l_@@_mode_int % current mode while compiling
-\int_const:Nn \c_@@_cs_in_class_mode_int { -6 } % control sequence in a class
-\int_const:Nn \c_@@_cs_mode_int { -2 } % control sequence
-\int_const:Nn \c_@@_outer_mode_int { 0 } % outer
-\int_const:Nn \c_@@_catcode_mode_int { 2 } % catcode test
-\int_const:Nn \c_@@_class_mode_int { 3 } % class inside mode 0
-\int_const:Nn \c_@@_catcode_in_class_mode_int { 6 } % catcode test in a class
-% \end{macrocode}
-% \end{variable}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_catcodes_int, \l_@@_default_catcodes_int}
-% \begin{variable}{\l_@@_catcodes_bool}
-% We wish to allow constructions such as |\c[^BE](..\cL[a-z]..)|,
-% where the outer catcode test applies to the whole group, but is
-% superseded by the inner catcode test. For this to work, we need to
-% keep track of lists of allowed category codes:
-% \cs{l_@@_catcodes_int} and \cs{l_@@_default_catcodes_int} are
-% bitmaps, sums of $4^c$, for all allowed catcodes $c$. The latter is
-% local to each capturing group, and we reset
-% \cs{l_@@_catcodes_int} to that value after each character or
-% class, changing it only when encountering a |\c| escape. The boolean
-% records whether the list of categories of a catcode test has to be
-% inverted: compare |\c[^BE]| and |\c[BE]|.
-% \begin{macrocode}
-\int_new:N \l_@@_catcodes_int % bitmap of the catcodes currently allowed
-\int_new:N \l_@@_default_catcodes_int % bitmap restored after each character or class
-\bool_new:N \l_@@_catcodes_bool % whether the list of categories is to be inverted
-% \end{macrocode}
-% \end{variable}
-% \end{variable}
-%
-% \begin{variable}
-% {
-% \c_@@_catcode_C_int, \c_@@_catcode_B_int, \c_@@_catcode_E_int,
-% \c_@@_catcode_M_int, \c_@@_catcode_T_int, \c_@@_catcode_P_int,
-% \c_@@_catcode_U_int, \c_@@_catcode_D_int, \c_@@_catcode_S_int,
-% \c_@@_catcode_L_int, \c_@@_catcode_O_int, \c_@@_catcode_A_int
-% }
-% \begin{variable}{\c_@@_all_catcodes_int}
-% Constants: $4^c$ for each category, and the sum of all powers of $4$.
-% \begin{macrocode}
-\int_const:Nn \c_@@_catcode_C_int { "1 } % 4^0, catcode 0
-\int_const:Nn \c_@@_catcode_B_int { "4 } % 4^1, catcode 1 (begin-group)
-\int_const:Nn \c_@@_catcode_E_int { "10 } % 4^2, catcode 2 (end-group)
-\int_const:Nn \c_@@_catcode_M_int { "40 } % 4^3, catcode 3 (math shift)
-\int_const:Nn \c_@@_catcode_T_int { "100 } % 4^4, catcode 4 (alignment tab)
-\int_const:Nn \c_@@_catcode_P_int { "1000 } % 4^6, catcode 6 (parameter)
-\int_const:Nn \c_@@_catcode_U_int { "4000 } % 4^7, catcode 7 (superscript)
-\int_const:Nn \c_@@_catcode_D_int { "10000 } % 4^8, catcode 8 (subscript)
-\int_const:Nn \c_@@_catcode_S_int { "100000 } % 4^10, catcode 10 (space)
-\int_const:Nn \c_@@_catcode_L_int { "400000 } % 4^11, catcode 11 (letter)
-\int_const:Nn \c_@@_catcode_O_int { "1000000 } % 4^12, catcode 12 (other)
-\int_const:Nn \c_@@_catcode_A_int { "4000000 } % 4^13, catcode 13 (active)
-\int_const:Nn \c_@@_all_catcodes_int { "5515155 } % sum of the twelve constants above
-% \end{macrocode}
-% \end{variable}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_internal_regex}
-% The compilation step stores its result in this variable.
-% \begin{macrocode}
-\cs_new_eq:NN \l_@@_internal_regex \c_@@_no_match_regex % starts out equal to \c_@@_no_match_regex
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_show_prefix_seq}
-% This sequence holds the prefix that makes up the line displayed to
-% the user. The various items must be removed from the right, which is
-% tricky with a token list, hence we use a sequence.
-% \begin{macrocode}
-\seq_new:N \l_@@_show_prefix_seq % a sequence, so that items can be removed from the right
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_show_lines_int}
-% A hack. To know whether a given class has a single item in it or
-% not, we count the number of lines when showing the class.
-% \begin{macrocode}
-\int_new:N \l_@@_show_lines_int % number of lines counted while showing a class
-% \end{macrocode}
-% \end{variable}
-%
-% \subsubsection{Generic helpers used when compiling}
-%
-% \begin{macro}[int]{\@@_get_digits:NTFw}
-% \begin{macro}[aux, rEXP]{\@@_get_digits_loop:nw}
-% If followed by some raw digits, collect them one by one in the
-% integer variable |#1|, and take the \texttt{true} branch. Otherwise,
-% take the \texttt{false} branch.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_get_digits:NTFw #1#2#3#4#5 % #4 #5 are the next two tokens of the input
- {
- \@@_if_raw_digit:NNTF #4 #5
- { #1 = #5 \@@_get_digits_loop:nw {#2} } % begin assigning to #1; the loop supplies further digits
- { #3 #4 #5 } % no digit: false code, then the two tokens are put back
- }
-\cs_new:Npn \@@_get_digits_loop:nw #1#2#3
- {
- \@@_if_raw_digit:NNTF #2 #3
- { #3 \@@_get_digits_loop:nw {#1} } % one more digit, keep looking
- { \scan_stop: #1 #2 #3 } % end the number, run the true code #1, put the two tokens back
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux, EXP]{\@@_if_raw_digit:NNTF}
-% Test used when grabbing digits for the |{m,n}| quantifier.
-% It only accepts non-escaped digits.
-% \begin{macrocode}
-\prg_new_conditional:Npnn \@@_if_raw_digit:NN #1#2 { TF }
- {
- \if_meaning:w \@@_compile_raw:N #1 % #1 must be the marker \@@_compile_raw:N
- \if_int_compare:w 1 < 1 #2 \exp_stop_f: % true when #2 is a decimal digit, as 1#2 is then at least 10
- \prg_return_true:
- \else:
- \prg_return_false:
- \fi:
- \else:
- \prg_return_false:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Mode}
-% \label{sec:regex-modes}
-%
-% When compiling the \textsc{nfa} corresponding to a given regex string,
-% we can be in ten distinct modes, which we label by some magic numbers:
-% \begin{itemize}
-% \item[-6] |[\c{...}]| control sequence in a class,
-% \item[-2] |\c{...}| control sequence,
-% \item[0] |...| outer,
-% \item[2] |\c...| catcode test,
-% \item[6] |[\c...]| catcode test in a class,
-% \item[-63] |[\c{[...]}]| class inside mode $-6$,
-% \item[-23] |\c{[...]}| class inside mode $-2$,
-% \item[3] |[...]| class inside mode $0$,
-% \item[23] |\c[...]| class inside mode $2$,
-% \item[63] |[\c[...]]| class inside mode $6$.
-% \end{itemize}
-% This list is exhaustive, because |\c| escape sequences cannot be
-% nested, and character classes cannot be nested directly. The choice of
-% numbers is such as to optimize the most useful tests, and make
-% transitions from one mode to another as simple as possible.
-% \begin{itemize}
-% \item Even modes mean that we are not directly in a character class.
-% In this case, a left bracket appends $3$ to the mode. In a
-% character class, a right bracket changes the mode as $m\to
-% (m-15)/13$, truncated.
-% \item Grouping, assertion, and anchors are allowed in non-positive
-% even modes ($0$, $-2$, $-6$), and do not change the
-% mode. Otherwise, they trigger an error.
-% \item A left bracket is special in even modes, appending $3$ to the
-% mode; in those modes, quantifiers and the dot are recognized, and
-% the right bracket is normal. In odd modes (within classes), the
-% left bracket is normal, but the right bracket ends the class,
-% changing the mode from $m$ to $(m-15)/13$, truncated; also, ranges
-% are recognized.
-% \item In non-negative modes, left and right braces are normal. In
-% negative modes, however, left braces trigger a warning; right
-% braces end the control sequence, going from $-2$ to $0$ or $-6$ to
-% $3$, with error recovery for odd modes.
-% \item Properties (such as the |\d| character class) can appear in
-% any mode.
-% \end{itemize}
-%
-% \begin{macro}[int, EXP]{\@@_if_in_class:TF}
-% Test whether we are directly in a character class (at the innermost
-% level of nesting). There, many escape sequences are not recognized,
-% and special characters are normal. Also, for every raw character, we
-% must look ahead for a possible raw dash.
-% \begin{macrocode}
-\cs_new:Npn \@@_if_in_class:TF
- {
- \if_int_odd:w \l_@@_mode_int % odd modes are the class modes
- \exp_after:wN \use_i:nn % true branch
- \else:
- \exp_after:wN \use_ii:nn % false branch
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_if_in_cs:TF}
-% Right braces are special only directly inside control sequences (at
-% the inner-most level of nesting, not counting groups).
-% \begin{macrocode}
-\cs_new:Npn \@@_if_in_cs:TF
- {
- \if_int_odd:w \l_@@_mode_int
- \exp_after:wN \use_ii:nn % odd mode: false branch
- \else:
- \if_int_compare:w \l_@@_mode_int < \c_@@_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % negative even mode: true branch
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn % non-negative even mode: false branch
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_if_in_class_or_catcode:TF}
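-% Assertions are only allowed in modes $0$, $-2$, and $-6$,
-% \emph{i.e.}, even, non-positive modes. This test takes the
-% \texttt{true} branch in all other modes, namely odd modes (classes)
-% and positive even modes (catcode tests).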
-% \begin{macrocode}
-\cs_new:Npn \@@_if_in_class_or_catcode:TF
- {
- \if_int_odd:w \l_@@_mode_int
- \exp_after:wN \use_i:nn % odd mode: true branch
- \else:
- \if_int_compare:w \l_@@_mode_int > \c_@@_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % positive even mode: true branch
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn % modes 0, -2, -6: false branch
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_if_within_catcode:TF}
-% This test takes the true branch if we are in a catcode test, either
-% immediately following it (modes $2$ and $6$) or in a class on which
-% it applies (modes $23$ and $63$). This is used to tweak how left
-% brackets behave in modes $2$ and $6$.
-% \begin{macrocode}
-\cs_new:Npn \@@_if_within_catcode:TF % expandable: T if the mode is above \c_@@_outer_mode_int, F otherwise
- {
- \if_int_compare:w \l_@@_mode_int > \c_@@_outer_mode_int
- \exp_after:wN \use_i:nn % \else: is expanded first, so \use_i:nn sees T and F
- \else:
- \exp_after:wN \use_ii:nn % \fi: is expanded first, so \use_ii:nn sees T and F
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_chk_c_allowed:T}
-% The |\c| escape sequence is only allowed in modes $0$ and $3$,
-% \emph{i.e.}, not within any other |\c| escape sequence.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_chk_c_allowed:T % runs the following argument only in the outer or class mode, otherwise raises an error
- {
- \if_int_compare:w \l_@@_mode_int = \c_@@_outer_mode_int
- \exp_after:wN \use:n % outer mode: keep the argument
- \else:
- \if_int_compare:w \l_@@_mode_int = \c_@@_class_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use:n % class mode: keep the argument
- \else:
- \__msg_kernel_error:nn { regex } { c-bad-mode } % any other mode: report the misplaced \c
- \exp_after:wN \exp_after:wN \exp_after:wN \use_none:n % ... and discard the argument
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_mode_quit_c:}
-% This function changes the mode as it is needed just after a catcode
-% test.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_mode_quit_c: % leaves a catcode-test mode; any other mode is left unchanged
- {
- \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_mode_int
- \int_set_eq:NN \l_@@_mode_int \c_@@_outer_mode_int % catcode mode -> outer mode
- \else:
- \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_in_class_mode_int
- \int_set_eq:NN \l_@@_mode_int \c_@@_class_mode_int % catcode-in-class mode -> class mode
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Framework}
-%
-% \begin{macro}[int]{\@@_compile:w, \@@_compile_end:}
-% Used when compiling a user regex or a regex for the |\c{...}| escape
-% sequence within another regex. Start building a token list within a
-% group (with \texttt{x}-expansion at the outset), and set a few
-% variables (group level, catcodes), then start the first branch. At
-% the end, make sure there are no dangling classes nor groups, close
-% the last branch: we are done building \cs{l_@@_internal_regex}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile:w % opens the build of \l_@@_internal_regex; \@@_compile_end: closes it
- {
- \__tl_build_x:Nw \l_@@_internal_regex % start building the token list (x-expansion variant)
- \int_zero:N \l_@@_group_level_int % no regex group is open yet
- \int_set_eq:NN \l_@@_default_catcodes_int \c_@@_all_catcodes_int % default: all catcodes
- \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int
- \cs_set:Npn \@@_item_equal:n { \@@_item_caseful_equal:n } % start with the caseful item functions
- \cs_set:Npn \@@_item_range:nn { \@@_item_caseful_range:nn }
- \__tl_build_one:n { \@@_branch:n { \if_false: } \fi: } % open the first branch; \if_false: } \fi: keeps this definition brace-balanced
- }
-\cs_new_protected:Npn \@@_compile_end: % ends the build started by \@@_compile:w, recovering from unclosed classes and groups
- {
- \@@_if_in_class:TF
- {
- \__msg_kernel_error:nn { regex } { missing-rbrack } % still in a class: report it ...
- \use:c { @@_compile_]: } % ... and run the right-bracket code to close it
- \prg_do_nothing: \prg_do_nothing: % dummy pair for the quantifier look-ahead which \@@_compile_]: may start
- }
- { }
- \if_int_compare:w \l_@@_group_level_int > 0 \exp_stop_f: % some groups were never closed
- \__msg_kernel_error:nnx { regex } { missing-rparen }
- { \int_use:N \l_@@_group_level_int }
- \prg_replicate:nn
- { \l_@@_group_level_int } % once per open group
- {
- \__tl_build_one:n
- {
- \if_false: { \fi: } % first closing brace
- \if_false: { \fi: } { 1 } { 0 } \c_true_bool % second closing brace, then the same shape \@@_compile_quantifier_none: emits (here with \c_true_bool)
- }
- \__tl_build_end:
- \__tl_build_one:o \l_@@_internal_regex % append the finished inner build to the enclosing one
- }
- \fi:
- \__tl_build_one:n { \if_false: { \fi: } } % close the last branch
- \__tl_build_end:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_compile:n}
-% The compilation is done between \cs{@@_compile:w} and
-% \cs{@@_compile_end:}, starting in mode~$0$. Then
-% \cs{@@_escape_use:nnnn} distinguishes special characters, escaped
-% alphanumerics, and raw characters, interpreting |\a|, |\x| and other
-% sequences. The $4$ trailing \cs{prg_do_nothing:} are needed because
-% some functions defined later look up to $4$ tokens ahead. Before
-% ending, make sure that any |\c{...}| is properly closed. No need to
-% check that brackets are closed properly since \cs{@@_compile_end:}
-% does that. However, catch the case of a trailing |\cL|
-% construction.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile:n #1 % compiles the regex #1 into \l_@@_internal_regex
- {
- \@@_compile:w
- \@@_standard_escapechar:
- \int_set_eq:NN \l_@@_mode_int \c_@@_outer_mode_int % compilation starts in the outer mode
- \@@_escape_use:nnnn
- {
- \@@_char_if_special:NTF ##1 % first handler: special characters go to \@@_compile_special:N, others are raw
- \@@_compile_special:N \@@_compile_raw:N ##1
- }
- {
- \@@_char_if_alphanumeric:NTF ##1 % second handler: alphanumerics go to \@@_compile_escaped:N, others are raw
- \@@_compile_escaped:N \@@_compile_raw:N ##1
- }
- { \@@_compile_raw:N ##1 } % third handler: always raw
- { #1 } % the regex to process
- \prg_do_nothing: \prg_do_nothing: % padding: some functions look up to four tokens ahead
- \prg_do_nothing: \prg_do_nothing:
- \int_compare:nNnT \l_@@_mode_int = \c_@@_catcode_mode_int
- { \__msg_kernel_error:nn { regex } { c-trailing } } % the regex ended right after a catcode test
- \int_compare:nNnT \l_@@_mode_int < \c_@@_outer_mode_int
- {
- \__msg_kernel_error:nn { regex } { c-missing-rbrace } % a \c{...} was never closed
- \@@_compile_end_cs: % close it now
- \prg_do_nothing: \prg_do_nothing: % padding again, for whatever look-ahead follows
- \prg_do_nothing: \prg_do_nothing:
- }
- \@@_compile_end:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_compile_escaped:N, \@@_compile_special:N}
-% If the special character or escaped alphanumeric has a particular
-% meaning in regexes, the corresponding function is used. Otherwise,
-% it is interpreted as a raw character. We distinguish special
-% characters from escaped alphanumeric characters because they behave
-% differently when appearing as an end-point of a range.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_special:N #1 % #1: special character; uses \@@_compile_#1: when that function exists
- {
- \cs_if_exist_use:cF { @@_compile_#1: }
- { \@@_compile_raw:N #1 } % no dedicated function: treat #1 as a raw character
- }
-\cs_new_protected:Npn \@@_compile_escaped:N #1 % #1: escaped alphanumeric; uses \@@_compile_/#1: when that function exists
- {
- \cs_if_exist_use:cF { @@_compile_/#1: }
- { \@@_compile_raw:N #1 } % no dedicated function: treat #1 as a raw character
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_compile_one:x}
-% This is used after finding one \enquote{test}, such as |\d|, or a
-% raw character. If that followed a catcode test (\emph{e.g.}, |\cL|),
-% then restore the mode. If we are not in a class, then the test is
-% \enquote{standalone}, and we need to add \cs{@@_class:NnnnN} and
-% search for quantifiers. In any case, insert the test, possibly
-% together with a catcode test if appropriate.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_one:x #1 % adds the single test #1 to the regex being built
- {
- \@@_mode_quit_c: % leave a pending catcode-test mode, if any
- \@@_if_in_class:TF { }
- {
- \__tl_build_one:n
- { \@@_class:NnnnN \c_true_bool { \if_false: } \fi: } % outside a class: open a \@@_class:NnnnN around the test
- }
- \__tl_build_one:x
- {
- \if_int_compare:w \l_@@_catcodes_int < \c_@@_all_catcodes_int % a catcode restriction is active
- \@@_item_catcode:nT { \int_use:N \l_@@_catcodes_int }
- { \exp_not:N \exp_not:n {#1} } % \exp_not:n itself survives this x-expansion, whereas #1 is expanded
- \else:
- \exp_not:N \exp_not:n {#1}
- \fi:
- }
- \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int % back to the default catcodes
- \@@_if_in_class:TF { } { \@@_compile_quantifier:w } % outside a class: look for a quantifier
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]
-% {\@@_compile_abort_tokens:n, \@@_compile_abort_tokens:x}
-% This function places the collected tokens back in the input stream,
-% each as a raw character. Spaces are not preserved.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_abort_tokens:n #1 % puts #1 back in the input, each character preceded by \@@_compile_raw:N
- {
- \use:x
- {
- \exp_args:No \tl_map_function:nN { \tl_to_str:n {#1} } % map over the string form of #1
- \@@_compile_raw:N
- }
- }
-\cs_generate_variant:Nn \@@_compile_abort_tokens:n { x }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Quantifiers}
-%
-% \begin{macro}[int]{\@@_compile_quantifier:w}
-% This looks ahead and finds any quantifier (special character equal
-% to either of \texttt{?+*\{}).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_quantifier:w #1#2 % #1#2: next handler/character pair in the input
- {
- \token_if_eq_meaning:NNTF #1 \@@_compile_special:N % only a special character can start a quantifier
- {
- \cs_if_exist_use:cF { @@_compile_quantifier_#2:w }
- { \@@_compile_quantifier_none: #1 #2 } % special, but not a quantifier: put the pair back
- }
- { \@@_compile_quantifier_none: #1 #2 } % not special: no quantifier, put the pair back
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_quantifier_none:}
-% \begin{macro}[aux]{\@@_compile_quantifier_abort:xNN}
-% Those functions are called whenever there is no quantifier, or a
-% braced construction is invalid (equivalent to no quantifier, and
-% whatever characters were grabbed are left raw).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_quantifier_none: % no quantifier: closing brace, then { 1 } { 0 } and \c_false_bool
- { \__tl_build_one:n { \if_false: { \fi: } { 1 } { 0 } \c_false_bool } }
-\cs_new_protected:Npn \@@_compile_quantifier_abort:xNN #1#2#3 % #1: characters grabbed so far; #2#3: pair at which the syntax broke
- {
- \@@_compile_quantifier_none: % behave as if there were no quantifier
- \__msg_kernel_warning:nnxx { regex } { invalid-quantifier } {#1} {#3}
- \@@_compile_abort_tokens:x {#1} % re-read the grabbed characters as raw ones
- #2 #3 % then resume with the offending pair
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_quantifier_lazyness:nnNN}
-% Once the \enquote{main} quantifier (\texttt{?}, \texttt{*},
-% \texttt{+} or a braced construction) is found, we check whether it
-% is lazy (followed by a question mark). We then add to the compiled
-% regex a closing brace (ending \cs{@@_class:NnnnN} and friends),
-% the start-point of the range, its end-point, and a boolean,
-% \texttt{true} for lazy and \texttt{false} for greedy operators.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_quantifier_lazyness:nnNN #1#2#3#4 % #1, #2: the two range arguments; #3#4: next pair in the input
- {
- \str_if_eq:nnTF { #3 #4 } { \@@_compile_special:N ? } % a following special "?" marks a lazy quantifier
- { \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_true_bool } } % lazy: the "?" pair is consumed
- {
- \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_false_bool } % greedy
- #3 #4 % the pair is not part of the quantifier: put it back
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_compile_quantifier_?:w,
-% \@@_compile_quantifier_*:w,
-% \@@_compile_quantifier_+:w
-% }
-% For each \enquote{basic} quantifier, |?|, |*|, |+|, feed the correct
-% arguments to \cs{@@_compile_quantifier_lazyness:nnNN}; $-1$ means
-% that there is no upper bound on the number of repetitions.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_quantifier_?:w } % "?": range arguments 0 and 1
- { \@@_compile_quantifier_lazyness:nnNN { 0 } { 1 } }
-\cs_new_protected:cpn { @@_compile_quantifier_*:w } % "*": range arguments 0 and -1 (-1: no upper bound)
- { \@@_compile_quantifier_lazyness:nnNN { 0 } { -1 } }
-\cs_new_protected:cpn { @@_compile_quantifier_+:w } % "+": range arguments 1 and -1 (-1: no upper bound)
- { \@@_compile_quantifier_lazyness:nnNN { 1 } { -1 } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]+\@@_compile_quantifier_{:w+ ^^A}
-% \begin{macro}[aux]
-% {
-% \@@_compile_quantifier_braced_auxi:w,
-% \@@_compile_quantifier_braced_auxii:w,
-% \@@_compile_quantifier_braced_auxiii:w,
-% }
-% Three possible syntaxes: \texttt{\{\meta{int}\}},
-% \texttt{\{\meta{int},\}}, or \texttt{\{\meta{int},\meta{int}\}}. Any
-% other syntax causes us to abort and put whatever we collected back
-% in the input stream, as \texttt{raw} characters, including the
-% opening brace. Grab a number into \cs{l_@@_internal_a_int}. If
-% the number is followed by a right brace, the range is $[a,a]$. If
-% followed by a comma, grab one more number, and call the \texttt{_ii}
-% or \texttt{_iii} auxiliary. Those auxiliaries check for a closing
-% brace, leading to the range $[a,\infty]$ or $[a,b]$, encoded as
-% $\{a\}\{-1\}$ and $\{a\}\{b-a\}$.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_quantifier_ \c_left_brace_str :w } % braced quantifier: begin by grabbing a number
- {
- \@@_get_digits:NTFw \l_@@_internal_a_int % the number goes into \l_@@_internal_a_int
- { \@@_compile_quantifier_braced_auxi:w } % true branch: go on and check what follows the number
- { \@@_compile_quantifier_abort:xNN { \c_left_brace_str } } % false branch: abort, putting back only the left brace
- }
-\cs_new_protected:Npn \@@_compile_quantifier_braced_auxi:w #1#2 % #1#2: pair following the first number
- {
- \str_case_x:nnF { #1 #2 }
- {
- { \@@_compile_special:N \c_right_brace_str } % right brace: the range is [a,a]
- {
- \exp_args:No \@@_compile_quantifier_lazyness:nnNN
- { \int_use:N \l_@@_internal_a_int } { 0 } % range arguments a and 0
- }
- { \@@_compile_special:N , } % comma: try to grab a second number
- {
- \@@_get_digits:NTFw \l_@@_internal_b_int
- { \@@_compile_quantifier_braced_auxiii:w } % true branch of the digit grabbing
- { \@@_compile_quantifier_braced_auxii:w } % false branch of the digit grabbing
- }
- }
- {
- \@@_compile_quantifier_abort:xNN % anything else: invalid syntax
- { \c_left_brace_str \int_use:N \l_@@_internal_a_int } % characters to put back as raw
- #1 #2
- }
- }
-\cs_new_protected:Npn \@@_compile_quantifier_braced_auxii:w #1#2 % #1#2: pair following the comma
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \@@_compile_special:N \c_right_brace_str } % right brace: no upper bound
- {
- \exp_args:No \@@_compile_quantifier_lazyness:nnNN
- { \int_use:N \l_@@_internal_a_int } { -1 } % range arguments a and -1
- }
- {
- \@@_compile_quantifier_abort:xNN % anything else: invalid syntax
- { \c_left_brace_str \int_use:N \l_@@_internal_a_int , } % characters to put back as raw
- #1 #2
- }
- }
-\cs_new_protected:Npn \@@_compile_quantifier_braced_auxiii:w #1#2 % #1#2: pair following the second number
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \@@_compile_special:N \c_right_brace_str } % right brace: the range is [a,b]
- {
- \if_int_compare:w \l_@@_internal_a_int > \l_@@_internal_b_int % a > b: bounds given backwards
- \__msg_kernel_error:nnxx { regex } { backwards-quantifier }
- { \int_use:N \l_@@_internal_a_int }
- { \int_use:N \l_@@_internal_b_int }
- \int_zero:N \l_@@_internal_b_int % recover with second argument 0, as for a single number
- \else:
- \int_sub:Nn \l_@@_internal_b_int \l_@@_internal_a_int % second argument is b - a
- \fi:
- \exp_args:Noo \@@_compile_quantifier_lazyness:nnNN
- { \int_use:N \l_@@_internal_a_int }
- { \int_use:N \l_@@_internal_b_int }
- }
- {
- \@@_compile_quantifier_abort:xNN % anything else: invalid syntax
- {
- \c_left_brace_str
- \int_use:N \l_@@_internal_a_int ,
- \int_use:N \l_@@_internal_b_int
- }
- #1 #2
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsubsection{Raw characters}
-%
-% \begin{macro}[int]{\@@_compile_raw_error:N}
-% Within character classes, and following catcode tests, some escaped
-% alphanumeric sequences such as |\b| do not have any meaning. They
-% are replaced by a raw character, after spitting out an error.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_raw_error:N #1 % #1: letter of an escape sequence that has no meaning here
- {
- \__msg_kernel_error:nnx { regex } { bad-escape } {#1} % report the bad escape ...
- \@@_compile_raw:N #1 % ... then treat the letter as a raw character
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_compile_raw:N}
-% If we are in a character class and the next character is an
-% unescaped dash, this denotes a range. Otherwise, the current
-% character |#1| matches itself.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_raw:N #1#2#3 % #1: raw character; #2#3: next pair in the input
- {
- \@@_if_in_class:TF
- {
- \str_if_eq:nnTF {#2#3} { \@@_compile_special:N - } % in a class, a special dash after #1 denotes a range
- { \@@_compile_range:Nw #1 } % the dash pair is consumed
- {
- \@@_compile_one:x
- { \@@_item_equal:n { \__int_value:w `#1 ~ } } % test against the character code of #1
- #2 #3 % put the pair back
- }
- }
- {
- \@@_compile_one:x % outside a class: #1 always matches itself
- { \@@_item_equal:n { \__int_value:w `#1 ~ } }
- #2 #3
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_range:Nw, \@@_if_end_range:NNTF}
-% We have just read a raw character followed by a dash; this should be
-% followed by an end-point for the range. Valid end-points are: any
-% raw character; any special character, except a right bracket. In
-% particular, escaped characters are forbidden.
-% \begin{macrocode}
-\prg_new_protected_conditional:Npnn \@@_if_end_range:NN #1#2 { TF } % #1#2: candidate pair for the end-point of a range
- {
- \if_meaning:w \@@_compile_raw:N #1 % raw character: valid end-point
- \prg_return_true:
- \else:
- \if_meaning:w \@@_compile_special:N #1
- \if_charcode:w ] #2 % special right bracket: not an end-point
- \prg_return_false:
- \else:
- \prg_return_true: % any other special character: valid end-point
- \fi:
- \else:
- \prg_return_false: % neither raw nor special (escaped): not an end-point
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \@@_compile_range:Nw #1#2#3 % #1: start of the range; #2#3: pair following the dash
- {
- \@@_if_end_range:NNTF #2 #3
- {
- \if_int_compare:w `#1 > `#3 \exp_stop_f: % end-point below start-point: error, nothing is added
- \__msg_kernel_error:nnxx { regex } { range-backwards } {#1} {#3}
- \else:
- \__tl_build_one:x
- {
- \if_int_compare:w `#1 = `#3 \exp_stop_f: % equal end-points: a single-character test
- \@@_item_equal:n
- \else:
- \@@_item_range:nn { \__int_value:w `#1 ~ } % genuine range: first argument is the start code
- \fi:
- { \__int_value:w `#3 ~ } % shared last argument: the end (or only) character code
- }
- \fi:
- }
- {
- \__msg_kernel_warning:nnxx { regex } { range-missing-end }
- {#1} { \c_backslash_str #3 }
- \__tl_build_one:x % no valid end-point: #1 and the dash both match themselves
- {
- \@@_item_equal:n { \__int_value:w `#1 ~ }
- \@@_item_equal:n { \__int_value:w `- ~ }
- }
- #2#3 % put the pair back
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Character properties}
-%
-% \begin{macro}[aux]{\@@_compile_.:, \@@_prop_.:}
-% In a class, the dot has no special meaning. Outside, insert
-% \cs{@@_prop_.:}, which matches any character or control
-% sequence, and refuses $-2$ (end-marker).
-% \begin{macrocode}
-\cs_new_protected:cpx { @@_compile_.: } % x-type definition: \exp_not:c builds the \@@_prop_.: token at definition time
- {
- \exp_not:N \@@_if_in_class:TF % not a protected function, so it must be shielded from the x-expansion
- { \@@_compile_raw:N . } % in a class: the dot is a raw character
- { \@@_compile_one:x \exp_not:c { @@_prop_.: } } % elsewhere: insert the \@@_prop_.: test
- }
-\cs_new_protected:cpn { @@_prop_.: } % succeeds for any current character above -2
- {
- \if_int_compare:w \l_@@_current_char_int > - 2 \exp_stop_f: % -2 is the end-marker, which is refused
- \exp_after:wN \@@_break_true:w % close the conditional, then report success
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_compile_/d:, \@@_compile_/D:,
-% \@@_compile_/h:, \@@_compile_/H:,
-% \@@_compile_/s:, \@@_compile_/S:,
-% \@@_compile_/v:, \@@_compile_/V:,
-% \@@_compile_/w:, \@@_compile_/W:,
-% \@@_compile_/N:,
-% }
-% The constants \cs{@@_prop_d:}, \emph{etc.} hold
-% a list of tests which match the corresponding character
-% class, and jump to the \cs{@@_break_point:TF} marker.
-% As for a normal character, we check for quantifiers.
-% \begin{macrocode}
-\cs_set_protected:Npn \@@_tmp:w #1#2 % #1: escape letter of a property; #2: letter of its reversed counterpart
- {
- \cs_new_protected:cpx { @@_compile_/#1: }
- { \@@_compile_one:x \exp_not:c { @@_prop_#1: } } % \#1 inserts \@@_prop_#1: as is
- \cs_new_protected:cpx { @@_compile_/#2: }
- {
- \@@_compile_one:x
- { \@@_item_reverse:n \exp_not:c { @@_prop_#1: } } % \#2 inserts the same test wrapped in \@@_item_reverse:n
- }
- }
-\@@_tmp:w d D
-\@@_tmp:w h H
-\@@_tmp:w s S
-\@@_tmp:w v V
-\@@_tmp:w w W
-\cs_new_protected:cpn { @@_compile_/N: } % \N has no reversed counterpart defined here
- { \@@_compile_one:x \@@_prop_N: }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Anchoring and simple assertions}
-%
-% \begin{macro}[aux]{\@@_compile_anchor:NF}
-% \begin{macro}[aux]+\@@_compile_^:+
-% \begin{macro}[aux]{\@@_compile_/A:, \@@_compile_/G:}
-% \begin{macro}[aux]+\@@_compile_$:+
-% \begin{macro}[aux]{\@@_compile_/Z:, \@@_compile_/z:}
-% In modes where assertions are allowed, anchor to the start of the
-% query, the start of the match, or the end of the query, depending on
-% the integer |#1|. In other modes, |#2| treats the character as raw,
-% with an error for escaped letters (|$| is valid in a class, but |\A|
-% is definitely a mistake on the user's part).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_anchor:NF #1#2 % #1: position integer to anchor at; #2: fallback code
- {
- \@@_if_in_class_or_catcode:TF {#2} % in a class or after a catcode test: no assertion, run #2
- {
- \__tl_build_one:n
- { \@@_assertion:Nn \c_true_bool { \@@_anchor:N #1 } } % otherwise insert the anchoring assertion
- }
- }
-\cs_set_protected:Npn \@@_tmp:w #1#2 % #1: escaped letter; #2: integer passed to \@@_compile_anchor:NF
- {
- \cs_new_protected:cpn { @@_compile_/#1: }
- { \@@_compile_anchor:NF #2 { \@@_compile_raw_error:N #1 } } % fallback: error, then the raw letter
- }
-\@@_tmp:w A \l_@@_min_pos_int
-\@@_tmp:w G \l_@@_start_pos_int
-\@@_tmp:w Z \l_@@_max_pos_int
-\@@_tmp:w z \l_@@_max_pos_int
-\cs_set_protected:Npn \@@_tmp:w #1#2 % same for special characters, but the fallback raises no error
- {
- \cs_new_protected:cpn { @@_compile_#1: }
- { \@@_compile_anchor:NF #2 { \@@_compile_raw:N #1 } } % fallback: the raw character
- }
-\exp_args:Nx \@@_tmp:w { \iow_char:N \^ } \l_@@_min_pos_int
-\exp_args:Nx \@@_tmp:w { \iow_char:N \$ } \l_@@_max_pos_int
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_/b:, \@@_compile_/B:}
-% Contrary to |^| and |$|, which could be implemented without really
-% knowing what precedes in the token list, this requires more
-% information, namely, the knowledge of the last character code.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_/b: } % \b: assertion built from \@@_b_test: with \c_true_bool
- {
- \@@_if_in_class_or_catcode:TF
- { \@@_compile_raw_error:N b } % in a class or after a catcode test: error, then a raw "b"
- {
- \__tl_build_one:n
- { \@@_assertion:Nn \c_true_bool { \@@_b_test: } }
- }
- }
-\cs_new_protected:cpn { @@_compile_/B: } % \B: same test as \b, but with \c_false_bool
- {
- \@@_if_in_class_or_catcode:TF
- { \@@_compile_raw_error:N B } % in a class or after a catcode test: error, then a raw "B"
- {
- \__tl_build_one:n
- { \@@_assertion:Nn \c_false_bool { \@@_b_test: } }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Character classes}
-%
-% \begin{macro}[aux]{\@@_compile_]:}
-% Outside a class, right brackets have no meaning. In a class, change
-% the mode ($m\to (m-15)/13$, truncated) to reflect the fact that we
-% are leaving the class. Look for quantifiers, unless we are still in
-% a class after leaving one (the case of
-% |[...\cL[...]...]|).
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_]: } % right bracket: closes a class when in one
- {
- \@@_if_in_class:TF
- {
- \if_int_compare:w \l_@@_mode_int > \c_@@_catcode_in_class_mode_int
- \__tl_build_one:n { \if_false: { \fi: } } % one more closing brace -- presumably for the argument opened by \@@_compile_class_catcode:w
- \fi:
- \tex_advance:D \l_@@_mode_int - 15 \exp_stop_f: % mode m becomes (m - 15) / 13, truncated
- \tex_divide:D \l_@@_mode_int 13 \exp_stop_f:
- \if_int_odd:w \l_@@_mode_int \else: % new mode is even: no longer in a class
- \exp_after:wN \@@_compile_quantifier:w % close the conditional, then look for a quantifier
- \fi:
- }
- { \@@_compile_raw:N ] } % outside a class: a raw character
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_[:}
-% In a class, left brackets might introduce a \textsc{posix} character
-% class, or mean nothing. Immediately following |\c|\meta{category},
-% we must insert the appropriate catcode test, then parse the class; we
-% pre-expand the catcode as an optimization. Otherwise (modes $0$,
-% $-2$ and $-6$) just parse the class. The mode is updated later.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_[: } % left bracket: its meaning depends on the mode
- {
- \@@_if_in_class:TF
- { \@@_compile_class_posix_test:w } % already in a class: check for a POSIX class
- {
- \@@_if_within_catcode:TF
- {
- \exp_after:wN \@@_compile_class_catcode:w % right after a catcode test
- \int_use:N \l_@@_catcodes_int ; % expanded first, so the current value is passed as the delimited argument
- }
- { \@@_compile_class_normal:w } % otherwise: a normal class
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_class_normal:w}
-% In the \enquote{normal} case, we will insert \cs{@@_class:NnnnN}
-% \meta{boolean} in the compiled code. The \meta{boolean} is true for
-% positive classes, and false for negative classes, characterized by a
-% leading |^|. The auxiliary \cs{@@_compile_class:TFNN} also
-% checks for a leading |]| which has a special meaning.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_class_normal:w % supplies the T and F arguments of \@@_compile_class:TFNN
- {
- \@@_compile_class:TFNN
- { \@@_class:NnnnN \c_true_bool } % used for a positive class
- { \@@_class:NnnnN \c_false_bool } % used for a negative class (leading "^")
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_class_catcode:w}
-% This function is called for a left bracket in modes $2$ or $6$
-% (catcode test, and catcode test within a class). In mode $2$ the
-% whole construction needs to be put in a class (like a single
-% character). Then determine if the class is positive or negative,
-% inserting \cs{@@_item_catcode:nT} or the \texttt{reverse} variant
-% as appropriate, each with the current catcodes bitmap |#1| as an
-% argument, and reset the catcodes.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_class_catcode:w #1; % #1: current catcodes value, delimited by the semicolon
- {
- \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_mode_int
- \__tl_build_one:n
- { \@@_class:NnnnN \c_true_bool { \if_false: } \fi: } % catcode mode only: open an enclosing class first
- \fi:
- \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int % reset the catcodes; #1 keeps the old value
- \@@_compile_class:TFNN
- { \@@_item_catcode:nT {#1} } % used for a positive class
- { \@@_item_catcode_reverse:nT {#1} } % used for a negative class (leading "^")
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {\@@_compile_class:TFNN, \@@_compile_class:NN}
-% If the first character is |^|, then the class is negative (use
-% |#2|), otherwise it is positive (use |#1|). If the next character
-% is a right bracket, then it should be changed to a raw one.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_class:TFNN #1#2#3#4 % #1: code for a positive class; #2: for a negative one; #3#4: next pair
- {
- \l_@@_mode_int = \__int_value:w \l_@@_mode_int 3 \exp_stop_f: % append the digit 3 to the decimal form of the mode
- \str_if_eq:nnTF { #3 #4 } { \@@_compile_special:N ^ } % leading special "^": negative class
- {
- \__tl_build_one:n { #2 { \if_false: } \fi: } % insert #2 and an opening brace
- \@@_compile_class:NN % the "^" pair is consumed
- }
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % insert #1 and an opening brace
- \@@_compile_class:NN #3 #4 % the pair is examined again
- }
- }
-\cs_new_protected:Npn \@@_compile_class:NN #1#2 % #1#2: first pair inside the class (after any "^")
- {
- \token_if_eq_charcode:NNTF #2 ]
- { \@@_compile_raw:N #2 } % a right bracket in first position is made raw
- { #1 #2 } % anything else keeps its own handler
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_compile_class_posix_test:w,
-% \@@_compile_class_posix:NNNNw,
-% \@@_compile_class_posix_loop:w,
-% \@@_compile_class_posix_end:w
-% }
-% Here we check for a syntax such as |[:alpha:]|. We also detect |[=|
-% and |[.| which have a meaning in \textsc{posix} regular expressions,
-% but are not implemented in \pkg{l3regex}. In case we see |[:|, grab
-% raw characters until hopefully reaching |:]|. If that's missing, or
-% the \textsc{posix} class is unknown, abort. If all is right, add the
-% test to the current class, with an extra \cs{@@_item_reverse:n}
-% for negative classes.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_class_posix_test:w #1#2 % #1#2: pair following a left bracket inside a class
- {
- \token_if_eq_meaning:NNT \@@_compile_special:N #1
- {
- \str_case:nn { #2 }
- {
- : { \@@_compile_class_posix:NNNNw } % "[:" may start a POSIX class
- = { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { = } } % "[=" is only warned about
- . { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { . } } % "[." is only warned about
- }
- }
- \@@_compile_raw:N [ #1 #2 % raw bracket, pair put back; in the ":" case these four tokens are grabbed by \@@_compile_class_posix:NNNNw
- }
-\cs_new_protected:Npn \@@_compile_class_posix:NNNNw #1#2#3#4#5#6 % #1-#4: grabbed and not used; #5#6: pair following "[:"
- {
- \str_if_eq:nnTF { #5 #6 } { \@@_compile_special:N ^ } % leading special "^": negated POSIX class
- {
- \bool_set_false:N \l_@@_internal_bool
- \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi: % opens an x-type assignment; the loop supplies its closing brace
- \@@_compile_class_posix_loop:w % the "^" pair is consumed
- }
- {
- \bool_set_true:N \l_@@_internal_bool
- \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi: % same open-ended assignment
- \@@_compile_class_posix_loop:w #5 #6 % the pair is part of the name: put it back
- }
- }
-\cs_new:Npn \@@_compile_class_posix_loop:w #1#2 % expandable: runs inside the x-type assignment to \l_@@_internal_a_tl
- {
- \token_if_eq_meaning:NNTF \@@_compile_raw:N #1
- { #2 \@@_compile_class_posix_loop:w } % raw character: keep it and go on with the next pair
- { \if_false: { \fi: } \@@_compile_class_posix_end:w #1 #2 } % otherwise: close the assignment and check the ending
- }
-\cs_new_protected:Npn \@@_compile_class_posix_end:w #1#2#3#4 % #1#2 and #3#4: the two pairs following the collected name
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \@@_compile_special:N : \@@_compile_special:N ] } % properly closed by ":]"
- {
- \cs_if_exist:cTF { @@_posix_ \l_@@_internal_a_tl : } % is the collected name a known POSIX class?
- {
- \@@_compile_one:x
- {
- \bool_if:NF \l_@@_internal_bool \@@_item_reverse:n % negated class: reverse the test
- \exp_not:c { @@_posix_ \l_@@_internal_a_tl : }
- }
- }
- {
- \__msg_kernel_warning:nnx { regex } { posix-unknown }
- { \l_@@_internal_a_tl }
- \@@_compile_abort_tokens:x % unknown name: re-read the whole construction as raw characters
- {
- [: \bool_if:NF \l_@@_internal_bool { ^ }
- \l_@@_internal_a_tl :]
- }
- }
- }
- {
- \__msg_kernel_error:nnxx { regex } { posix-missing-close }
- { [: \l_@@_internal_a_tl } { #2 #4 }
- \@@_compile_abort_tokens:x { [: \l_@@_internal_a_tl } % re-read "[:" and the name as raw characters
- #1 #2 #3 #4 % then resume with the two pairs
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Groups and alternations}
-%
-% \begin{macro}[aux]{\@@_compile_group_begin:N, \@@_compile_group_end:}
-% The contents of a regex group are turned into compiled code in
-% \cs{l_@@_internal_regex}, which ends up with items of the form
-% \cs{@@_branch:n} \Arg{concatenation}. This construction is done
-% using \pkg{l3tl-build} within a \TeX{} group, which automatically
-% makes sure that options (case-sensitivity and default catcode) are
-% reset at the end of the group. The argument |#1| is
-% \cs{@@_group:nnnN} or a variant thereof. A small subtlety to
-% support |\cL(abc)| as a shorthand for |(\cLa\cLb\cLc)|: exit any
-% pending catcode test, save the category code at the start of the
-% group as the default catcode for that group, and make sure that the
-% catcode is restored to the default outside the group.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_group_begin:N #1 % #1: \@@_group:nnnN or a variant thereof
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % the group function and an opening brace
- \@@_mode_quit_c: % exit any pending catcode test
- \__tl_build:Nw \l_@@_internal_regex % nested build for the contents of the group
- \int_set_eq:NN \l_@@_default_catcodes_int \l_@@_catcodes_int % the catcodes in force become the default for the group
- \int_incr:N \l_@@_group_level_int
- \__tl_build_one:n { \@@_branch:n { \if_false: } \fi: } % open the first branch of the group
- }
-\cs_new_protected:Npn \@@_compile_group_end: % closes the innermost regex group, if there is one
- {
- \if_int_compare:w \l_@@_group_level_int > 0 \exp_stop_f:
- \__tl_build_one:n { \if_false: { \fi: } } % close the last branch
- \__tl_build_end:
- \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int % restore the default catcodes outside the group
- \__tl_build_one:o \l_@@_internal_regex % append the contents of the group to the enclosing build
- \exp_after:wN \@@_compile_quantifier:w % leave the conditional, then look for a quantifier
- \else:
- \__msg_kernel_warning:nn { regex } { extra-rparen } % no group is open
- \exp_after:wN \@@_compile_raw:N \exp_after:wN ) % raw parenthesis; \fi: is expanded before the look-ahead
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_(:}
-% In a class, parentheses are not special. Outside, check for a |?|,
-% denoting special groups, and run the code for the corresponding
-% special group.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_(: } % left parenthesis
- {
- \@@_if_in_class:TF { \@@_compile_raw:N ( } % in a class: a raw character
- { \@@_compile_lparen:w } % elsewhere: start of a group
- }
-\cs_new_protected:Npn \@@_compile_lparen:w #1#2#3#4 % #1#2 and #3#4: the next two pairs in the input
- {
- \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ? } % a special "?" right after the parenthesis: special group
- {
- \cs_if_exist_use:cF
- { @@_compile_special_group_\token_to_str:N #4 :w } % dispatch on the character following the "?"
- {
- \__msg_kernel_warning:nnx { regex } { special-group-unknown }
- { (? #4 }
- \@@_compile_group_begin:N \@@_group:nnnN % unknown: fall back to a normal group ...
- \@@_compile_raw:N ? #3 #4 % ... whose contents begin with a raw "?"
- }
- }
- {
- \@@_compile_group_begin:N \@@_group:nnnN % normal group
- #1 #2 #3 #4 % put both pairs back
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]+\@@_compile_|:+
-% In a class, the pipe is not special. Otherwise, end the current
-% branch and open another one.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_|: } % pipe: alternation outside a class
- {
- \@@_if_in_class:TF { \@@_compile_raw:N | } % in a class: a raw character
- {
- \__tl_build_one:n
- { \if_false: { \fi: } \@@_branch:n { \if_false: } \fi: } % close the current branch and open a new one
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_):}
-% Within a class, parentheses are not special. Outside, close a group.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_): } % right parenthesis
- {
- \@@_if_in_class:TF { \@@_compile_raw:N ) } % in a class: a raw character
- { \@@_compile_group_end: } % elsewhere: close a group
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_special_group_::w}
-% \begin{macro}[aux]+\@@_compile_special_group_|:w+
-% Non-capturing and resetting groups are easy to take care of during
-% compilation; for those groups, the harder parts will come when
-% building.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_special_group_::w } % "(?:": group begun with \@@_group_no_capture:nnnN
- { \@@_compile_group_begin:N \@@_group_no_capture:nnnN }
-\cs_new_protected:cpn { @@_compile_special_group_|:w } % "(?|": group begun with \@@_group_resetting:nnnN
- { \@@_compile_group_begin:N \@@_group_resetting:nnnN }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {\@@_compile_special_group_i:w, \@@_compile_special_group_-:w}
-% The match can be made case-insensitive by setting the option with
-% \texttt{(?i)}; the original behaviour is restored by \texttt{(?-i)}.
-% This is the only supported option.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_special_group_i:w #1#2 % #1#2: pair following "(?i"
- {
- \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ) } % "(?i)" is complete
- {
- \cs_set:Npn \@@_item_equal:n { \@@_item_caseless_equal:n } % switch to the caseless item functions
- \cs_set:Npn \@@_item_range:nn { \@@_item_caseless_range:nn }
- }
- {
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?i #2 }
- \@@_compile_raw:N ( % re-read the consumed characters as raw ones
- \@@_compile_raw:N ?
- \@@_compile_raw:N i
- #1 #2 % then resume with the pair
- }
- }
-\cs_new_protected:cpn { @@_compile_special_group_-:w } #1#2#3#4 % #1#2 and #3#4: the two pairs following "(?-"
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \@@_compile_raw:N i \@@_compile_special:N ) } % "(?-i)" is complete
- {
- \cs_set:Npn \@@_item_equal:n { \@@_item_caseful_equal:n } % back to the caseful item functions
- \cs_set:Npn \@@_item_range:nn { \@@_item_caseful_range:nn }
- }
- {
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?-#2#4 }
- \@@_compile_raw:N ( % re-read the consumed characters as raw ones
- \@@_compile_raw:N ?
- \@@_compile_raw:N -
- #1 #2 #3 #4 % then resume with the two pairs
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Catcodes and csnames}
-%
-% \begin{macro}[aux]{\@@_compile_/c:, \@@_compile_c_test:NN}
-% The |\c| escape sequence can be followed by a capital letter
-% representing a character category, by a left bracket which starts a
-% list of categories, or by a brace group holding a regular expression
-% for a control sequence name. Otherwise, raise an error.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_/c: } % \c: examine what follows, but only where \@@_chk_c_allowed:T permits it
- { \@@_chk_c_allowed:T { \@@_compile_c_test:NN } }
-\cs_new_protected:Npn \@@_compile_c_test:NN #1#2 % #1#2: pair following \c
- {
- \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
- {
- \int_if_exist:cTF { c_@@_catcode_#2_int } % raw character for which a catcode constant exists
- {
- \int_set_eq:Nc \l_@@_catcodes_int { c_@@_catcode_#2_int }
- \l_@@_mode_int % case 0 gives the catcode mode, any other mode the catcode-in-class mode
- = \if_case:w \l_@@_mode_int
- \c_@@_catcode_mode_int
- \else:
- \c_@@_catcode_in_class_mode_int
- \fi:
- }
- }
- { \cs_if_exist_use:cF { @@_compile_c_#2:w } } % not raw: use \@@_compile_c_#2:w when it exists
- {
- \__msg_kernel_error:nnx { regex } { c-missing-category } {#2} % false branch shared by whichever inner test was selected
- #1 #2 % put the pair back
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {
-% \@@_compile_c_[:w,
-% \@@_compile_c_lbrack_loop:NN,
-% \@@_compile_c_lbrack_add:N,
-% \@@_compile_c_lbrack_end:,
-% }
-% When encountering |\c[|, the task is to collect uppercase letters
-% representing character categories. First check for |^| which negates
-% the list of category codes.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_c_[:w } #1#2 % #1#2: pair following "\c["
- {
- \l_@@_mode_int % case 0 gives the catcode mode, any other mode the catcode-in-class mode
- = \if_case:w \l_@@_mode_int
- \c_@@_catcode_mode_int
- \else:
- \c_@@_catcode_in_class_mode_int
- \fi:
- \int_zero:N \l_@@_catcodes_int % start from an empty set of categories
- \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ^ } % leading special "^": negated list
- {
- \bool_set_false:N \l_@@_catcodes_bool
- \@@_compile_c_lbrack_loop:NN % the "^" pair is consumed
- }
- {
- \bool_set_true:N \l_@@_catcodes_bool
- \@@_compile_c_lbrack_loop:NN
- #1 #2 % the pair is examined by the loop
- }
- }
-\cs_new_protected:Npn \@@_compile_c_lbrack_loop:NN #1#2 % #1#2: next pair inside "\c[...]"
- {
- \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
- {
- \int_if_exist:cTF { c_@@_catcode_#2_int } % raw character for which a catcode constant exists
- {
- \exp_args:Nc \@@_compile_c_lbrack_add:N
- { c_@@_catcode_#2_int }
- \@@_compile_c_lbrack_loop:NN % go on with the next pair
- }
- }
- {
- \token_if_eq_charcode:NNTF #2 ] % right bracket: end of the list
- { \@@_compile_c_lbrack_end: }
- }
- {
- \__msg_kernel_error:nnx { regex } { c-missing-rbrack } {#2} % false branch shared by whichever inner test was selected
- \@@_compile_c_lbrack_end: % finish the list anyway
- #1 #2 % put the pair back
- }
- }
-\cs_new_protected:Npn \@@_compile_c_lbrack_add:N #1 % #1: catcode constant to add to \l_@@_catcodes_int
- {
- \if_int_odd:w \__int_eval:w \l_@@_catcodes_int / #1 \__int_eval_end: % odd quotient: nothing to add -- presumably #1 is already included (TODO confirm the encoding)
- \else:
- \int_add:Nn \l_@@_catcodes_int {#1}
- \fi:
- }
-\cs_new_protected:Npn \@@_compile_c_lbrack_end: % finishes a "\c[...]" list
- {
- \if_meaning:w \c_false_bool \l_@@_catcodes_bool % the list was negated (leading "^")
- \int_set:Nn \l_@@_catcodes_int
- { \c_@@_all_catcodes_int - \l_@@_catcodes_int } % replace the collected value by its complement in "all catcodes"
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}+\@@_compile_c_{:w+
-% The case of a left brace is easy, based on what we have done so far:
-% in a group, compile the regular expression, after changing the mode
-% to forbid nesting |\c|. Additionally, disable submatch tracking
-% since groups don't escape the scope of |\c{...}|.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_c_ \c_left_brace_str :w }
- {
- % Start compiling the nested regex, switch off submatch tracking, then
- % enter a cs mode: the plain one when the current mode is case 0, the
- % in-class variant for any other mode.
- \@@_compile:w
- \@@_disable_submatches:
- \l_@@_mode_int
- = \if_case:w \l_@@_mode_int
- \c_@@_cs_mode_int
- \else:
- \c_@@_cs_in_class_mode_int
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}+\@@_compile_}:+
-% \begin{macro}{\@@_compile_end_cs:}
-% \begin{macro}[EXP,aux]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
-% Non-escaped right braces are only special if they appear when
-% compiling the regular expression for a csname, but not within a
-% class: |\c{[{}]}| matches the control sequences |\{| and |\}|. So,
-% end compiling the inner regex (this closes any dangling class or
-% group). Then insert the corresponding test in the outer regex. As
-% an optimization, if the control sequence test simply consists of
-% several explicit possibilities (branches) then use
-% \cs{@@_item_exact_cs:n} with an argument consisting of all
-% possibilities separated by \cs{scan_stop:}.
-% \begin{macrocode}
-\flag_new:n { @@_cs }
-\cs_new_protected:cpn { @@_compile_ \c_right_brace_str : }
- {
- % A right brace closes the csname regex when \@@_if_in_cs:TF is true;
- % otherwise it is compiled as a raw character (the \exp_after:wN
- % expands \c_right_brace_str to the brace character first).
- \@@_if_in_cs:TF
- { \@@_compile_end_cs: }
- { \exp_after:wN \@@_compile_raw:N \c_right_brace_str }
- }
-\cs_new_protected:Npn \@@_compile_end_cs:
- {
- \@@_compile_end:
- % Scan the inner compiled regex expandably. The flag ends up raised
- % if some item is not a plain explicit character; otherwise
- % \l_@@_internal_a_tl holds the explicit names, each preceded by
- % \scan_stop:.
- \flag_clear:n { @@_cs }
- \tl_set:Nx \l_@@_internal_a_tl
- {
- \exp_after:wN \@@_compile_cs_aux:Nn \l_@@_internal_regex
- \q_nil \q_nil \q_recursion_stop
- }
- % Flag raised: generic test carrying the whole inner regex. Flag not
- % raised: exact-name test; \tl_tail:N drops the leading \scan_stop:.
- \exp_args:Nx \@@_compile_one:x
- {
- \flag_if_raised:nTF { @@_cs }
- { \@@_item_cs:n { \exp_not:o \l_@@_internal_regex } }
- { \@@_item_exact_cs:n { \tl_tail:N \l_@@_internal_a_tl } }
- }
- }
-\cs_new:Npn \@@_compile_cs_aux:Nn #1#2
- {
- % Expandable loop over pairs <function> {<argument>} of the compiled
- % regex. For each \@@_branch:n it leaves \scan_stop: followed by
- % whatever the inner auxiliary produces for the branch #2, then loops.
- \cs_if_eq:NNTF #1 \@@_branch:n
- {
- \scan_stop:
- \@@_compile_cs_aux:NNnnnN #2
- \q_nil \q_nil \q_nil \q_nil \q_nil \q_nil \q_recursion_stop
- \@@_compile_cs_aux:Nn
- }
- {
- % Not a branch: raise the flag unless #1 is the \q_nil end marker,
- % then drop everything up to \q_recursion_stop to end the loop.
- \quark_if_nil:NF #1 { \flag_raise:n { @@_cs } }
- \use_none_delimit_by_q_recursion_stop:w
- }
- }
-\cs_new:Npn \@@_compile_cs_aux:NNnnnN #1#2#3#4#5#6
- {
- % Handles one item of a branch, read as six arguments. The item is
- % accepted when it is a class (#1), with a true boolean #2, whose
- % tests #3 are exactly one \@@_item_caseful_equal:n {<char code>}, and
- % whose #5 is 0.
- \bool_lazy_all:nTF
- {
- { \cs_if_eq_p:NN #1 \@@_class:NnnnN }
- {#2}
- { \tl_if_head_eq_meaning_p:nN {#3} \@@_item_caseful_equal:n }
- { \int_compare_p:nNn { \tl_count:n {#3} } = { 2 } }
- { \int_compare_p:nNn {#5} = { 0 } }
- }
- {
- % Leave #4 copies of the character (category code 12) and go on
- % with the next item.
- \prg_replicate:nn {#4}
- { \char_generate:nn { \use_ii:nn #3 } {12} }
- \@@_compile_cs_aux:NNnnnN
- }
- {
- % #1 is \q_nil: end of this branch, remove the padding up to the
- % nearest \q_recursion_stop. Otherwise raise the flag; the
- % \use_i_delimit... then keeps the \use_none_delimit... that
- % follows, so that a second \q_recursion_stop is consumed too.
- \quark_if_nil:NF #1
- {
- \flag_raise:n { @@_cs }
- \use_i_delimit_by_q_recursion_stop:nw
- }
- \use_none_delimit_by_q_recursion_stop:w
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \subsubsection{Raw token lists with \cs{u}}
-%
-% \begin{macro}[aux]{\@@_compile_/u:}
-% \begin{macro}[aux, EXP]{\@@_compile_u_loop:NN}
-% The |\u| escape is invalid in classes and directly following a
-% catcode test. Otherwise, it must be followed by a left brace. We
-% then collect the characters for the argument of |\u| within an
-% \texttt{x}-expanding assignment. In principle we could just wait to
-% encounter a right brace, but this is unsafe: if the right brace is
-% missing, then we will reach the end-markers of the regex, and
-% continue, leading to obscure fatal errors. Instead, we only allow
-% raw and special characters, and stop when encountering a special
-% right brace, any escaped character, or the end-marker.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_/u: } #1#2
- {
- % #1#2 is the pair following \u; it must be a special left brace.
- \@@_if_in_class_or_catcode:TF
- { \@@_compile_raw_error:N u #1 #2 }
- {
- \str_if_eq_x:nnTF {#1#2} { \@@_compile_special:N \c_left_brace_str }
- {
- % Open the x-type argument without a matching close brace in
- % this macro: the \if_false: ... \fi: pair hides the right brace
- % at expansion time. The loop closes the argument with the
- % mirror construction.
- \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi:
- \@@_compile_u_loop:NN
- }
- {
- \__msg_kernel_error:nn { regex } { u-missing-lbrace }
- \@@_compile_raw:N u #1 #2
- }
- }
- }
-\cs_new:Npn \@@_compile_u_loop:NN #1#2
- {
- % Runs inside the x-expansion started by \@@_compile_/u: and leaves
- % the characters of the variable name in that argument.
- \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
- { #2 \@@_compile_u_loop:NN }
- {
- \token_if_eq_meaning:NNTF #1 \@@_compile_special:N
- {
- % A special right brace closes the x-type argument and ends
- % the collection; other special characters are collected.
- \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
- { \if_false: { \fi: } \@@_compile_u_end: }
- { #2 \@@_compile_u_loop:NN }
- }
- {
- % Neither raw nor special: close the argument first, so that the
- % error and the clean-up run after the assignment, then
- % re-insert #1#2.
- \if_false: { \fi: }
- \__msg_kernel_error:nnx { regex } { u-missing-rbrace } {#2}
- \@@_compile_u_end:
- #1 #2
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_u_end:}
-% Once we have extracted the variable's name, we store the contents of
-% that variable in \cs{l_@@_internal_a_tl}. The behaviour of |\u|
-% then depends on whether we are within a |\c{...}| escape (in this
-% case, the variable is turned to a string), or not.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_u_end:
- {
- % \l_@@_internal_a_tl holds the variable name: replace it by the
- % content of the variable with that name (v-type argument).
- \tl_set:Nv \l_@@_internal_a_tl { \l_@@_internal_a_tl }
- % Outer mode: match the tokens as they are. Any other mode is handled
- % by the in-cs variant.
- \if_int_compare:w \l_@@_mode_int = \c_@@_outer_mode_int
- \@@_compile_u_not_cs:
- \else:
- \@@_compile_u_in_cs:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_u_in_cs:}
-% When |\u| appears within a control sequence, we convert the variable
-% to a string with escaped spaces. Then for each character insert a
-% class matching exactly that character, once.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_u_in_cs:
- {
- % Convert the content of the variable with \__str_to_other_fast:n,
- % then append one class per character of the result to the regex
- % being built.
- \tl_gset:Nx \g_@@_internal_tl
- { \exp_args:No \__str_to_other_fast:n { \l_@@_internal_a_tl } }
- \__tl_build_one:x
- {
- \tl_map_function:NN \g_@@_internal_tl
- \@@_compile_u_in_cs_aux:n
- }
- }
-\cs_new:Npn \@@_compile_u_in_cs_aux:n #1
- {
- % Positive class with a single caseful test on the character code of
- % #1, with <min> 1 and <more> 0.
- \@@_class:NnnnN \c_true_bool
- { \@@_item_caseful_equal:n { \__int_value:w `#1 } }
- { 1 } { 0 } \c_false_bool
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_compile_u_not_cs:}
-% In mode $0$, the |\u| escape adds one state to the NFA for each
-% token in \cs{l_@@_internal_a_tl}. If a given \meta{token} is a
-% control sequence, then insert a string comparison test, otherwise,
-% \cs{@@_item_exact:nn} which compares catcode and character code.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_u_not_cs:
- {
- % Map over the tokens of the variable's content. In the inline code
- % ##1 is used as the token itself, ##2 as a hexadecimal digit (0 being
- % treated as a control sequence, otherwise passed on as the category
- % code), and ##3 as the character code.
- \exp_args:No \__tl_analysis_map_inline:nn { \l_@@_internal_a_tl }
- {
- \__tl_build_one:n
- {
- \@@_class:NnnnN \c_true_bool
- {
- \if_int_compare:w "##2 = 0 \exp_stop_f:
- \@@_item_exact_cs:n { \exp_after:wN \cs_to_str:N ##1 }
- \else:
- \@@_item_exact:nn { \__int_value:w "##2 } { ##3 }
- \fi:
- }
- { 1 } { 0 } \c_false_bool
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Other}
-%
-% \begin{macro}[aux]{\@@_compile_/K:}
-% The |\K| control sequence is currently the only \enquote{command},
-% which performs some action, rather than matching something. It is
-% allowed in the same contexts as |\b|. At the compilation stage, we
-% leave it as a single control sequence, defined later.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_/K: }
- {
- % Only valid in the outer mode: there it is stored as the single
- % token \@@_command_K:, elsewhere it is reported via
- % \@@_compile_raw_error:N.
- \int_compare:nNnTF \l_@@_mode_int = \c_@@_outer_mode_int
- { \__tl_build_one:n { \@@_command_K: } }
- { \@@_compile_raw_error:N K }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Showing regexes}
-%
-% \begin{macro}[aux]{\@@_show:Nn}
-% Within a \cs{__tl_build:Nw} \ldots{} \cs{__tl_build_end:} group, we
-% redefine all the functions that can appear in a compiled regex, then
-% run the regex. The result is then shown.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show:Nn #1#2
- {
- % #1 is the compiled regex variable, #2 a piece of text inserted in the
- % header of the message. All redefinitions below are made between
- % \__tl_build:Nw and \__tl_build_end:.
- \__tl_build:Nw \l_@@_internal_a_tl
- \cs_set_protected:Npn \@@_branch:n
- {
- % Print "+-branch" with the last prefix item removed, then put
- % that item back before running the branch itself.
- \seq_pop_right:NN \l_@@_show_prefix_seq \l_@@_internal_a_tl
- \@@_show_one:n { +-branch }
- \seq_put_right:No \l_@@_show_prefix_seq \l_@@_internal_a_tl
- \use:n
- }
- \cs_set_protected:Npn \@@_group:nnnN
- { \@@_show_group_aux:nnnnN { } }
- \cs_set_protected:Npn \@@_group_no_capture:nnnN
- { \@@_show_group_aux:nnnnN { ~(no~capture) } }
- \cs_set_protected:Npn \@@_group_resetting:nnnN
- { \@@_show_group_aux:nnnnN { ~(resetting) } }
- \cs_set_eq:NN \@@_class:NnnnN \@@_show_class:NnnnN
- \cs_set_protected:Npn \@@_command_K:
- { \@@_show_one:n { reset~match~start~(\iow_char:N\\K) } }
- \cs_set_protected:Npn \@@_assertion:Nn ##1##2
- { \@@_show_one:n { \bool_if:NF ##1 { negative~ } assertion:~##2 } }
- % The next two are not protected: they are expanded inside the
- % argument of \@@_show_one:n by the assertion code above.
- \cs_set:Npn \@@_b_test: { word~boundary }
- \cs_set_eq:NN \@@_anchor:N \@@_show_anchor_to_str:N
- \cs_set_protected:Npn \@@_item_caseful_equal:n ##1
- { \@@_show_one:n { char~code~\int_eval:n{##1} } }
- \cs_set_protected:Npn \@@_item_caseful_range:nn ##1##2
- { \@@_show_one:n { range~[\int_eval:n{##1}, \int_eval:n{##2}] } }
- \cs_set_protected:Npn \@@_item_caseless_equal:n ##1
- { \@@_show_one:n { char~code~\int_eval:n{##1}~(caseless) } }
- % NOTE(review): "Range" is capitalised here but "range" is lower case
- % in the caseful variant above; confirm whether this is intended.
- \cs_set_protected:Npn \@@_item_caseless_range:nn ##1##2
- {
- \@@_show_one:n
- { Range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) }
- }
- \cs_set_protected:Npn \@@_item_catcode:nT
- { \@@_show_item_catcode:NnT \c_true_bool }
- \cs_set_protected:Npn \@@_item_catcode_reverse:nT
- { \@@_show_item_catcode:NnT \c_false_bool }
- \cs_set_protected:Npn \@@_item_reverse:n
- { \@@_show_scope:nn { Reversed~match } }
- \cs_set_protected:Npn \@@_item_exact:nn ##1##2
- { \@@_show_one:n { char~##2,~catcode~##1 } }
- \cs_set_eq:NN \@@_item_exact_cs:n \@@_show_item_exact_cs:n
- \cs_set_protected:Npn \@@_item_cs:n
- { \@@_show_scope:nn { control~sequence } }
- \cs_set:cpn { @@_prop_.: } { \@@_show_one:n { any~token } }
- % Start from an empty prefix, then run the regex with the
- % definitions above (nothing happens if #1 does not exist).
- \seq_clear:N \l_@@_show_prefix_seq
- \@@_show_push:n { ~ }
- \cs_if_exist_use:N #1
- \__tl_build_end:
- \__msg_show_variable:NNNnn #1 \cs_if_exist:NTF ? { }
- { >~Compiled~regex~#2: \l_@@_internal_a_tl }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_show_one:n}
-% Every part of the final message goes through this function, which adds
-% one line to the output, with the appropriate prefix.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_one:n #1
- {
- % Count the line (the counter is read by \@@_show_class:NnnnN), then
- % append a line break, the items of the prefix sequence, and the
- % x-expansion of #1 to the token list being built.
- \int_incr:N \l_@@_show_lines_int
- \__tl_build_one:x
- {
- \exp_not:N \\
- \seq_map_function:NN \l_@@_show_prefix_seq \use:n
- #1
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {\@@_show_push:n, \@@_show_pop:, \@@_show_scope:nn}
-% Enter and exit levels of nesting. The \texttt{scope} function prints
-% its first argument as an \enquote{introduction}, then performs its
-% second argument in a deeper level of nesting.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_push:n #1
- % Append #1 followed by a space as a new (x-expanded) prefix item.
- { \seq_put_right:Nx \l_@@_show_prefix_seq { #1 ~ } }
-\cs_new_protected:Npn \@@_show_pop:
- % Remove the last prefix item; the popped value lands in
- % \l_@@_internal_a_tl and is not used here.
- { \seq_pop_right:NN \l_@@_show_prefix_seq \l_@@_internal_a_tl }
-\cs_new_protected:Npn \@@_show_scope:nn #1#2
- {
- % Show #1 as a line of its own, then run #2 with one extra prefix
- % item, removed again afterwards.
- \@@_show_one:n {#1}
- \@@_show_push:n { ~ }
- #2
- \@@_show_pop:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_show_group_aux:nnnnN}
-% We display all groups in the same way, simply adding a message,
-% \texttt{(no capture)} or \texttt{(resetting)}, to special groups.
-% The odd \cs{use_ii:nn} avoids printing a spurious \texttt{+-branch}
-% for the first branch.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_group_aux:nnnnN #1#2#3#4#5
- {
- % #1: text appended to the "group begin" line; #2: contents of the
- % group; #3, #4, #5: passed to \@@_msg_repeated:nnN for the end line.
- \@@_show_one:n { ,-group~begin #1 }
- \@@_show_push:n { | }
- % \use_ii:nn removes the first token of #2 (expected to be
- % \@@_branch:n) and unbraces its argument, so the first branch is run
- % without a "+-branch" line.
- \use_ii:nn #2
- \@@_show_pop:
- \@@_show_one:n
- { `-group~end \@@_msg_repeated:nnN {#3} {#4} #5 }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_show_class:NnnnN}
-% I'm entirely unhappy about this function: I couldn't find a way to
-% test if a class is a single test. Instead, collect the
-% representation of the tests in the class. If that had more than one
-% line, write \texttt{Match} or \texttt{Don't match} on its own line,
-% with the repeating information if any. Then the various tests on
-% lines of their own, and finally a line. Otherwise, we need to
-% evaluate the representation of the tests again (since the prefix is
-% incorrect). That's clunky, but not too expensive, since it's only
-% one test.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_class:NnnnN #1#2#3#4#5
- {
- % Arguments, as for \@@_class:NnnnN which this function replaces while
- % showing: <boolean> {<tests>} {<min>} {<more>} <lazyness>.
- % The function performs assignments, hence it is declared protected,
- % and with \cs_new_protected:Npn like its siblings so that an
- % accidental redefinition is detected.
- \__tl_build:Nw \l_@@_internal_a_tl
- \int_zero:N \l_@@_show_lines_int
- \@@_show_push:n {~}
- % First pass over the tests, with one extra prefix item: its only
- % certain use is to count the lines the tests produce.
- #2
- \exp_last_unbraced:Nf
- \int_case:nnF { \l_@@_show_lines_int }
- {
- {0}
- {
- % No test at all.
- \__tl_build_end:
- \@@_show_one:n { \bool_if:NTF #1 { Fail } { Pass } }
- }
- {1}
- {
- \__tl_build_end:
- \bool_if:NTF #1
- {
- % Single test in a positive class: run the test again, now
- % with the correct prefix, and append the repetition text.
- #2
- \__tl_build_one:n { \@@_msg_repeated:nnN {#3} {#4} #5 }
- }
- {
- \@@_show_one:n
- { Don't~match~\@@_msg_repeated:nnN {#3} {#4} #5 }
- \__tl_build_one:o \l_@@_internal_a_tl
- }
- }
- }
- {
- % Several tests: a header line, then the lines collected during
- % the first pass.
- \__tl_build_end:
- \@@_show_one:n
- {
- \bool_if:NTF #1 { M } { Don't~m } atch
- \@@_msg_repeated:nnN {#3} {#4} #5
- }
- \__tl_build_one:o \l_@@_internal_a_tl
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_show_anchor_to_str:N}
-% The argument is an integer telling us where the anchor is. We
-% convert that to the relevant info.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_anchor_to_str:N #1
- {
- % Expandable. #1 is compared as a string (by name, not by value) with
- % the three position registers; anything else gives the error text.
- anchor~at~
- \str_case:nnF { #1 }
- {
- { \l_@@_min_pos_int } { start~(\iow_char:N\\A) }
- { \l_@@_start_pos_int } { start~of~match~(\iow_char:N\\G) }
- { \l_@@_max_pos_int } { end~(\iow_char:N\\Z) }
- }
- { <error:~'#1'~not~recognized> }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_show_item_catcode:NnT}
-% Produce a sequence of categories which the catcode bitmap |#2|
-% contains, and show it, indenting the tests on which this catcode
-% constraint applies.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_item_catcode:NnT #1#2
- {
- % #1: boolean, false for a negative class; #2: catcode bitmap. The
- % third (T) argument of the signature is not read here: it is grabbed
- % by \@@_show_scope:nn below as the code to run one level deeper.
- % Split the letters into one item each (empty delimiter), then keep
- % those letters whose constant divides #2 with an odd quotient.
- \seq_set_split:Nnn \l_@@_internal_seq { } { CBEMTPUDSLOA }
- \seq_set_filter:NNn \l_@@_internal_seq \l_@@_internal_seq
- { \int_if_odd_p:n { #2 / \int_use:c { c_@@_catcode_##1_int } } }
- \@@_show_scope:nn
- {
- categories~
- \seq_map_function:NN \l_@@_internal_seq \use:n
- , ~
- \bool_if:NF #1 { negative~ } class
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_show_item_exact_cs:n}
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_show_item_exact_cs:n #1
- {
- % #1 is a list of names separated by \scan_stop:. Split it, put a
- % backslash in front of each name, and show the names on one line,
- % joined by " or ".
- \seq_set_split:Nnn \l_@@_internal_seq { \scan_stop: } {#1}
- \seq_set_map:NNn \l_@@_internal_seq
- \l_@@_internal_seq { \iow_char:N\\##1 }
- \@@_show_one:n
- { control~sequence~ \seq_use:Nn \l_@@_internal_seq { ~or~ } }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Building}
-%
-% \subsubsection{Variables used while building}
-%
-% \begin{variable}{\l_@@_min_state_int, \l_@@_max_state_int}
-% The last state that was allocated is $\cs{l_@@_max_state_int}-1$,
-% so that \cs{l_@@_max_state_int} always points to a free state.
-% The \texttt{min_state} variable is $1$, but is included to
-% avoid hard-coding this value everywhere.
-% \begin{macrocode}
-\int_new:N \l_@@_min_state_int
-\int_set:Nn \l_@@_min_state_int { 1 }
-\int_new:N \l_@@_max_state_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_left_state_int, \l_@@_right_state_int}
-% \begin{variable}{\l_@@_left_state_seq, \l_@@_right_state_seq}
-% Alternatives are implemented by branching from a \texttt{left} state
-% into the various choices, then merging those into a \texttt{right}
-% state. We store information about those states in two sequences.
-% Those states are also used to implement group quantifiers. Most
-% often, the left and right pointers only differ by~$1$.
-% \begin{macrocode}
-\int_new:N \l_@@_left_state_int
-\int_new:N \l_@@_right_state_int
-\seq_new:N \l_@@_left_state_seq
-\seq_new:N \l_@@_right_state_seq
-% \end{macrocode}
-% \end{variable}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_capturing_group_int}
-% \cs{l_@@_capturing_group_int} is the \textsc{id} number that will
-% be assigned to a capturing group if one was opened now. This starts
-% at $0$ for the group enclosing the full regular expression, and
-% groups are counted in the order of their left parenthesis, except
-% when encountering \texttt{resetting} groups.
-% \begin{macrocode}
-\int_new:N \l_@@_capturing_group_int
-% \end{macrocode}
-% \end{variable}
-%
-% \subsubsection{Framework}
-%
-% This phase is about going from a compiled regex to an \textsc{nfa}.
-% Each state of the \textsc{nfa} is stored in a \tn{toks}. The
-% operations which can appear in the \tn{toks} are
-% \begin{itemize}
-% \item \cs{@@_action_start_wildcard:} inserted at the start
-% of the regular expression to make it unanchored.
-% \item \cs{@@_action_success:} marks the exit state of the
-% \textsc{nfa}.
-% \item \cs{@@_action_cost:n} \Arg{shift} is a transition from the
-% current \meta{state} to $\meta{state}+\meta{shift}$, which
-% consumes the current character: the target state is saved and will
-% be considered again when matching at the next position.
-% \item \cs{@@_action_free:n} \Arg{shift}, and
-% \cs{@@_action_free_group:n} \Arg{shift} are free transitions,
-% which immediately perform the actions for the state
-% $\meta{state}+\meta{shift}$ of the \textsc{nfa}. They differ in
-% how they detect and avoid infinite loops. For now, we just need to
-% know that the \texttt{group} variant must be used for transitions
-% back to the start of a group.
-% \item \cs{@@_action_submatch:n} \Arg{key} where the \meta{key} is
-% a group number followed by |<| or |>| for the beginning or end of
-% group. This causes the current position in the query to be stored
-% as the \meta{key} submatch boundary.
-% \end{itemize}
-%
-% We strive to preserve the following properties while building.
-% \begin{itemize}
-% \item The current capturing group is
-% $\text{\texttt{capturing_group}}-1$, and if a group is opened now,
-% it will be labelled \texttt{capturing_group}.
-% \item The last allocated state is $\text{\texttt{max_state}}-1$, so
-% \texttt{max_state} is a free state.
-% \item The \texttt{left_state} points to a state to the left of the
-% current group or of the last class.
-% \item The \texttt{right_state} points to a newly created,
-% empty state, with some transitions leading to it.
-% \item The \texttt{left/right} sequences hold a list of the
-% corresponding end-points of nested groups.
-% \end{itemize}
-%
-% \begin{macro}[int]{\@@_build:n, \@@_build:N}
-% The \texttt{n}-type function first compiles its argument. Reset some
-% variables. Allocate two states, and put a wildcard in the first one,
-% \cs{l_@@_min_state_int} (transitions to the next state and to itself). Then build the regex
-% within a (capturing) group, which will be numbered $0$ (current
-% value of \texttt{capturing_group}). Finally, if the match reaches the
-% last state, it is successful.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_build:n #1
- {
- % Compile the regex #1, then build from \l_@@_internal_regex.
- \@@_compile:n {#1}
- \@@_build:N \l_@@_internal_regex
- }
-\cs_new_protected:Npn \@@_build:N #1
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_build }
- \@@_standard_escapechar:
- \int_zero:N \l_@@_capturing_group_int
- % Restart allocation at min_state and create two states; after this,
- % left_state is the first of them and right_state the second.
- \int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
- \@@_build_new_state:
- \@@_build_new_state:
- \@@_toks_put_right:Nn \l_@@_left_state_int
- { \@@_action_start_wildcard: }
- % The whole regex #1 is built as one capturing group (label 0, the
- % current value of capturing_group), with <min> 1 and <more> 0.
- \@@_group:nnnN {#1} { 1 } { 0 } \c_false_bool
- % The right state left by the group becomes the success state.
- \@@_toks_put_right:Nn \l_@@_right_state_int
- { \@@_action_success: }
-%<trace> \@@_trace_states:n { 2 }
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_build }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_build_for_cs:n}
-% When using a regex to match a cs, we don't insert a wildcard, we
-% anchor at the end, and since we ignore submatches, there is no need
-% to surround the expression with a group. However, for branches to
-% work properly at the outer level, we need to put the appropriate
-% \texttt{left} and \texttt{right} states in their sequence.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_build_for_cs:n #1
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_build_for_cs }
- % Same allocation as in \@@_build:N, but with no wildcard and no
- % enclosing group: the left/right states are pushed by hand so that
- % the branches in #1 can find them.
- \int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
- \@@_build_new_state:
- \@@_build_new_state:
- \@@_push_lr_states:
- #1
- \@@_pop_lr_states:
- % Success only when the current position is max_pos. The test is
- % stored unexpanded, so it runs when the state is executed.
- \@@_toks_put_right:Nn \l_@@_right_state_int
- {
- \if_int_compare:w \l_@@_current_pos_int = \l_@@_max_pos_int
- \exp_after:wN \@@_action_success:
- \fi:
- }
-%<trace> \@@_trace_states:n { 2 }
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_build_for_cs }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Helpers for building an \textsc{nfa}}
-%
-% \begin{macro}[int]{\@@_push_lr_states:, \@@_pop_lr_states:}
-% When building the regular expression, we keep track of pointers to
-% the left-end and right-end of each group without help from \TeX{}'s
-% grouping.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_push_lr_states:
- {
- % Store the current values (as digits) of the left and right state
- % pointers at the front of their respective sequences.
- \seq_put_left:Nx \l_@@_left_state_seq
- { \int_use:N \l_@@_left_state_int }
- \seq_put_left:Nx \l_@@_right_state_seq
- { \int_use:N \l_@@_right_state_int }
- }
-\cs_new_protected:Npn \@@_pop_lr_states:
- {
- % Restore both pointers from the front of the sequences, left first,
- % going through \l_@@_internal_a_tl (which ends up holding the right
- % value).
- \seq_pop_left:NN \l_@@_left_state_seq \l_@@_internal_a_tl
- \int_set:Nn \l_@@_left_state_int { \l_@@_internal_a_tl }
- \seq_pop_left:NN \l_@@_right_state_seq \l_@@_internal_a_tl
- \int_set:Nn \l_@@_right_state_int { \l_@@_internal_a_tl }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]
-% {
-% \@@_build_transition_left:NNN,
-% \@@_build_transition_right:nNn
-% }
-% Add a transition from |#2| to |#3| using the function |#1|. The
-% \texttt{left} function is used for higher priority transitions, and
-% the \texttt{right} function for lower priority transitions (which
-% should be performed later). The signatures differ to reflect the
-% differing usage later on. Both functions could be optimized.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_build_transition_left:NNN #1#2#3
- % Store #1 {<#3 - #2>} at the left of state #2: the target is recorded
- % as a shift relative to the source state, evaluated now.
- { \@@_toks_put_left:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
-\cs_new_protected:Npn \@@_build_transition_right:nNn #1#2#3
- % Same, but stored at the right of state #2.
- { \@@_toks_put_right:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_build_new_state:}
-% Add a new empty state to the \textsc{nfa}. Then update the
-% \texttt{left}, \texttt{right}, and \texttt{max} states, so that the
-% \texttt{right} state is the new empty state, and the \texttt{left}
-% state points to the previously \enquote{current} state.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_build_new_state:
- {
-%<*trace>
- \trace:nnx { regex } { 2 }
- {
- regex~new~state~
- L=\int_use:N \l_@@_left_state_int ~ -> ~
- R=\int_use:N \l_@@_right_state_int ~ -> ~
- M=\int_use:N \l_@@_max_state_int ~ -> ~
- \int_eval:n { \l_@@_max_state_int + 1 }
- }
-%</trace>
- % Empty the state numbered max_state, then shift the pointers:
- % left <- right, right <- max, max <- max + 1.
- \@@_toks_clear:N \l_@@_max_state_int
- \int_set_eq:NN \l_@@_left_state_int \l_@@_right_state_int
- \int_set_eq:NN \l_@@_right_state_int \l_@@_max_state_int
- \int_incr:N \l_@@_max_state_int
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_build_transitions_lazyness:NNNNN}
-% This function creates a new state, and puts two transitions starting
-% from the old current state. The order of the transitions is
-% controlled by |#1|, true for lazy quantifiers, and false for greedy
-% quantifiers.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_build_transitions_lazyness:NNNNN #1#2#3#4#5
- {
- % #1: lazyness boolean; #2/#3 and #4/#5: two (function, target state)
- % pairs. The new state is allocated first, so integer registers given
- % as #3 or #5 are read with their values after the allocation.
- \@@_build_new_state:
- \@@_toks_put_right:Nx \l_@@_left_state_int
- {
- % #1 true: the #2/#3 transition is stored first; false: the
- % #4/#5 transition is stored first.
- \if_meaning:w \c_true_bool #1
- #2 { \int_eval:n { #3 - \l_@@_left_state_int } }
- #4 { \int_eval:n { #5 - \l_@@_left_state_int } }
- \else:
- #4 { \int_eval:n { #5 - \l_@@_left_state_int } }
- #2 { \int_eval:n { #3 - \l_@@_left_state_int } }
- \fi:
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Building classes}
-%
-% \begin{macro}[int]{\@@_class:NnnnN}
-% \begin{macro}[int, rEXP]{\@@_tests_action_cost:n}
-% The arguments are: \meta{boolean} \Arg{tests} \Arg{min} \Arg{more}
-% \meta{lazyness}. First store the tests with a trailing
-% \cs{@@_action_cost:n}, in the true branch of
-% \cs{@@_break_point:TF} for positive classes, or the false branch
-% for negative classes. The integer \meta{more} is $0$ for fixed
-% repetitions, $-1$ for unbounded repetitions, and
-% $\meta{max}-\meta{min}$ for a range of repetitions.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_class:NnnnN #1#2#3#4#5
- {
- % Redefine \@@_tests_action_cost:n for this class: the tests #2
- % followed by \@@_break_point:TF with the cost action in the true
- % branch (positive class) or in the false branch (negative class).
- % The double \exp_not:n protects #2 through this x-type definition and
- % once more (presumably for the x-type storing of the transition).
- \cs_set:Npx \@@_tests_action_cost:n ##1
- {
- \exp_not:n { \exp_not:n {#2} }
- \bool_if:NTF #1
- { \@@_break_point:TF { \@@_action_cost:n {##1} } { } }
- { \@@_break_point:TF { } { \@@_action_cost:n {##1} } }
- }
- % Dispatch on -#4: 0 is a fixed repetition, 1 (that is #4 = -1) an
- % unbounded one, anything else a range.
- \if_case:w - #4 \exp_stop_f:
- \@@_class_repeat:n {#3}
- \or: \@@_class_repeat:nN {#3} #5
- \else: \@@_class_repeat:nnN {#3} {#4} #5
- \fi:
- }
-\cs_new:Npn \@@_tests_action_cost:n { \@@_action_cost:n }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_class_repeat:n}
-% This is used for a fixed number of repetitions. Build one state for
-% each repetition, with a transition controlled by the tests that we
-% have collected. That works just fine for |#1|${}=0$ repetitions:
-% nothing is built.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_class_repeat:n #1
- {
- % #1 times: allocate a state and add, in the previous state (now
- % left_state), a test-controlled transition to the new right_state.
- \prg_replicate:nn {#1}
- {
- \@@_build_new_state:
- \@@_build_transition_right:nNn \@@_tests_action_cost:n
- \l_@@_left_state_int \l_@@_right_state_int
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_class_repeat:nN}
-% This implements unbounded repetitions of a single class (\emph{e.g.}
-% the |*| and |+| quantifiers). If the minimum number |#1| of
-% repetitions is $0$, then build a transition from the current state
-% to itself governed by the tests, and a free transition to a new
-% state (hence skipping the tests). Otherwise, call
-% \cs{@@_class_repeat:n} for the code to match |#1| repetitions,
-% and add free transitions from the last state to the previous one,
-% and to a new one. In both cases, the order of transitions is
-% controlled by the lazyness boolean |#2|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_class_repeat:nN #1#2
- {
- % #1: minimum number of repetitions; #2: lazyness boolean.
- \if_int_compare:w #1 = 0 \exp_stop_f:
- % The registers are read after the auxiliary has allocated its new
- % state: free transition to that new state, test-controlled
- % transition from the old state to itself.
- \@@_build_transitions_lazyness:NNNNN #2
- \@@_action_free:n \l_@@_right_state_int
- \@@_tests_action_cost:n \l_@@_left_state_int
- \else:
- \@@_class_repeat:n {#1}
- % Remember the source state of the last repetition, then add free
- % transitions to the new state and back to that remembered state.
- \int_set_eq:NN \l_@@_internal_a_int \l_@@_left_state_int
- \@@_build_transitions_lazyness:NNNNN #2
- \@@_action_free:n \l_@@_right_state_int
- \@@_action_free:n \l_@@_internal_a_int
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_class_repeat:nnN}
-% We want to build the code to match from |#1| to $|#1|+|#2|$
-% repetitions. Match |#1| repetitions (can be $0$). Compute the final
-% state of the next construction as \texttt{a}. Build $|#2|>0$ states,
-% each with a transition to the next state governed by the tests, and
-% a transition to the final state \texttt{a}. The computation of
-% \texttt{a} is safe because states are allocated in order, starting
-% from \texttt{max_state}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_class_repeat:nnN #1#2#3
- {
- % #1: minimum; #2: number of additional optional repetitions;
- % #3: lazyness boolean.
- \@@_class_repeat:n {#1}
- % internal_a is the number of the last of the #2 states allocated by
- % the loop below (each iteration allocates exactly one state).
- \int_set:Nn \l_@@_internal_a_int
- { \l_@@_max_state_int + #2 - 1 }
- \prg_replicate:nn { #2 }
- {
- % From each state: a free transition to the final state and a
- % test-controlled transition to the newly allocated state.
- \@@_build_transitions_lazyness:NNNNN #3
- \@@_action_free:n \l_@@_internal_a_int
- \@@_tests_action_cost:n \l_@@_right_state_int
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Building groups}
-%
-% \begin{macro}[aux]{\@@_group_aux:nnnnN}
-% Arguments: \Arg{label} \Arg{contents} \Arg{min} \Arg{more}
-% \meta{lazyness}. If \meta{min} is $0$, we need to add a state before
-% building the group, so that the thread which skips the group does
-% not also set the start-point of the submatch. After adding one more
-% state, the \texttt{left_state} is the left end of the group, from
-% which all branches will stem, and the \texttt{right_state} is the
-% right end of the group, and all branches end their course in that
-% state. We store those two integers to be queried for each branch, we
-% build the \textsc{nfa} states for the contents |#2| of the group,
-% and we forget about the two integers. Once this is done, perform the
-% repetition: either exactly |#3| times, or |#3| or more times, or
-% between |#3| and $|#3|+|#4|$ times, with lazyness |#5|. The
-% \meta{label} |#1| is used for submatch tracking. Each of the three
-% auxiliaries expects \texttt{left_state} and \texttt{right_state} to
-% be set properly.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group_aux:nnnnN #1#2#3#4#5
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_group }
- % With <min> = 0, insert an extra state linked by a "free_group"
- % transition before the group proper.
- \if_int_compare:w #3 = 0 \exp_stop_f:
- \@@_build_new_state:
-%<assert>\assert_int:n { \l_@@_max_state_int = \l_@@_right_state_int + 1 }
- \@@_build_transition_right:nNn \@@_action_free_group:n
- \l_@@_left_state_int \l_@@_right_state_int
- \fi:
- % Allocate the right end of the group, expose both ends to the
- % branches through the sequences while the contents #2 are built,
- % then restore the pointers.
- \@@_build_new_state:
- \@@_push_lr_states:
- #2
- \@@_pop_lr_states:
- % Same dispatch on -#4 as for classes: fixed, unbounded, or range.
- \if_case:w - #4 \exp_stop_f:
- \@@_group_repeat:nn {#1} {#3}
- \or: \@@_group_repeat:nnN {#1} {#3} #5
- \else: \@@_group_repeat:nnnN {#1} {#3} {#4} #5
- \fi:
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_group }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_group:nnnN, \@@_group_no_capture:nnnN}
-% Hand to \cs{@@_group_aux:nnnnN} the label of that group
-% (expanded), and the group itself, with some extra commands to
-% perform.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group:nnnN #1
- {
- % The label is expanded to digits now; the counter is only incremented
- % later, when the contents are run by \@@_group_aux:nnnnN. The three
- % remaining arguments are picked up directly from the input.
- \exp_args:No \@@_group_aux:nnnnN
- { \int_use:N \l_@@_capturing_group_int }
- {
- \int_incr:N \l_@@_capturing_group_int
- #1
- }
- }
-\cs_new_protected:Npn \@@_group_no_capture:nnnN
- % Label -1: no submatch tracking for this group.
- { \@@_group_aux:nnnnN { -1 } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_group_resetting:nnnN}
-% \begin{macro}[aux]{\@@_group_resetting_loop:nnNn}
-% Again, hand the label $-1$ to \cs{@@_group_aux:nnnnN}, but this
-% time we work a little bit harder to keep track of the maximum group
-% label at the end of any branch, and to reset the group number at
-% each branch. This relies on the fact that a compiled regex always is
-% a sequence of items of the form \cs{@@_branch:n} \Arg{branch}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group_resetting:nnnN #1
- {
- \@@_group_aux:nnnnN { -1 }
- {
- % Loop over the branches of #1 with two copies of the current group
- % number: a running maximum and the value to reset to. The trailing
- % { ?? \__prg_break:n } { } pair is the end marker.
- \exp_args:Noo \@@_group_resetting_loop:nnNn
- { \int_use:N \l_@@_capturing_group_int }
- { \int_use:N \l_@@_capturing_group_int }
- #1
- { ?? \__prg_break:n } { }
- \__prg_break_point:
- }
- }
-\cs_new_protected:Npn \@@_group_resetting_loop:nnNn #1#2#3#4
- {
- % #1: maximum so far; #2: reset value; #3 {#4}: the next item.
- % For a real branch \use_none:nn discards #3 and the braced group.
- % For the end marker it discards the two "?", so \__prg_break:n runs
- % the braced group (setting the counter to the maximum) and leaves
- % the loop.
- \use_none:nn #3 { \int_set:Nn \l_@@_capturing_group_int {#1} }
- \int_set:Nn \l_@@_capturing_group_int {#2}
- #3 {#4}
- \exp_args:Nf \@@_group_resetting_loop:nnNn
- { \int_max:nn {#1} { \l_@@_capturing_group_int } }
- {#2}
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_branch:n}
-% Add a free transition from the left state of the current group to a
-% brand new state, starting point of this branch. Once the branch is
-% built, add a transition from its last state to the right state of
-% the group. The left and right states of the group are extracted from
-% the relevant sequences.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_branch:n #1
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_branch }
- % New state for the start of the branch; left_state is then reset to
- % the group's left end (read, not popped, from the sequence) and
- % linked to the new state by a free transition.
- \@@_build_new_state:
- \seq_get:NN \l_@@_left_state_seq \l_@@_internal_a_tl
- \int_set:Nn \l_@@_left_state_int \l_@@_internal_a_tl
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_left_state_int \l_@@_right_state_int
- #1
- % Link the last state of the branch to the group's right end.
- \seq_get:NN \l_@@_right_state_seq \l_@@_internal_a_tl
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_right_state_int \l_@@_internal_a_tl
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_branch }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_group_repeat:nn}
-% This function is called to repeat a group a fixed number of times
-% |#2|; if this is $0$ we remove the group altogether (but don't reset
-% the \texttt{capturing_group} label). Otherwise, the auxiliary
-% \cs{@@_group_repeat_aux:n} copies |#2| times the \tn{toks} for
-% the group, and leaves \texttt{internal_a} pointing to the left end
-% of the last repetition. We only record the submatch information at
-% the last repetition. Finally, add a state at the end (the transition
-% to it has been taken care of by the replicating auxiliary).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group_repeat:nn #1#2
- {
- % #1: group label; #2: fixed number of repetitions.
- \if_int_compare:w #2 = 0 \exp_stop_f:
- % Zero repetitions: move the allocation pointer back to just before
- % the group's left state and allocate a fresh state there.
- \int_set:Nn \l_@@_max_state_int
- { \l_@@_left_state_int - 1 }
- \@@_build_new_state:
- \else:
- \@@_group_repeat_aux:n {#2}
- % Submatch tracking goes in the states numbered internal_a and
- % right_state, as left by the auxiliary.
- \@@_group_submatches:nNN {#1}
- \l_@@_internal_a_int \l_@@_right_state_int
- \@@_build_new_state:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_submatches:nNN}
-% This inserts in states |#2| and |#3| the code for tracking
-% submatches of the group |#1|, unless inhibited by a label of $-1$.
-% \begin{macrocode}
- % #1 : group label, #2 : state receiving the "start of submatch"
- % code, #3 : state receiving the "end of submatch" code.
-\cs_new_protected:Npn \@@_group_submatches:nNN #1#2#3
-  {
-    % Labels below zero disable tracking.
-    \int_compare:nNnT {#1} > { -1 }
-      {
-        \@@_toks_put_left:Nx #2 { \@@_action_submatch:n { #1 < } }
-        \@@_toks_put_left:Nx #3 { \@@_action_submatch:n { #1 > } }
-      }
-  }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_repeat_aux:n}
-% Here we repeat \tn{toks} ranging from \texttt{left_state} to
-% \texttt{max_state}, $|#1|>0$ times. First add a transition so that
-% the copies will \enquote{chain} properly. Compute the shift
-% \texttt{c} between the original copy and the last copy we
-% want. Shift the \texttt{right_state} and \texttt{max_state} to their
-% final values. We then want to perform \texttt{c} copy operations. At
-% the end, \texttt{b} is equal to the \texttt{max_state}, and
-% \texttt{a} points to the left of the last copy of the group.
-% \begin{macrocode}
- % #1 : number of copies wanted (may be an integer expression).
-\cs_new_protected:Npn \@@_group_repeat_aux:n #1
- {
- % Free transition from right_state to max_state so that copies chain.
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_right_state_int \l_@@_max_state_int
- % a and b record the left_state and max_state of the original copy.
- \int_set_eq:NN \l_@@_internal_a_int \l_@@_left_state_int
- \int_set_eq:NN \l_@@_internal_b_int \l_@@_max_state_int
- % \__int_eval:w lets #1 be an expression; nothing to copy unless #1 > 1.
- \if_int_compare:w \__int_eval:w #1 > 1 \exp_stop_f:
- % c = (number of extra copies) * (b - a), the total shift.
- \int_set:Nn \l_@@_internal_c_int
- {
- ( #1 - 1 )
- * ( \l_@@_internal_b_int - \l_@@_internal_a_int )
- }
- % Shift right_state and max_state by c.
- \int_add:Nn \l_@@_right_state_int { \l_@@_internal_c_int }
- \int_add:Nn \l_@@_max_state_int { \l_@@_internal_c_int }
- % Perform the copy; \@@_toks_memcpy:NNn is defined outside this excerpt.
- \@@_toks_memcpy:NNn
- \l_@@_internal_b_int
- \l_@@_internal_a_int
- \l_@@_internal_c_int
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_repeat:nnN}
-% This function is called to repeat a group at least $n$ times; the
-% case $n=0$ is very different from $n>0$. Assume first that $n=0$.
-% Insert submatch tracking information at the start and end of the
-% group, add a free transition from the right end to the
-% \enquote{true} left state \texttt{a} (remember: in this case we had
-% added an extra state before the left state). This forms the loop,
-% which we break away from by adding a free transition from \texttt{a}
-% to a new state.
-%
-% Now consider the case $n>0$. Repeat the group $n$ times, chaining
-% various copies with a free transition. Add submatch tracking only to
-% the last copy, then add a free transition from the right end back to
-% the left end of the last copy, either before or after the transition
-% to move on towards the rest of the \textsc{nfa}. This transition can
-% end up before submatch tracking, but that is irrelevant since it
-% only does so when going again through the group, recording new
-% matches. Finally, add a state; we already have a transition pointing
-% to it from \cs{@@_group_repeat_aux:n}.
-% \begin{macrocode}
- % #1 : capturing-group label
- % #2 : minimum number of repetitions
- % #3 : boolean choosing whether the relevant transition is added with
- % \@@_build_transition_left:NNN or \@@_build_transition_right:nNn
-\cs_new_protected:Npn \@@_group_repeat:nnN #1#2#3
- {
- \if_int_compare:w #2 = 0 \exp_stop_f:
- % Zero or more repetitions: track submatches on the group itself.
- \@@_group_submatches:nNN {#1}
- \l_@@_left_state_int \l_@@_right_state_int
- % a is the state just before the left state.
- \int_set:Nn \l_@@_internal_a_int
- { \l_@@_left_state_int - 1 }
- % Loop: free transition from the right end back to a.
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_right_state_int \l_@@_internal_a_int
- \@@_build_new_state:
- % Exit: free transition from a to right_state, added on the left
- % or on the right of the existing instructions depending on #3.
- \if_meaning:w \c_true_bool #3
- \@@_build_transition_left:NNN \@@_action_free:n
- \l_@@_internal_a_int \l_@@_right_state_int
- \else:
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_internal_a_int \l_@@_right_state_int
- \fi:
- \else:
- % At least #2 > 0 repetitions: copy the group #2 times and track
- % submatches on the last copy only (a is its left end).
- \@@_group_repeat_aux:n {#2}
- \@@_group_submatches:nNN {#1}
- \l_@@_internal_a_int \l_@@_right_state_int
- % Group-repeating transition from the right end back to a, added on
- % the right or on the left depending on #3.
- \if_meaning:w \c_true_bool #3
- \@@_build_transition_right:nNn \@@_action_free_group:n
- \l_@@_right_state_int \l_@@_internal_a_int
- \else:
- \@@_build_transition_left:NNN \@@_action_free_group:n
- \l_@@_right_state_int \l_@@_internal_a_int
- \fi:
- \@@_build_new_state:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_repeat:nnnN}
-% We wish to repeat the group between |#2| and $|#2|+|#3|$ times, with
-% a laziness controlled by |#4|. We insert submatch tracking up front:
-% in principle, we could avoid recording submatches for the first |#2|
-% copies of the group, but that forces us to treat specially the case
-% $|#2|=0$. Repeat that group with submatch tracking $|#2|+|#3|$ times
-% (the maximum number of repetitions). Then our goal is to add |#3|
-% transitions from the end of the |#2|-th group, and each subsequent
-% group, to the end. For a lazy quantifier, we add those transitions
-% to the left states, before submatch tracking. For the greedy case,
-% we add the transitions to the right states, after submatch tracking
-% and the transitions which go on with more repetitions. In the greedy
-% case with $|#2|=0$, the transition which skips over all copies of
-% the group must be added separately, because its starting state does
-% not follow the normal pattern: we had to add it \enquote{by hand}
-% earlier.
-% \begin{macrocode}
- % #1 : capturing-group label
- % #2 : minimum number of repetitions
- % #3 : number of additional, optional repetitions
- % #4 : boolean, true for the branch that adds transitions on the left
- % of the left states (the lazy case)
-\cs_new_protected:Npn \@@_group_repeat:nnnN #1#2#3#4
- {
- % Track submatches on the original group, so every copy has them.
- \@@_group_submatches:nNN {#1}
- \l_@@_left_state_int \l_@@_right_state_int
- % Make the maximum number of copies.
- \@@_group_repeat_aux:n { #2 + #3 }
- \if_meaning:w \c_true_bool #4
- % Start from max_state and step back one copy (b - a) at a time,
- % adding #3 transitions to max_state.
- \int_set_eq:NN \l_@@_left_state_int \l_@@_max_state_int
- \prg_replicate:nn { #3 }
- {
- \int_sub:Nn \l_@@_left_state_int
- { \l_@@_internal_b_int - \l_@@_internal_a_int }
- \@@_build_transition_left:NNN \@@_action_free:n
- \l_@@_left_state_int \l_@@_max_state_int
- }
- \else:
- % Step right_state back one copy at a time, adding #3 - 1
- % transitions to max_state.
- \prg_replicate:nn { #3 - 1 }
- {
- \int_sub:Nn \l_@@_right_state_int
- { \l_@@_internal_b_int - \l_@@_internal_a_int }
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_right_state_int \l_@@_max_state_int
- }
- % The last transition starts from the state before left_state when
- % #2 = 0, and from one more copy back otherwise.
- \if_int_compare:w #2 = 0 \exp_stop_f:
- \int_set:Nn \l_@@_right_state_int
- { \l_@@_left_state_int - 1 }
- \else:
- \int_sub:Nn \l_@@_right_state_int
- { \l_@@_internal_b_int - \l_@@_internal_a_int }
- \fi:
- \@@_build_transition_right:nNn \@@_action_free:n
- \l_@@_right_state_int \l_@@_max_state_int
- \fi:
- \@@_build_new_state:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Others}
-%
-% \begin{macro}[int]{\@@_assertion:Nn, \@@_b_test:, \@@_anchor:N}
-% Usage: \cs{@@_assertion:Nn} \meta{boolean} \Arg{test}, where the
-% \meta{test} is either of the two other functions. Add a free
-% transition to a new state, conditionally to the assertion test. The
-% \cs{@@_b_test:} test is used by the |\b| and |\B| escape: check
-% if the last character was a word character or not, and do the same
-% to the current character. The boundary-markers of the string are
-% non-word characters for this purpose. Anchors at the start or end
-% of match use \cs{@@_anchor:N}, with a position controlled by the
-% integer |#1|.
-% \begin{macrocode}
- % #1 : boolean, the outcome of the test for which the transition is taken
- % #2 : test code, stored unexpanded in the instructions of the state
-\cs_new_protected:Npn \@@_assertion:Nn #1#2
- {
- \@@_build_new_state:
- \@@_toks_put_right:Nx \l_@@_left_state_int
- {
- \exp_not:n {#2}
- \@@_break_point:TF
- % When #1 is false an empty true-branch is inserted here, which makes
- % the transition below the false-branch.
- \bool_if:NF #1 { { } }
- {
- \@@_action_free:n
- {
- % Transitions are stored as shifts relative to the current state.
- \int_eval:n
- { \l_@@_right_state_int - \l_@@_left_state_int }
- }
- }
- % When #1 is true the transition above is the true-branch and an empty
- % false-branch is appended.
- \bool_if:NT #1 { { } }
- }
- }
- % #1 : integer giving the position at which the anchor matches.
-\cs_new_protected:Npn \@@_anchor:N #1
- {
- \if_int_compare:w #1 = \l_@@_current_pos_int
- % \exp_after:wN expands \fi: before \@@_break_true:w is run.
- \exp_after:wN \@@_break_true:w
- \fi:
- }
- % Test used for \b and \B.
-\cs_new_protected:Npn \@@_b_test:
- {
- % Inside a group, run \@@_prop_w: with current_char set to last_char;
- % both branches below close the group, undoing the assignment.
- \group_begin:
- \int_set_eq:NN \l_@@_current_char_int \l_@@_last_char_int
- \@@_prop_w:
- \@@_break_point:TF
- % true branch: run the reversed test on the current character
- { \group_end: \@@_item_reverse:n \@@_prop_w: }
- % false branch: run the plain test on the current character
- { \group_end: \@@_prop_w: }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_command_K:}
-% Change the starting point of the $0$-th submatch (full match), and
-% transition to a new state, pretending that this is a fresh thread.
-% \begin{macrocode}
- % Build the state implementing the \K command.
-\cs_new_protected:Npn \@@_command_K:
- {
- \@@_build_new_state:
- \@@_toks_put_right:Nx \l_@@_left_state_int
- {
- % Record the current position as the start of submatch 0.
- \@@_action_submatch:n { 0< }
- % The free transition is taken with fresh_thread set to true; the
- % boolean is set back to false afterwards.
- \bool_set_true:N \l_@@_fresh_thread_bool
- \@@_action_free:n
- { \int_eval:n { \l_@@_right_state_int - \l_@@_left_state_int } }
- \bool_set_false:N \l_@@_fresh_thread_bool
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Matching}
-%
-% We search for matches by running all the execution threads through the
-% \textsc{nfa} in parallel, reading one token of the query at each step.
-% The \textsc{nfa} contains \enquote{free} transitions to other states,
-% and transitions which \enquote{consume} the current token. For free
-% transitions, the instruction at the new state of the \textsc{nfa} is
-% performed immediately. When a transition consumes a character, the
-% new state is appended to a list of \enquote{active states}, stored in
-% \cs{g_@@_thread_state_intarray}: this thread will be active again when the next
-% token is read from the query. At every step (for each token in the
-% query), we unpack that list of active states and the corresponding
-% submatch props, and empty those.
-%
-% If two paths through the \textsc{nfa} \enquote{collide} in the sense
-% that they reach the same state after reading a given token, then they
-% only differ in how they previously matched, and the future execution
-% will be identical for both. (Note that this would be wrong in the
-% presence of back-references.) Hence, we only need to keep one of the
-% two threads: the thread with the highest priority. Our \textsc{nfa} is
-% built in such a way that higher priority actions always come before
-% lower priority actions, which makes things work.
-%
-% The explanation in the previous paragraph may make us think that we
-% simply need to keep track of which states were visited at a given
-% step: after all, the loop generated when matching |(a?)*| against |a|
-% is broken, isn't it? No. The group first matches |a|, as it should,
-% then repeats; it attempts to match |a| again but fails; it skips |a|,
-% and finds out that this state has already been seen at this position
-% in the query: the match stops. The capturing group is (wrongly) |a|.
-% What went wrong is that a thread collided with itself, and the later
-% version, which has gone through the group one more time with an empty
-% match, should have a higher priority than not going through the group.
-%
-% We solve this by distinguishing \enquote{normal} free transitions
-% \cs{@@_action_free:n} from transitions
-% \cs{@@_action_free_group:n} which go back to the start of the
-% group. The former will keep threads unless they have been visited by a
-% \enquote{completed} thread, while the latter kind of transition also
-% prevents going back to a state visited by the current thread.
-%
-% \subsubsection{Variables used when matching}
-%
-% \begin{variable}
-% {
-% \l_@@_min_pos_int,
-% \l_@@_max_pos_int,
-% \l_@@_current_pos_int,
-% \l_@@_start_pos_int,
-% \l_@@_success_pos_int,
-% }
-% The tokens in the query are indexed from \texttt{min_pos} for the
-% first to $\texttt{max_pos}-1$ for the last, and their information is
-% stored in several arrays and \tn{toks} registers with those numbers. We
-% don't start from $0$ because the \tn{toks} registers with low
-% numbers are used to hold the states of the \textsc{nfa}. We match
-% without backtracking, keeping all threads in lockstep at the
-% \texttt{current_pos} in the query. The starting point of the current
-% match attempt is \texttt{start_pos}, and \texttt{success_pos},
-% updated whenever a thread succeeds, is used as the next starting
-% position.
-% \begin{macrocode}
-\int_new:N \l_@@_min_pos_int % index of the first query token
-\int_new:N \l_@@_max_pos_int % one past the index of the last query token
-\int_new:N \l_@@_current_pos_int % position currently examined
-\int_new:N \l_@@_start_pos_int % where the current match attempt starts
-\int_new:N \l_@@_success_pos_int % position recorded by the last success
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}
-% {
-% \l_@@_current_char_int,
-% \l_@@_current_catcode_int,
-% \l_@@_last_char_int,
-% \l_@@_case_changed_char_int
-% }
-% The character and category codes of the token at the current
-% position; the character code of the token at the previous position;
-% and the character code of the result of changing the case of the
-% current token (|A-Z|$\leftrightarrow$|a-z|). This last integer is
-% only computed when necessary, and is otherwise \cs{c_max_int}. The
-% \texttt{current_char} variable is also used in various other phases
-% to hold a character code.
-% \begin{macrocode}
-\int_new:N \l_@@_current_char_int % character code at the current position
-\int_new:N \l_@@_current_catcode_int % category code at the current position
-\int_new:N \l_@@_last_char_int % character code at the previous position
-\int_new:N \l_@@_case_changed_char_int % reset to \c_max_int at each step
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_current_state_int}
-% For every character in the token list, each of the active states is
-% considered in turn. The variable \cs{l_@@_current_state_int}
-% holds the state of the \textsc{nfa} which is currently considered:
-% transitions are then given as shifts relative to the current state.
-% \begin{macrocode}
-\int_new:N \l_@@_current_state_int % state whose instructions are being run
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}
-% {\l_@@_current_submatches_prop, \l_@@_success_submatches_prop}
-% The submatches for the thread which is currently active are stored
-% in the \texttt{current_submatches} property list variable. This
-% property list is stored by \cs{@@_action_cost:n} (through
-% \cs{@@_store_submatches:}) into the \tn{toks} register whose number
-% is the index of the new thread in the array of active threads, to be
-% retrieved when matching at the next position. When a thread
-% succeeds, this property list is copied to
-% \cs{l_@@_success_submatches_prop}: only the last successful thread
-% will remain there.
-% \begin{macrocode}
-\prop_new:N \l_@@_current_submatches_prop % submatches of the active thread
-\prop_new:N \l_@@_success_submatches_prop % copy made when a thread succeeds
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_step_int}
-% This integer, always even, is increased every time a character in
-% the query is read, and not reset when doing multiple matches. We
-% store in \cs{g_@@_state_active_intarray} the last step in which each
-% \meta{state} in the \textsc{nfa} was encountered. This lets us break
-% infinite loops by not visiting the same state twice in the same
-% step. In fact, the step we store is equal to \texttt{step} when we
-% have started performing the operations of \tn{toks}\meta{state}, but
-% not finished yet. However, once we finish, we store
-% $\text{\texttt{step}}+1$ in \cs{g_@@_state_active_intarray}. This is
-% needed to track submatches
-% properly (see building phase). The \texttt{step} is also used to
-% attach each set of submatch information to a given iteration (and
-% automatically discard it when it corresponds to a past step).
-% \begin{macrocode}
-\int_new:N \l_@@_step_int % increased by 2 at each step of the matching loop
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_min_active_int, \l_@@_max_active_int}
-% All the currently active threads are kept in order of precedence in
-% \cs{g_@@_thread_state_intarray}, and the corresponding submatches in the
-% \tn{toks}. For our purposes, those serve as an array, indexed from
-% \texttt{min_active} (inclusive) to \texttt{max_active} (excluded).
-% At the start of every step, the whole array is unpacked, so that the
-% space can immediately be reused, and \texttt{max_active} is reset to
-% \texttt{min_active}, effectively clearing the array.
-% \begin{macrocode}
-\int_new:N \l_@@_min_active_int % first index of the array of active threads
-\int_new:N \l_@@_max_active_int % one past its last used index
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\g_@@_state_active_intarray, \g_@@_thread_state_intarray}
-% \cs{g_@@_state_active_intarray} stores the last \meta{step} in which
-% each \meta{state} was active. \cs{g_@@_thread_state_intarray} stores
-% threads that will be considered in the next step, more precisely the
-% states in which these threads are.
-% \begin{macrocode}
- % Both arrays are created with 65536 as their size argument.
-\__intarray_new:Nn \g_@@_state_active_intarray { 65536 }
-\__intarray_new:Nn \g_@@_thread_state_intarray { 65536 }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_every_match_tl}
-% Every time a match is found, this token list is used. For single
-% matching, the token list is empty. For multiple matching, the token
-% list is set to repeat the matching, after performing some operation
-% which depends on the user function. See \cs{@@_single_match:} and
-% \cs{@@_multi_match:n}.
-% \begin{macrocode}
-\tl_new:N \l_@@_every_match_tl % run at the end of \@@_match_once:
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_fresh_thread_bool, \l_@@_empty_success_bool}
-% \begin{macro}[aux]{\@@_if_two_empty_matches:F}
-% When doing multiple matches, we need to avoid infinite loops where
-% each iteration matches the same empty token list. When an empty
-% token list is matched, the next successful match of the same empty
-% token list is suppressed. We detect empty matches by setting
-% \cs{l_@@_fresh_thread_bool} to \texttt{true} for threads which
-% directly come from the start of the regex or from the |\K| command,
-% and testing that boolean whenever a thread succeeds. The function
-% \cs{@@_if_two_empty_matches:F} is redefined at every match
-% attempt, depending on whether the previous match was empty or not:
-% if it was, then the function must cancel a purported success if it
-% is empty and at the same spot as the previous match; otherwise, we
-% definitely don't have two identical empty matches, so the function
-% is \cs{use:n}.
-% \begin{macrocode}
-\bool_new:N \l_@@_fresh_thread_bool
-\bool_new:N \l_@@_empty_success_bool
- % Initial meaning; \@@_match_once: redefines it for every match attempt.
-\cs_new_eq:NN \@@_if_two_empty_matches:F \use:n
-% \end{macrocode}
-% \end{macro}
-% \end{variable}
-%
-% \begin{variable}
-% {
-% \g_@@_success_bool,
-% \l_@@_saved_success_bool,
-% \l_@@_match_success_bool
-% }
-% The boolean \cs{l_@@_match_success_bool} is true if the current
-% match attempt was successful, and \cs{g_@@_success_bool} is true
-% if there was at least one successful match. Apart from the integer
-% arrays (\texttt{intarray} variables), this is the only global
-% variable in this module, but we would need it to be local when
-% matching a control sequence with |\c{...}|. This is done by saving
-% the global variable into \cs{l_@@_saved_success_bool}, which is
-% local, hence not affected by the changes due to inner regex
-% functions.
-% \begin{macrocode}
-\bool_new:N \g_@@_success_bool % overall success, set globally
-\bool_new:N \l_@@_saved_success_bool % local copy of the global boolean
-\bool_new:N \l_@@_match_success_bool % success of the current attempt
-% \end{macrocode}
-% \end{variable}
-%
-% \subsubsection{Matching: framework}
-%
-% \begin{macro}[int]{\@@_match:n}
-% First store the query into \tn{toks} registers and arrays (see
-% \cs{@@_query_set:nnn}). Then initialize the variables that should
-% be set once for each user function (even for multiple
-% matches). Namely, the overall matching is not yet successful; none of
-% the states should be marked as visited (\cs{g_@@_state_active_intarray}), and
-% we start at step $0$; we pretend that there was a previous match
-% ending at the start of the query, which was not empty (to avoid
-% smothering an empty match at the start). Once all this is set up, we
-% are ready for the ride. Find the first match.
-% \begin{macrocode}
- % #1 : the query token list.
-\cs_new_protected:Npn \@@_match:n #1
- {
-%<trace> \trace_push:nnx { regex } { 1 } { @@_match }
-%<trace> \trace:nnx { regex } { 1 } { analyzing~query~token~list }
- \int_zero:N \l_@@_balance_int
- % Query data is stored from index 2 * max_state upwards. A marker entry
- % (empty tokens, -1, -2) is stored before the first token ...
- \int_set:Nn \l_@@_current_pos_int { 2 * \l_@@_max_state_int }
- \@@_query_set:nnn { } { -1 } { -2 }
- \int_set_eq:NN \l_@@_min_pos_int \l_@@_current_pos_int
- % ... then one entry per token; the " makes ##2 be read as hexadecimal ...
- \__tl_analysis_map_inline:nn {#1}
- { \@@_query_set:nnn {##1} {"##2} {##3} }
- \int_set_eq:NN \l_@@_max_pos_int \l_@@_current_pos_int
- % ... and another marker entry after the last token.
- \@@_query_set:nnn { } { -1 } { -2 }
-%<trace> \trace:nnx { regex } { 1 } { initializing }
- \bool_gset_false:N \g_@@_success_bool
- % Mark states min_state to max_state - 1 with the value 1; the step
- % counter starts at 0 and \@@_match_loop: adds 2 before using it.
- \int_step_inline:nnnn
- \l_@@_min_state_int { 1 } { \l_@@_max_state_int - 1 }
- { \__intarray_gset_fast:Nnn \g_@@_state_active_intarray {##1} { 1 } }
- \int_set_eq:NN \l_@@_min_active_int \l_@@_max_state_int
- \int_zero:N \l_@@_step_int
- % The first match attempt starts at the beginning of the query.
- \int_set_eq:NN \l_@@_success_pos_int \l_@@_min_pos_int
- \int_set:Nn \l_@@_min_submatch_int
- { 2 * \l_@@_max_state_int }
- \int_set_eq:NN \l_@@_submatch_int \l_@@_min_submatch_int
- % Pretend the previous match was not empty.
- \bool_set_false:N \l_@@_empty_success_bool
- \@@_match_once:
-%<trace> \trace_pop:nnx { regex } { 1 } { @@_match }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_match_once:}
-% This function finds one match, then does some action defined by the
-% \texttt{every_match} token list, which may recursively call
-% \cs{@@_match_once:}. First initialize some variables: set the
-% conditional which detects identical empty matches; this match
-% attempt starts at the previous \texttt{success_pos}, is not yet
-% successful, and has no submatches yet; clear the array of active
-% threads, and put the starting state $0$ in it. We are then almost
-% ready to read our first token in the query, but we actually start
-% one position earlier than the start, and \texttt{get} that token, so
-% that the \texttt{last_char} will be set properly for word
-% boundaries. Then call \cs{@@_match_loop:}, which runs through the
-% query until the end or until a successful match breaks early.
-% \begin{macrocode}
- % Find one match, then run \l_@@_every_match_tl.
-\cs_new_protected:Npn \@@_match_once:
- {
- % After an empty match, the conditional compares start_pos with
- % current_pos and takes the following braced code as its F argument;
- % otherwise it is \use:n.
- \if_meaning:w \c_true_bool \l_@@_empty_success_bool
- \cs_set:Npn \@@_if_two_empty_matches:F
- { \int_compare:nNnF \l_@@_start_pos_int = \l_@@_current_pos_int }
- \else:
- \cs_set_eq:NN \@@_if_two_empty_matches:F \use:n
- \fi:
- \int_set_eq:NN \l_@@_start_pos_int \l_@@_success_pos_int
- \bool_set_false:N \l_@@_match_success_bool
- \prop_clear:N \l_@@_current_submatches_prop
- % Empty the array of active threads, then store the initial state.
- \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
- \@@_store_state:n { \l_@@_min_state_int }
- % Read the entry before start_pos: \@@_match_loop: copies
- % current_char into last_char before reading the next entry.
- \int_set:Nn \l_@@_current_pos_int
- { \l_@@_start_pos_int - 1 }
- \@@_query_get:
- \@@_match_loop:
- \l_@@_every_match_tl
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_single_match:, \@@_multi_match:n}
-% For a single match, the overall success is determined by whether the
-% only match attempt is a success. When doing multiple matches, the
-% overall matching is successful as soon as any match
-% succeeds. Perform the action |#1|, then find the next match.
-% \begin{macrocode}
- % Set up \l_@@_every_match_tl so that the overall success is the
- % success of the match attempt that just ended.
-\cs_new_protected:Npn \@@_single_match:
- {
- \tl_set:Nn \l_@@_every_match_tl
- { \bool_gset_eq:NN \g_@@_success_bool \l_@@_match_success_bool }
- }
- % #1 : code to run after each successful match.
-\cs_new_protected:Npn \@@_multi_match:n #1
- {
- \tl_set:Nn \l_@@_every_match_tl
- {
- \if_meaning:w \c_true_bool \l_@@_match_success_bool
- \bool_gset_true:N \g_@@_success_bool
- #1
- % \exp_after:wN expands \fi: before the next match attempt starts.
- \exp_after:wN \@@_match_once:
- \fi:
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_match_loop:}
-% \begin{macro}[aux, rEXP]{\@@_match_one_active:n}
-% At each new position, set some variables and get the new character
-% and category from the query. Then unpack the array of active
-% threads, and clear it by resetting its length
-% (\texttt{max_active}). This results in a sequence of
-% \cs{@@_use_state_and_submatches:nn} \Arg{state} \Arg{prop}, and
-% we consider those states one by one in order. As soon as a thread
-% succeeds, exit the step, and, if there are threads to consider at the
-% next position, and we have not reached the end of the string,
-% repeat the loop. Otherwise, the last thread that succeeded is what
-% \cs{@@_match_once:} matches. We explain the \texttt{fresh_thread}
-% business when describing \cs{@@_action_start_wildcard:}.
-% \begin{macrocode}
- % One step of the matching: read one query token and run all threads.
-\cs_new_protected:Npn \@@_match_loop:
- {
- \int_add:Nn \l_@@_step_int { 2 }
- \int_incr:N \l_@@_current_pos_int
- \int_set_eq:NN \l_@@_last_char_int \l_@@_current_char_int
- \int_set_eq:NN \l_@@_case_changed_char_int \c_max_int
- \@@_query_get:
- % The x-expansion reads max_active while expanding the step function;
- % the assignment which resets it is only performed afterwards, followed
- % by the unpacked thread list.
- \use:x
- {
- \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
- \int_step_function:nnnN
- { \l_@@_min_active_int }
- { 1 }
- { \l_@@_max_active_int - 1 }
- \@@_match_one_active:n
- }
- % \@@_action_success: jumps here with \__prg_break:.
- \__prg_break_point:
- \bool_set_false:N \l_@@_fresh_thread_bool %^^A was arg of break_point:n
- % Loop again only if some thread is stored for the next position and
- % the end of the query is not reached; the \exp_after:wN close both
- % conditionals first.
- \if_int_compare:w \l_@@_max_active_int > \l_@@_min_active_int
- \if_int_compare:w \l_@@_current_pos_int < \l_@@_max_pos_int
- \exp_after:wN \exp_after:wN \exp_after:wN \@@_match_loop:
- \fi:
- \fi:
- }
- % #1 : index of a thread. Expandable: produces the call handling that
- % thread, with its state read from the array and the contents of the
- % \toks register number #1.
-\cs_new:Npn \@@_match_one_active:n #1
- {
- \@@_use_state_and_submatches:nn
- { \__intarray_item_fast:Nn \g_@@_thread_state_intarray {#1} }
- { \@@_toks_use:w #1 }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_query_set:nnn}
-% The arguments are: tokens that \texttt{o} and \texttt{x} expand to
-% one token of the query, the catcode, and the character code. Store
-% those, and the current brace balance (used later to check for
-% overall brace balance) in a \tn{toks} register and some arrays,
-% then update the \texttt{balance}.
-% \begin{macrocode}
- % #1 : tokens, #2 : category code, #3 : character code.
-\cs_new_protected:Npn \@@_query_set:nnn #1#2#3
- {
- % Store everything at index current_pos, including the brace balance
- % before this token.
- \__intarray_gset_fast:Nnn \g_@@_charcode_intarray
- { \l_@@_current_pos_int } {#3}
- \__intarray_gset_fast:Nnn \g_@@_catcode_intarray
- { \l_@@_current_pos_int } {#2}
- \__intarray_gset_fast:Nnn \g_@@_balance_intarray
- { \l_@@_current_pos_int } { \l_@@_balance_int }
- \@@_toks_set:Nn \l_@@_current_pos_int {#1}
- \int_incr:N \l_@@_current_pos_int
- % #2 = 1 increases the balance, #2 = 2 decreases it, any other value
- % (including 0) leaves it unchanged.
- \if_case:w #2 \exp_stop_f:
- \or: \int_incr:N \l_@@_balance_int
- \or: \int_decr:N \l_@@_balance_int
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_query_get:}
-% Extract the current character and category codes at the current
-% position from the appropriate arrays.
-% \begin{macrocode}
- % Load the character and category codes stored at current_pos.
-\cs_new_protected:Npn \@@_query_get:
- {
- % Assignments in primitive syntax; \scan_stop: ends each number.
- \l_@@_current_char_int
- = \__intarray_item_fast:Nn \g_@@_charcode_intarray
- { \l_@@_current_pos_int } \scan_stop:
- \l_@@_current_catcode_int
- = \__intarray_item_fast:Nn \g_@@_catcode_intarray
- { \l_@@_current_pos_int } \scan_stop:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Using states of the \textsc{nfa}}
-%
-% \begin{macro}[int]{\@@_use_state:}
-% Use the current \textsc{nfa} instruction. The state is initially
-% marked as belonging to the current \texttt{step}: this allows normal
-% free transition to repeat, but group-repeating transitions
-% won't. Once we are done exploring all the branches it spawned, the
-% state is marked as $\texttt{step}+1$: any thread hitting it at that
-% point will be terminated.
-% \begin{macrocode}
- % Run the instructions of the state \l_@@_current_state_int.
-\cs_new_protected:Npn \@@_use_state:
- {
-%<*trace>
- \trace:nnx { regex } { 2 } { state~\int_use:N \l_@@_current_state_int }
-%</trace>
- % While the instructions run, the state is marked with step ...
- \__intarray_gset_fast:Nnn \g_@@_state_active_intarray
- { \l_@@_current_state_int } { \l_@@_step_int }
- \@@_toks_use:w \l_@@_current_state_int
- % ... and once they are done, with step + 1.
- \__intarray_gset_fast:Nnn \g_@@_state_active_intarray
- { \l_@@_current_state_int } { \l_@@_step_int + 1 }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_use_state_and_submatches:nn}
-% This function is called as one item in the array of active threads
-% after that array has been unpacked for a new step. Update the
-% \texttt{current_state} and \texttt{current_submatches} and use the
-% state if it has not yet been encountered at this step.
-% \begin{macrocode}
- % #1 : state of the thread, #2 : its submatches (property list contents).
-\cs_new_protected:Npn \@@_use_state_and_submatches:nn #1 #2
- {
- \int_set:Nn \l_@@_current_state_int {#1}
- % Only use the state if its mark is older than the current step.
- \if_int_compare:w
- \__intarray_item_fast:Nn \g_@@_state_active_intarray
- { \l_@@_current_state_int }
- < \l_@@_step_int
- \tl_set:Nn \l_@@_current_submatches_prop {#2}
- % \exp_after:wN expands \fi: before the state is used.
- \exp_after:wN \@@_use_state:
- \fi:
- % NOTE(review): the purpose of this \scan_stop: is not evident from
- % this excerpt -- confirm before removing it.
- \scan_stop:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Actions when matching}
-%
-% \begin{macro}[int]{\@@_action_start_wildcard:}
-% For an unanchored match, state $0$ has a free transition to the next
-% and a costly one to itself, to repeat at the next position. To catch
-% repeated identical empty matches, we need to know if a successful
-% thread corresponds to an empty match. The instruction resetting
-% \cs{l_@@_fresh_thread_bool} may be skipped by a successful
-% thread, hence we had to add it to \cs{@@_match_loop:} too.
-% \begin{macrocode}
- % Instructions of the initial state for an unanchored match.
-\cs_new_protected:Npn \@@_action_start_wildcard:
- {
- % Free transition to the next state, taken as a fresh thread.
- \bool_set_true:N \l_@@_fresh_thread_bool
- \@@_action_free:n {1}
- \bool_set_false:N \l_@@_fresh_thread_bool
- % Costly transition with shift 0: stay in this state for the next
- % position.
- \@@_action_cost:n {0}
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_action_free:n, \@@_action_free_group:n}
-% \begin{macro}[aux]{\@@_action_free_aux:nn}
-% These functions copy a thread after checking that the \textsc{nfa}
-% state has not already been used at this position. If not, store
-% submatches in the new state, and insert the instructions for that
-% state in the input stream. Then restore the old value of
-% \cs{l_@@_current_state_int} and of the current submatches. The
-% two types of free transitions differ by how they test that the state
-% has not been encountered yet: the \texttt{group} version is
-% stricter, and will not use a state if it was used earlier in the
-% current thread, hence forcefully breaking the loop, while the
-% \enquote{normal} version will revisit a state when within the thread
-% itself.
-% \begin{macrocode}
- % The shift is picked up as the second argument of the auxiliary. With
- % this first argument the target state is used unless its mark is
- % greater than the current step (the \else: inverts the test).
-\cs_new_protected:Npn \@@_action_free:n
- { \@@_action_free_aux:nn { > \l_@@_step_int \else: } }
- % The shift is picked up as the second argument of the auxiliary. With
- % this first argument the target state is used only if its mark is
- % smaller than the current step.
-\cs_new_protected:Npn \@@_action_free_group:n
- { \@@_action_free_aux:nn { < \l_@@_step_int } }
- % #1 : end of the integer comparison deciding whether to use the state
- % #2 : shift from the current state to the target state
-\cs_new_protected:Npn \@@_action_free_aux:nn #1#2
- {
- % The x-expansion captures the current state and submatches now, so
- % that the assignments at the end restore them after the target state
- % has been used.
- \use:x
- {
- \int_add:Nn \l_@@_current_state_int {#2}
- \exp_not:n
- {
- \if_int_compare:w
- \__intarray_item_fast:Nn \g_@@_state_active_intarray
- { \l_@@_current_state_int }
- #1
- \exp_after:wN \@@_use_state:
- \fi:
- }
- \int_set:Nn \l_@@_current_state_int
- { \int_use:N \l_@@_current_state_int }
- \tl_set:Nn \exp_not:N \l_@@_current_submatches_prop
- { \exp_not:o \l_@@_current_submatches_prop }
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_action_cost:n}
-% A transition which consumes the current character and shifts the
-% state by |#1|. The resulting state is stored in the appropriate array
-% for use at the next position, and we also store the current
-% submatches.
-% \begin{macrocode}
- % #1 : shift from the current state to the target state.
-\cs_new_protected:Npn \@@_action_cost:n #1
- {
- % The o-expansion of \__int_value:w evaluates the sum, so that
- % \@@_store_state:n receives an explicit number.
- \exp_args:No \@@_store_state:n
- { \__int_value:w \__int_eval:w \l_@@_current_state_int + #1 }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_store_state:n}
-% \begin{macro}[aux]{\@@_store_submatches:}
-% Put the given state in \cs{g_@@_thread_state_intarray}, and increment
-% the length of the array. Also store the current submatch in the
-% appropriate \tn{toks}.
-% \begin{macrocode}
- % #1 : state to append to the array of active threads.
-\cs_new_protected:Npn \@@_store_state:n #1
- {
- % Submatches are stored first, at the same index max_active.
- \@@_store_submatches:
- \__intarray_gset_fast:Nnn \g_@@_thread_state_intarray
- { \l_@@_max_active_int } {#1}
- \int_incr:N \l_@@_max_active_int
- }
- % Store the current submatches in the \toks register number max_active.
- % Redefined to do nothing by \@@_disable_submatches:.
-\cs_new_protected:Npn \@@_store_submatches:
- {
- \@@_toks_set:No \l_@@_max_active_int
- { \l_@@_current_submatches_prop }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_disable_submatches:}
-% Some user functions don't require tracking submatches.
-% We get a performance improvement by simply defining the
-% relevant functions to remove their argument and do nothing
-% with it.
-% \begin{macrocode}
- % Redefine the two submatch-related functions as no-ops; the second one
- % still takes (and discards) its argument.
-\cs_new_protected:Npn \@@_disable_submatches:
- {
- \cs_set_protected:Npn \@@_store_submatches: { }
- \cs_set_protected:Npn \@@_action_submatch:n ##1 { }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_action_submatch:n}
-% Update the current submatches with the information from the current
-% position. Maybe a bottleneck.
-% \begin{macrocode}
- % #1 : key under which the current position is recorded.
-\cs_new_protected:Npn \@@_action_submatch:n #1
- {
- % The o-expansion turns the position into explicit digits.
- \prop_put:Nno \l_@@_current_submatches_prop {#1}
- { \int_use:N \l_@@_current_pos_int }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_action_success:}
-% There is a successful match when an execution path reaches the last
-% state in the \textsc{nfa}, unless this marks a second identical
-% empty match. Then mark that there was a successful match; it is
-% empty if it is \enquote{fresh}; and we store the current position
-% and submatches. The current step is then interrupted with
-% \cs{__prg_break:}, and only paths with higher precedence are
-% pursued further. The values stored here may be overwritten by a
-% later success of a path with higher precedence.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_action_success:
- {
- % Nothing is recorded when the two-empty-matches test is true.
- \@@_if_two_empty_matches:F
- {
- \bool_set_true:N \l_@@_match_success_bool
- % The match counts as empty exactly when the thread is still fresh.
- \bool_set_eq:NN \l_@@_empty_success_bool
- \l_@@_fresh_thread_bool
- % Remember where the match ended and the submatches of this path.
- \int_set_eq:NN \l_@@_success_pos_int \l_@@_current_pos_int
- \prop_set_eq:NN \l_@@_success_submatches_prop
- \l_@@_current_submatches_prop
- % Interrupt the current step.
- \__prg_break:
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Replacement}
-%
-% \subsubsection{Variables and helpers used in replacement}
-%
-% \begin{variable}{\l_@@_replacement_csnames_int}
-% The behaviour of closing braces inside a replacement text depends on
-% whether a sequence |\c{| or |\u{| has been encountered. The number
-% of \enquote{open} such sequences that should be closed by |}| is
-% stored in \cs{l_@@_replacement_csnames_int}, and decreased by
-% $1$ by each |}|.
-% \begin{macrocode}
-\int_new:N \l_@@_replacement_csnames_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_replacement_category_tl, \l_@@_replacement_category_seq}
-% This sequence of letters is used to correctly restore categories in
-% nested constructions such as |\cL(abc\cD(_)d)|.
-% \begin{macrocode}
-\tl_new:N \l_@@_replacement_category_tl
-\seq_new:N \l_@@_replacement_category_seq
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_balance_tl}
-% This token list holds the replacement text for
-% \cs{@@_replacement_balance_one_match:n} while it is being built
-% incrementally.
-% \begin{macrocode}
-\tl_new:N \l_@@_balance_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}[aux, rEXP]{\@@_replacement_balance_one_match:n}
-% This expects as an argument the first index of a set of entries in
-% \cs{g_@@_submatch_begin_intarray} (and related arrays) which hold the
-% submatch information for a given match. It
-% can be used within an integer expression to obtain the brace balance
-% incurred by performing the replacement on that match. This combines
-% the braces lost by removing the match, braces added by all the
-% submatches appearing in the replacement, and braces appearing
-% explicitly in the replacement. Even though it is always redefined
-% before use, we initialize it as for an empty replacement. An
-% important property is that concatenating several calls to that
-% function must result in a valid integer expression (hence a leading
-% |+| in the actual definition).
-% \begin{macrocode}
-\cs_new:Npn \@@_replacement_balance_one_match:n #1
- % Initial definition (empty replacement): only the braces lost by
- % removing the match itself, hence the minus sign.
- { - \@@_submatch_balance:n {#1} }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_replacement_do_one_match:n}
-% The input is the same as \cs{@@_replacement_balance_one_match:n}.
-% This function is redefined to expand to the part of the token list
-% from the end of the previous match to a given match, followed by the
-% replacement text. Hence concatenating the result of this function
-% with all possible arguments (one call for each match), as well as
-% the range from the end of the last match to the end of the string,
-% will produce the fully replaced token list. The initialization does
-% not matter, but (as an example) we set it as for an empty replacement.
-% \begin{macrocode}
-\cs_new:Npn \@@_replacement_do_one_match:n #1
- {
- % Initial definition (empty replacement): only the part of the query
- % from the "prev" position to the "begin" position stored for #1.
- \@@_query_range:nn
- { \__intarray_item_fast:Nn \g_@@_submatch_prev_intarray {#1} }
- { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_exp_not:N}
-% This function lets us navigate around the fact that the primitive
-% \cs{exp_not:n} requires a braced argument. As far as I can tell, it
-% is only needed if the user tries to include in the replacement text
-% a control sequence set equal to a macro parameter character, such as
-% \cs{c_parameter_token}. Indeed, within an \texttt{x}-expanding
-% assignment, \cs{exp_not:N}~|#| behaves as a single |#|, whereas
-% \cs{exp_not:n}~|{#}| behaves as a doubled |##|.
-% \begin{macrocode}
-\cs_new:Npn \@@_replacement_exp_not:N #1 { \exp_not:n {#1} } % brace the single token #1 for \exp_not:n
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Query and brace balance}
-%
-% \begin{macro}[int, rEXP]{\@@_query_range:nn}
-% \begin{macro}[aux, rEXP]{\@@_query_range_loop:ww}
-% When it is time to extract submatches from the token list, the
-% various tokens are stored in \tn{toks} registers numbered from
-% \cs{l_@@_min_pos_int} inclusive to \cs{l_@@_max_pos_int}
-% exclusive. The function \cs{@@_query_range:nn} \Arg{min}
-% \Arg{max} unpacks registers from the position \meta{min} to the
-% position $\meta{max}-1$ included. Once this is expanded, a second
-% \texttt{x}-expansion will result in the actual tokens from the
-% query. That second expansion is only done by user functions at the
-% very end of their operation, after checking (and correcting) the
-% brace balance first.
-% \begin{macrocode}
-\cs_new:Npn \@@_query_range:nn #1#2
- {
- % The \exp_after:wN chain forces both integer expressions to be
- % evaluated to explicit digits before the loop function is expanded.
- \exp_after:wN \@@_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 \exp_after:wN ;
- \__int_value:w \__int_eval:w #2 ;
- % Target of the \__prg_break: issued by the loop once it is done.
- \__prg_break_point:
- }
-\cs_new:Npn \@@_query_range_loop:ww #1 ; #2 ;
- {
- % Stop as soon as the running index #1 is no longer below the bound #2.
- \if_int_compare:w #1 < #2 \exp_stop_f:
- \else:
- \exp_after:wN \__prg_break:
- \fi:
- % Unpack register #1, then loop with #1 + 1 (evaluated before the call).
- \@@_toks_use:w #1 \exp_stop_f:
- \exp_after:wN \@@_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 + 1 ; #2 ;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_query_submatch:n}
-% Find the start and end positions for a given submatch (of a given match).
-% \begin{macrocode}
-\cs_new:Npn \@@_query_submatch:n #1
- {
- % #1 is an index into the submatch arrays; the range unpacked runs from
- % the stored "begin" position up to (excluding) the stored "end" position.
- \@@_query_range:nn
- { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} }
- { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[rEXP]{\@@_submatch_balance:n}
-% Every user function must result in a balanced token list (unbalanced
-% token lists cannot be stored by TeX). When we unpacked the query, we
-% kept track of the brace balance, hence the contribution from a given
-% range is the difference between the brace balances at the
-% \meta{max~pos} and \meta{min~pos}. These two positions are found in
-% the corresponding \enquote{submatch} arrays.
-%^^A todo: understand when these int_compare are needed
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_submatch_balance:n #1
- {
- % Expands to (balance at end position) - (balance at begin position)
- % for the submatch with index #1. A stored position equal to 0 is not
- % looked up in \g_@@_balance_intarray: it contributes 0 directly.
- % NOTE(review): declared protected although documented as rEXP;
- % presumably so that it is left untouched by the x-type definition of
- % \@@_replacement_balance_one_match:n and only expanded once inside an
- % integer expression -- confirm before changing.
- \__int_eval:w
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} } = 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g_@@_balance_intarray
- { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} }
- }
- -
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} } = 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g_@@_balance_intarray
- { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} }
- }
- \__int_eval_end:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Framework}
-%
-% \begin{macro}[int]{\@@_replacement:n}
-% \begin{macro}[aux]{\@@_replacement_aux:n}
-% The replacement text is built incrementally by abusing \tn{toks}
-% within a group (see \pkg{l3tl-build}). We keep track in
-% \cs{l_@@_balance_int} of the balance of explicit begin- and
-% end-group tokens and \cs{l_@@_balance_tl} will consist of some
-% code to compute the brace balance from submatches (see its
-% description). Detect unescaped right braces, and escaped characters,
-% with trailing \cs{prg_do_nothing:} because some of the later
-% functions look ahead. Once the whole replacement text has been
-% parsed, make sure that there is no open csname. Finally, define the
-% \texttt{balance_one_match} and \texttt{do_one_match} functions.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement:n #1
- {
-%<trace> \trace_push:nnn { regex } { 1 } { @@_replacement:n }
- % Start building the replacement text into \l_@@_internal_a_tl and
- % reset both parts of the brace-balance bookkeeping.
- \__tl_build:Nw \l_@@_internal_a_tl
- \int_zero:N \l_@@_balance_int
- \tl_clear:N \l_@@_balance_tl
- \@@_escape_use:nnnn
- {
- % First handler: a right brace goes to the dedicated function,
- % any other character is treated as normal.
- \if_charcode:w \c_right_brace_str ##1
- \@@_replacement_rbrace:N
- \else:
- \@@_replacement_normal:n
- \fi:
- ##1
- }
- { \@@_replacement_escaped:N ##1 }
- { \@@_replacement_normal:n ##1 }
- {#1}
- % End markers for the functions that look ahead at the next two tokens.
- \prg_do_nothing: \prg_do_nothing:
- % Unclosed \c{ or \u{ constructions: complain, then close each of them.
- \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f:
- \__msg_kernel_error:nnx { regex } { replacement-missing-rbrace }
- { \int_use:N \l_@@_replacement_csnames_int }
- \__tl_build_one:x
- { \prg_replicate:nn \l_@@_replacement_csnames_int \cs_end: }
- \fi:
- % Unclosed category groups: complain, then forget them.
- \seq_if_empty:NF \l_@@_replacement_category_seq
- {
- \__msg_kernel_error:nnx { regex } { replacement-missing-rparen }
- { \seq_count:N \l_@@_replacement_category_seq }
- \seq_clear:N \l_@@_replacement_category_seq
- }
- % Global, x-expanded definition: explicit-brace balance, then the
- % contributions collected in \l_@@_balance_tl, minus the balance of
- % the match being removed.
- \cs_gset:Npx \@@_replacement_balance_one_match:n ##1
- {
- + \int_use:N \l_@@_balance_int
- \l_@@_balance_tl
- - \@@_submatch_balance:n {##1}
- }
- \__tl_build_end:
- % Hand the finished replacement text (expanded once) to the auxiliary.
- \exp_args:No \@@_replacement_aux:n \l_@@_internal_a_tl
-%<trace> \trace_pop:nnn { regex } { 1 } { @@_replacement:n }
- }
-\cs_new_protected:Npn \@@_replacement_aux:n #1
- {
- % Part of the query preceding the match, followed by the replacement #1.
- \cs_set:Npn \@@_replacement_do_one_match:n ##1
- {
- \@@_query_range:nn
- { \__intarray_item_fast:Nn \g_@@_submatch_prev_intarray {##1} }
- { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {##1} }
- #1
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_normal:n}
-% Most characters are simply sent to the output by
-% \cs{__tl_build_one:n}, unless a particular category code has been
-% requested: then \cs{@@_replacement_c_A:w} or a similar auxiliary is
-% called. One exception is right parentheses, which restore the
-% category code in place before the group started. Note that the
-% sequence is non-empty there: it contains an empty entry
-% corresponding to the initial value of
-% \cs{l_@@_replacement_category_tl}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_normal:n #1
- {
- % No category requested: output #1 unchanged.
- \tl_if_empty:NTF \l_@@_replacement_category_tl
- { \__tl_build_one:n {#1} }
- { % (
- \token_if_eq_charcode:NNTF #1 )
- {
- % A right parenthesis ends the group: restore the saved category.
- \seq_pop:NN \l_@@_replacement_category_seq
- \l_@@_replacement_category_tl
- }
- {
- % Call the auxiliary for the current category, followed by the
- % two tokens it expects: this function and the character.
- \use:c { @@_replacement_c_ \l_@@_replacement_category_tl :w }
- \@@_replacement_normal:n {#1}
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_escaped:N}
-% As in parsing a regular expression, we use an auxiliary built from
-% |#1| if defined. Otherwise, check for escaped digits (standing for
-% submatches from $0$ to $9$): anything else is a raw character.
-% We use \cs{token_to_str:N} to give spaces the right category code.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_escaped:N #1
- {
- % Use the dedicated function for the escape #1 when there is one.
- \cs_if_exist_use:cF { @@_replacement_#1:w }
- {
- % Digit test: "1#1" is larger than 1 only when #1 is a digit.
- \if_int_compare:w 1 < 1#1 \exp_stop_f:
- \@@_replacement_put_submatch:n {#1}
- \else:
- % Any other escaped character is output as its string form.
- \exp_args:No \@@_replacement_normal:n
- { \token_to_str:N #1 }
- \fi:
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Submatches}
-%
-% \begin{macro}[aux]{\@@_replacement_put_submatch:n}
-% Insert a submatch in the replacement text. This is dropped if the
-% submatch number is larger than the number of capturing groups.
-% Unless the submatch appears inside a |\c{...}| or |\u{...}|
-% construction, it must be taken into account in the brace balance.
-% Here, |##1| will receive a pointer to the $0$-th submatch for a
-% given match. We cannot use \cs{int_eval:n} because it is
-% expandable, and would be expanded too early (short of adding
-% \cs{exp_not:N}, making the code messy again).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_put_submatch:n #1
- {
- % #1 is the submatch number; nothing is inserted when it is not below
- % \l_@@_capturing_group_int.
- \if_int_compare:w #1 < \l_@@_capturing_group_int
- % The ##1 left in the built text is filled in later, when the
- % replacement becomes the body of a one-argument function.
- \__tl_build_one:n { \@@_query_submatch:n { #1 + ##1 } }
- % Outside any \c{...} or \u{...}, also record the balance contribution.
- \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
- \tl_put_right:Nn \l_@@_balance_tl
- { + \@@_submatch_balance:n { \__int_eval:w #1+##1 \__int_eval_end: } }
- \fi:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_g:w}
-% \begin{macro}[aux,rEXP]{\@@_replacement_g_digits:NN}
-% Grab digits for the |\g| escape sequence in a primitive assignment
-% to the integer \cs{l_@@_internal_a_int}. At the end of the run of
-% digits, check that it ends with a right brace.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_g:w #1#2
- {
- % \g must be followed by an unescaped left brace, which arrives here as
- % the pair "\@@_replacement_normal:n" + left brace.
- \str_if_eq_x:nnTF { #1#2 } { \@@_replacement_normal:n \c_left_brace_str }
- % Start the assignment; the digits are supplied by expanding
- % \@@_replacement_g_digits:NN while TeX is reading the number.
- { \l_@@_internal_a_int = \@@_replacement_g_digits:NN }
- { \@@_replacement_error:NNN g #1 #2 }
- }
-\cs_new:Npn \@@_replacement_g_digits:NN #1#2
- {
- % #1 is the function in front of the next character #2. In every
- % branch the trailing "#1 #2" below is either consumed (\use_i:nnn,
- % \use_none:nn) or passed on to \@@_replacement_error:NNN.
- \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
- {
- \if_int_compare:w 1 < 1#2 \exp_stop_f:
- % A digit: leave it for the number and continue with the next pair.
- #2
- \exp_after:wN \use_i:nnn
- \exp_after:wN \@@_replacement_g_digits:NN
- \else:
- % Not a digit: end the number and report an error.
- \exp_stop_f:
- \exp_after:wN \@@_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- {
- % End the number; only a closing right brace is acceptable here.
- \exp_stop_f:
- \if_meaning:w \@@_replacement_rbrace:N #1
- \exp_args:No \@@_replacement_put_submatch:n
- { \int_use:N \l_@@_internal_a_int }
- \exp_after:wN \use_none:nn
- \else:
- \exp_after:wN \@@_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- #1 #2
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsubsection{Csnames in replacement}
-%
-% \begin{macro}[aux]{\@@_replacement_c:w}
-% |\c| may only be followed by an unescaped character. If followed by
-% a left brace, start a control sequence by calling an auxiliary
-% common with |\u|. Otherwise test whether the category is known; if
-% it is not, complain.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_c:w #1#2
- {
- % #1 must be \@@_replacement_normal:n, i.e. #2 is an unescaped character.
- \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
- {
- % A left brace starts a control sequence name.
- \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
- { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:N }
- {
- % Otherwise #2 must be a category letter with a known auxiliary.
- \cs_if_exist:cTF { @@_replacement_c_#2:w }
- { \@@_replacement_cat:NNN #2 }
- { \@@_replacement_error:NNN c #1#2 }
- }
- }
- { \@@_replacement_error:NNN c #1#2 }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_cu_aux:Nw}
-% Start a control sequence with \cs{cs:w}, which will be protected
-% from expansion by |#1| (either \cs{@@_replacement_exp_not:N} or
-% \cs{exp_not:V}), or turned to a string by \cs{tl_to_str:V} if inside
-% another csname construction |\c| or |\u|. We use \cs{tl_to_str:V}
-% rather than \cs{tl_to_str:N} to deal with integers and other
-% registers.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_cu_aux:Nw #1
- {
- % Case 0 (no csname currently open): the control sequence is handed to
- % #1. Any other value: it is turned into a string by \tl_to_str:V.
- \if_case:w \l_@@_replacement_csnames_int
- \__tl_build_one:n { \exp_not:n { \exp_after:wN #1 \cs:w } }
- \else:
- \__tl_build_one:n { \exp_not:n { \exp_after:wN \tl_to_str:V \cs:w } }
- \fi:
- % One more csname is now waiting for its closing right brace.
- \int_incr:N \l_@@_replacement_csnames_int
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_u:w}
-% Check that |\u| is followed by a left brace. If so, start a control
-% sequence with \cs{cs:w}, which is then unpacked either with
-% \cs{exp_not:V} or \cs{tl_to_str:V} depending on the current context.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_u:w #1#2
- {
- % Same left-brace check as for \g; on success the shared auxiliary is
- % called with \exp_not:V as the unpacking function.
- \str_if_eq_x:nnTF { #1#2 } { \@@_replacement_normal:n \c_left_brace_str }
- { \@@_replacement_cu_aux:Nw \exp_not:V }
- { \@@_replacement_error:NNN u #1#2 }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_rbrace:N}
-% Within a |\c{...}| or |\u{...}| construction, end the control
-% sequence, and decrease the brace count. Otherwise, this is a raw
-% right brace.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_rbrace:N #1
- {
- \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f:
- % Inside a csname: close it and count one fewer open construction.
- \__tl_build_one:n \cs_end:
- \int_decr:N \l_@@_replacement_csnames_int
- \else:
- % Otherwise the brace is handled like any other character.
- \@@_replacement_normal:n {#1}
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Characters in replacement}
-%
-% \begin{macro}[aux]{\@@_replacement_cat:NNN}
-% Here, |#1| is a letter among |CBEMTPUDSLOA| and |#2#3| denote the
-% next character. Complain if we reach the end of the replacement or
-% if the construction appears inside |\c{|\ldots{}|}| or
-% |\u{|\ldots{}|}|, and detect the case of a parenthesis. In that
-% case, store the current category in a sequence and switch to a new
-% one.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_cat:NNN #1#2#3
- {
- % #3 equal to \prg_do_nothing: means the replacement text has ended.
- \token_if_eq_meaning:NNTF \prg_do_nothing: #3
- { \__msg_kernel_error:nn { regex } { replacement-catcode-end } }
- {
- \int_compare:nNnTF { \l_@@_replacement_csnames_int } > 0
- {
- % Inside a csname: complain, then process #2 #3 as if the
- % category request had not been there.
- \__msg_kernel_error:nnnn
- { regex } { replacement-catcode-in-cs } {#1} {#3}
- #2 #3
- }
- {
- \str_if_eq:nnTF { #2 #3 } { \@@_replacement_normal:n ( } % )
- {
- % Group form: save the current category and switch to #1.
- \seq_push:NV \l_@@_replacement_category_seq
- \l_@@_replacement_category_tl
- \tl_set:Nn \l_@@_replacement_category_tl {#1}
- }
- % Single character: apply the category auxiliary to #2 #3.
- { \use:c { @@_replacement_c_#1:w } #2 #3 }
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% We will need to change the category code of the null character many
-% times, hence work in a group. The catcode-specific macros below are
-% defined in alphabetical order; if you are trying to understand the
-% code, start from the end of the alphabet as those categories are
-% simpler than active or begin-group.
-% \begin{macrocode}
-\group_begin:
-% \end{macrocode}
-%
-% \begin{macro}[aux]{\@@_replacement_char:nNN}
-% The only way to produce an arbitrary character--catcode pair is to
-% use the \tn{lowercase} or \tn{uppercase} primitives. This is a
-% wrapper for our purposes. The first argument is the null character
-% with various catcodes. The second and third arguments are grabbed
-% from the input stream: |#3| is the character whose character code to
-% reproduce. We could use \cs{char_generate:nn} but only for some
-% catcodes (active characters and spaces are not supported).
-% \begin{macrocode}
- \cs_new_protected:Npn \@@_replacement_char:nNN #1#2#3
- {
- % Map character code 0 to the code of #3, then lowercase #1 so that its
- % null characters become #3 with their category unchanged. #2 is
- % grabbed but not used.
- \tex_lccode:D 0 = `#3 \scan_stop:
- \tex_lowercase:D { \__tl_build_one:n {#1} }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_A:w}
-% For an active character, expansion must be avoided, twice because we
-% later do two \texttt{x}-expansions, to unpack \tn{toks} for the
-% query, and to expand their contents to tokens of the query.
-% \begin{macrocode}
- \char_set_catcode_active:N \^^@
- % The null character below is active; it is wrapped in \exp_not:n and
- % \exp_not:N so that it is not expanded.
- \cs_new_protected:Npn \@@_replacement_c_A:w
- { \@@_replacement_char:nNN { \exp_not:n { \exp_not:N ^^@ } } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_B:w}
-% An explicit begin-group token increases the balance, unless within a
-% |\c{...}| or |\u{...}| construction. Add the desired begin-group
-% character, using the standard \cs{if_false:} trick. We eventually
-% \texttt{x}-expand twice. The first time must yield a balanced token
-% list, and the second one gives the bare begin-group token. The
-% \cs{exp_after:wN} is not strictly needed, but is more consistent
-% with \pkg{l3tl-analysis}.
-% \begin{macrocode}
- \char_set_catcode_group_begin:N \^^@
- \cs_new_protected:Npn \@@_replacement_c_B:w
- {
- % Count the explicit begin-group token unless a csname is open.
- \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
- \int_incr:N \l_@@_balance_int
- \fi:
- % The null begin-group character is matched by the right brace that
- % follows \if_false:, which keeps this definition balanced.
- \@@_replacement_char:nNN
- { \exp_not:n { \exp_after:wN ^^@ \if_false: } \fi: } }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_C:w}
-% This is not quite catcode-related: when the user requests a
-% character with category \enquote{control sequence}, the
-% one-character control symbol is returned. As for the active
-% character, we prepare for two \texttt{x}-expansions.
-% \begin{macrocode}
- \cs_new_protected:Npn \@@_replacement_c_C:w #1#2
- % #1 is dropped; the control sequence named #2 is stored behind
- % \exp_not: functions so that it is not expanded.
- { \__tl_build_one:n { \exp_not:N \exp_not:N \exp_not:c {#2} } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_D:w}
-% Subscripts fit the mould: \tn{lowercase} the null byte with the
-% correct category.
-% \begin{macrocode}
- \char_set_catcode_math_subscript:N \^^@
- % The null character in the definition below is a math subscript.
- \cs_new_protected:Npn \@@_replacement_c_D:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_E:w}
-% Similar to the begin-group case, the second \texttt{x}-expansion
-% produces the bare end-group token.
-% \begin{macrocode}
- \char_set_catcode_group_end:N \^^@
- \cs_new_protected:Npn \@@_replacement_c_E:w
- {
- % Count the explicit end-group token unless a csname is open.
- \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
- \int_decr:N \l_@@_balance_int
- \fi:
- % The left brace after \if_false: is matched by the null end-group
- % character, which keeps this definition balanced.
- \@@_replacement_char:nNN
- { \exp_not:n { \if_false: { \fi: ^^@ } }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_L:w}
-% Simply \tn{lowercase} a letter null byte to produce an arbitrary letter.
-% \begin{macrocode}
- \char_set_catcode_letter:N \^^@
- % The null character in the definition below is a letter.
- \cs_new_protected:Npn \@@_replacement_c_L:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_M:w}
-% No surprise here, we lowercase the null math toggle.
-% \begin{macrocode}
- \char_set_catcode_math_toggle:N \^^@
- % The null character in the definition below is a math toggle.
- \cs_new_protected:Npn \@@_replacement_c_M:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_O:w}
-% Lowercase an other null byte.
-% \begin{macrocode}
- \char_set_catcode_other:N \^^@
- % The null character in the definition below has category "other".
- \cs_new_protected:Npn \@@_replacement_c_O:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_P:w}
-% For macro parameters, expansion is a tricky issue. We need to
-% prepare for two \texttt{x}-expansions and passing through various
-% macro definitions. Note that we cannot replace one \cs{exp_not:n} by
-% doubling the macro parameter characters because this would misbehave
-% if a mischievous user asks for |\c{\cP\#}|, since that macro
-% parameter character would be doubled.
-% \begin{macrocode}
- \char_set_catcode_parameter:N \^^@
- \cs_new_protected:Npn \@@_replacement_c_P:w
- {
- % Two nested \exp_not:n, one for each later x-expansion, protect the
- % null parameter characters.
- \@@_replacement_char:nNN
- { \exp_not:n { \exp_not:n { ^^@^^@^^@^^@ } } }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_S:w}
-% Spaces are normalized on input by \TeX{} to have character code
-% $32$. It is in fact impossible to get a token with character code
-% $0$ and category code $10$. Hence we use $32$ instead of $0$ as our
-% base character.
-% \begin{macrocode}
- \cs_new_protected:Npn \@@_replacement_c_S:w #1#2
- {
- % A space with character code 0 is reported as an error; processing
- % nevertheless continues with the code below.
- \if_int_compare:w `#2 = 0 \exp_stop_f:
- \__msg_kernel_error:nn { regex } { replacement-null-space }
- \fi:
- % Map the space character to the code of #2 and lowercase a space.
- \tex_lccode:D `\ = `#2 \scan_stop:
- \tex_lowercase:D { \__tl_build_one:n {~} }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_T:w}
-% No surprise for alignment tabs here. Those are surrounded by the
-% appropriate braces whenever necessary, hence they don't cause
-% trouble in alignment settings.
-% \begin{macrocode}
- \char_set_catcode_alignment:N \^^@
- % The null character in the definition below is an alignment tab.
- \cs_new_protected:Npn \@@_replacement_c_T:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replacement_c_U:w}
-% Simple call to \cs{@@_replacement_char:nNN} which lowercases the
-% math superscript |^^@|.
-% \begin{macrocode}
- \char_set_catcode_math_superscript:N \^^@
- % The null character in the definition below is a math superscript.
- \cs_new_protected:Npn \@@_replacement_c_U:w
- { \@@_replacement_char:nNN { ^^@ } }
-% \end{macrocode}
-% \end{macro}
-%
-% Restore the catcode of the null byte.
-% \begin{macrocode}
-\group_end:
-% \end{macrocode}
-%
-% \subsubsection{An error}
-%
-% \begin{macro}[aux]{\@@_replacement_error:NNN}
-% Simple error reporting by calling one of the messages
-% \texttt{replacement-c}, \texttt{replacement-g}, or
-% \texttt{replacement-u}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replacement_error:NNN #1#2#3
- {
- % #1 selects the message (replacement-#1) and #3 is passed to it.
- \__msg_kernel_error:nnx { regex } { replacement-#1 } {#3}
- % Put the offending pair back so that it is then processed as usual.
- #2 #3
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{User functions}
-%
-% \begin{macro}{\regex_new:N}
-% Before being assigned a sensible value, a regex variable matches
-% nothing.
-% \begin{macrocode}
-\cs_new_protected:Npn \regex_new:N #1
- % The new variable starts as a copy of \c_@@_no_match_regex.
- { \cs_new_eq:NN #1 \c_@@_no_match_regex }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}{\regex_set:Nn, \regex_gset:Nn, \regex_const:Nn}
-% Compile, then store the result in the user variable with the
-% appropriate assignment function.
-% \begin{macrocode}
-\cs_new_protected:Npn \regex_set:Nn #1#2
- {
- % Compile #2, then copy \l_@@_internal_regex into #1 (local assignment).
- \@@_compile:n {#2}
- \tl_set_eq:NN #1 \l_@@_internal_regex
- }
-\cs_new_protected:Npn \regex_gset:Nn #1#2
- {
- % Same as above with a global assignment.
- \@@_compile:n {#2}
- \tl_gset_eq:NN #1 \l_@@_internal_regex
- }
-\cs_new_protected:Npn \regex_const:Nn #1#2
- {
- % \exp_not:o expands the internal variable once and protects its
- % contents from the x-expansion performed by \tl_const:Nx.
- \@@_compile:n {#2}
- \tl_const:Nx #1 { \exp_not:o \l_@@_internal_regex }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}{\regex_show:N, \regex_show:n}
-% User functions: the \texttt{n} variant requires compilation first.
-% Then show the variable with some appropriate text. The auxiliary
-% \cs{@@_show:Nn} is defined in a different section.
-% \begin{macrocode}
-\cs_new_protected:Npn \regex_show:n #1
- {
- % Compile #1, then show the result, described by the braced string
- % form of #1.
- \@@_compile:n {#1}
- \@@_show:Nn \l_@@_internal_regex
- { { \tl_to_str:n {#1} } }
- }
-\cs_new_protected:Npn \regex_show:N #1
- % No compilation needed; the description is "variable <name of #1>".
- { \@@_show:Nn #1 { variable~\token_to_str:N #1 } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[TF]{\regex_match:nn, \regex_match:Nn}
-% Those conditionals are based on a common auxiliary defined
-% later. Its first argument builds the \textsc{nfa} corresponding to
-% the regex, and the second argument is the query token list. Once we
-% have performed the match, convert the resulting boolean to
-% \cs{prg_return_true:} or \texttt{false}.
-% \begin{macrocode}
-\prg_new_protected_conditional:Npnn \regex_match:nn #1#2 { T , F , TF }
- {
- % #1 is a regex given as text, #2 the token list to match against.
- \@@_if_match:nn { \@@_build:n {#1} } {#2}
- \@@_return:
- }
-\prg_new_protected_conditional:Npnn \regex_match:Nn #1#2 { T , F , TF }
- {
- % Same, with #1 a regex variable.
- \@@_if_match:nn { \@@_build:N #1 } {#2}
- \@@_return:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}{\regex_count:nnN, \regex_count:NnN}
-% Again, use an auxiliary whose first argument builds the \textsc{nfa}.
-% \begin{macrocode}
-\cs_new_protected:Npn \regex_count:nnN #1
- % Only the regex is grabbed here; the auxiliary reads the two
- % remaining arguments itself.
- { \@@_count:nnN { \@@_build:n {#1} } }
-\cs_new_protected:Npn \regex_count:NnN #1
- { \@@_count:nnN { \@@_build:N #1 } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}
-% {
-% \regex_extract_once:nnN, \regex_extract_once:NnN,
-% \regex_extract_all:nnN, \regex_extract_all:NnN,
-% \regex_replace_once:nnN, \regex_replace_once:NnN,
-% \regex_replace_all:nnN, \regex_replace_all:NnN,
-% \regex_split:nnN, \regex_split:NnN
-% }
-% \begin{macro}[TF]
-% {
-% \regex_extract_once:nnN, \regex_extract_once:NnN,
-% \regex_extract_all:nnN, \regex_extract_all:NnN,
-% \regex_replace_once:nnN, \regex_replace_once:NnN,
-% \regex_replace_all:nnN, \regex_replace_all:NnN,
-% \regex_split:nnN, \regex_split:NnN
-% }
-% We define here $40$ user functions, following a common pattern in
-% terms of \texttt{:nnN} auxiliaries, defined in the coming
-% subsections. The auxiliary is handed \cs{@@_build:n} or
-% \cs{@@_build:N} with the appropriate regex argument, then all
-% other necessary arguments (replacement text, token list, \emph{etc.}).
-% The conditionals call \cs{@@_return:} to return either
-% \texttt{true} or \texttt{false} once matching has been performed.
-% \begin{macrocode}
-\cs_set_protected:Npn \@@_tmp:w #1#2#3
- {
- % #1: internal :nnN auxiliary; #2: user function taking the regex as
- % text; #3: user function taking a regex variable.
- % Plain versions grab only the regex and leave the other two
- % arguments to the auxiliary #1.
- \cs_new_protected:Npn #2 ##1 { #1 { \@@_build:n {##1} } }
- \cs_new_protected:Npn #3 ##1 { #1 { \@@_build:N ##1 } }
- % Conditional versions grab all three arguments so that \@@_return:
- % can be placed after the call to the auxiliary.
- \prg_new_protected_conditional:Npnn #2 ##1##2##3 { T , F , TF }
- { #1 { \@@_build:n {##1} } {##2} ##3 \@@_return: }
- \prg_new_protected_conditional:Npnn #3 ##1##2##3 { T , F , TF }
- { #1 { \@@_build:N ##1 } {##2} ##3 \@@_return: }
- }
-\@@_tmp:w \@@_extract_once:nnN
- \regex_extract_once:nnN \regex_extract_once:NnN
-\@@_tmp:w \@@_extract_all:nnN
- \regex_extract_all:nnN \regex_extract_all:NnN
-\@@_tmp:w \@@_replace_once:nnN
- \regex_replace_once:nnN \regex_replace_once:NnN
-\@@_tmp:w \@@_replace_all:nnN
- \regex_replace_all:nnN \regex_replace_all:NnN
-\@@_tmp:w \@@_split:nnN \regex_split:nnN \regex_split:NnN
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsubsection{Variables and helpers for user functions}
-%
-% \begin{variable}{\l_@@_match_count_int}
-% The number of matches found so far is stored
-% in \cs{l_@@_match_count_int}. This is only used
-% in the \cs{regex_count:nnN} functions.
-% \begin{macrocode}
-\int_new:N \l_@@_match_count_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{@@_begin, @@_end}
-% Those flags are raised to indicate extra begin-group
-% or end-group tokens when extracting submatches.
-% \begin{macrocode}
-\flag_new:n { @@_begin }
-\flag_new:n { @@_end }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_min_submatch_int, \l_@@_submatch_int, \l_@@_zeroth_submatch_int}
-% The end-points of each submatch are stored in two arrays whose index \meta{submatch} ranges
-% from \cs{l_@@_min_submatch_int} (inclusive) to
-% \cs{l_@@_submatch_int} (exclusive). Each successful match comes
-% with a $0$-th submatch (the full match), and one match for each
-% capturing group: submatches corresponding to the last successful
-% match are labelled starting at \texttt{zeroth_submatch}. The entry
-% \cs{l_@@_zeroth_submatch_int} in \cs{g_@@_submatch_prev_intarray} holds
-% the position at which that match attempt started: this is used for
-% splitting and replacements.
-% \begin{macrocode}
-\int_new:N \l_@@_min_submatch_int
-\int_new:N \l_@@_submatch_int
-\int_new:N \l_@@_zeroth_submatch_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\g_@@_submatch_prev_intarray, \g_@@_submatch_begin_intarray, \g_@@_submatch_end_intarray}
-% Hold the place where the match attempt began and the end-points of each submatch.
-% \begin{macrocode}
-\__intarray_new:Nn \g_@@_submatch_prev_intarray { 65536 }
-\__intarray_new:Nn \g_@@_submatch_begin_intarray { 65536 }
-\__intarray_new:Nn \g_@@_submatch_end_intarray { 65536 }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}[aux]{\@@_return:}
-% This function triggers either \cs{prg_return_false:} or
-% \cs{prg_return_true:} as appropriate to whether a match was found or
-% not. It is used by all user conditionals.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_return:
- {
- % The outcome is read from the global boolean \g_@@_success_bool.
- \if_meaning:w \c_true_bool \g_@@_success_bool
- \prg_return_true:
- \else:
- \prg_return_false:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Matching}
-%
-% \begin{macro}[aux]{\@@_if_match:nn}
-% We don't track submatches, and stop after a single match. Build the
-% \textsc{nfa} with |#1|, and perform the match on the query |#2|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_if_match:nn #1#2
- {
- % Everything happens in a group, so the redefinitions made by
- % \@@_disable_submatches: are undone at \group_end:.
- \group_begin:
- \@@_disable_submatches:
- \@@_single_match:
- % #1 is the code that builds the NFA; #2 is the query.
- #1
- \@@_match:n {#2}
- \group_end:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_count:nnN}
-% Again, we don't care about submatches. Instead of aborting after the
-% first \enquote{longest match} is found, we search for multiple
-% matches, incrementing \cs{l_@@_match_count_int} every time to
-% record the number of matches. Build the \textsc{nfa} and match. At
-% the end, store the result in the user's variable.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_count:nnN #1#2#3
- {
- \group_begin:
- \@@_disable_submatches:
- \int_zero:N \l_@@_match_count_int
- % The code given to \@@_multi_match:n increments the counter.
- \@@_multi_match:n { \int_incr:N \l_@@_match_count_int }
- #1
- \@@_match:n {#2}
- % The count is expanded to its value before \group_end: is executed,
- % so the assignment to #3 takes place outside the group.
- \exp_args:NNNo
- \group_end:
- \int_set:Nn #3 { \int_use:N \l_@@_match_count_int }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Extracting submatches}
-%
-% \begin{macro}[aux]{\@@_extract_once:nnN, \@@_extract_all:nnN}
-% Match once or multiple times. After each match (or after the only
-% match), extract the submatches using \cs{@@_extract:}. At the
-% end, store the sequence containing all the submatches into the user
-% variable |#3| after closing the group.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_extract_once:nnN #1#2#3
- {
- % The group opened here is not closed by an explicit \group_end: in
- % this function; the last line is \@@_group_end_extract_seq:N.
- \group_begin:
- \@@_single_match:
- #1
- \@@_match:n {#2}
- % Single match: extract once, after matching.
- \@@_extract:
- \@@_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \@@_extract_all:nnN #1#2#3
- {
- \group_begin:
- % Multiple matches: \@@_extract: is the code given to
- % \@@_multi_match:n.
- \@@_multi_match:n { \@@_extract: }
- #1
- \@@_match:n {#2}
- \@@_group_end_extract_seq:N #3
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_split:nnN}
-% Splitting at submatches is a bit more tricky. For each match,
-% extract all submatches, and replace the zeroth submatch by the part
-% of the query between the start of the match attempt and the start of
-% the zeroth submatch. This is inhibited if the delimiter matched an
-% empty token list at the start of this match attempt. After the last
-% match, store the last part of the token list, which ranges from the
-% start of the match attempt to the end of the query. This step is
-% inhibited if the last match was empty and at the very end: decrement
-% \cs{l_@@_submatch_int}, which controls which matches will be used.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_split:nnN #1#2#3
- {
- \group_begin:
- \@@_multi_match:n
- {
- \if_int_compare:w \l_@@_start_pos_int < \l_@@_success_pos_int
- \@@_extract:
- \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
- { \l_@@_zeroth_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
- { \l_@@_zeroth_submatch_int }
- {
- \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray
- { \l_@@_zeroth_submatch_int }
- }
- \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
- { \l_@@_zeroth_submatch_int }
- { \l_@@_start_pos_int }
- \fi:
- }
- #1
- \@@_match:n {#2}
-%<assert>\assert_int:n { \l_@@_current_pos_int = \l_@@_max_pos_int }
- \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
- { \l_@@_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
- { \l_@@_submatch_int }
- { \l_@@_max_pos_int }
- \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
- { \l_@@_submatch_int }
- { \l_@@_start_pos_int }
- \int_incr:N \l_@@_submatch_int
- \if_meaning:w \c_true_bool \l_@@_empty_success_bool
- \if_int_compare:w \l_@@_start_pos_int = \l_@@_max_pos_int
- \int_decr:N \l_@@_submatch_int
- \fi:
- \fi:
- \@@_group_end_extract_seq:N #3
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_end_extract_seq:N}
-% The end-points of submatches are stored as entries of two arrays
-% from \cs{l_@@_min_submatch_int} to
-% \cs{l_@@_submatch_int} (exclusive). Extract the relevant ranges
-% into \cs{l_@@_internal_a_tl}. We detect unbalanced results using
-% the two flags \texttt{@@_begin} and \texttt{@@_end}, raised
-% whenever we see too many begin-group or end-group tokens in a
-% submatch. We disable \cs{__seq_item:n} to prevent two
-% \texttt{x}-expansions.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group_end_extract_seq:N #1
- {
- \cs_set_eq:NN \__seq_item:n \scan_stop:
- \flag_clear:n { @@_begin }
- \flag_clear:n { @@_end }
- \tl_set:Nx \l_@@_internal_a_tl
- {
- \s__seq
- \int_step_function:nnnN
- { \l_@@_min_submatch_int }
- { 1 }
- { \l_@@_submatch_int - 1 }
- \@@_extract_seq_aux:n
- }
- \int_compare:nNnF
- { \flag_height:n { @@_begin } + \flag_height:n { @@_end } }
- = 0
- {
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { splitting~or~extracting~submatches }
- { \flag_height:n { @@_end } }
- { \flag_height:n { @@_begin } }
- }
- \use:x
- {
- \group_end:
- \tl_set:Nn \exp_not:N #1 { \l_@@_internal_a_tl }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, EXP]{\@@_extract_seq_aux:n, \@@_extract_seq_aux:ww}
-% The \texttt{:n} auxiliary builds one item of the sequence of
-% submatches. First compute the brace balance of the submatch, then
-% extract the submatch from the query, adding the appropriate braces
-% and raising a flag if the submatch is not balanced.
-% \begin{macrocode}
-\cs_new:Npn \@@_extract_seq_aux:n #1
- {
- \__seq_item:n
- {
- \exp_after:wN \@@_extract_seq_aux:ww
- \__int_value:w \@@_submatch_balance:n {#1} ; #1;
- }
- }
-\cs_new:Npn \@@_extract_seq_aux:ww #1; #2;
- {
- \if_int_compare:w #1 < 0 \exp_stop_f:
- \flag_raise:n { @@_end }
- \prg_replicate:nn {-#1} { \exp_not:n { { \if_false: } \fi: } }
- \fi:
- \@@_query_submatch:n {#2}
- \if_int_compare:w #1 > 0 \exp_stop_f:
- \flag_raise:n { @@_begin }
- \prg_replicate:nn {#1} { \exp_not:n { \if_false: { \fi: } } }
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]
-% {\@@_extract:, \@@_extract_b:wn, \@@_extract_e:wn}
-% Our task here is to extract from the property list
-% \cs{l_@@_success_submatches_prop} the list of end-points of
-% submatches, and store them in appropriate array entries, from
-% \cs{l_@@_zeroth_submatch_int} upwards. We begin by emptying those
-% entries. Then for each \meta{key}--\meta{value} pair in
-% the property list update the appropriate entry. This
-% is somewhat a hack: the \meta{key} is a non-negative integer
-% followed by |<| or |>|, which we use in a comparison to $-1$. At the
-% end, store the information about the position at which the match
-% attempt started, in \cs{g_@@_submatch_prev_intarray}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_extract:
- {
- \if_meaning:w \c_true_bool \g_@@_success_bool
- \int_set_eq:NN \l_@@_zeroth_submatch_int \l_@@_submatch_int
- \prg_replicate:nn \l_@@_capturing_group_int
- {
- \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
- { \l_@@_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
- { \l_@@_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
- { \l_@@_submatch_int } { 0 }
- \int_incr:N \l_@@_submatch_int
- }
- \prop_map_inline:Nn \l_@@_success_submatches_prop
- {
- \if_int_compare:w ##1 - 1 \exp_stop_f:
- \exp_after:wN \@@_extract_e:wn \__int_value:w
- \else:
- \exp_after:wN \@@_extract_b:wn \__int_value:w
- \fi:
- \__int_eval:w \l_@@_zeroth_submatch_int + ##1 {##2}
- }
- \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
- { \l_@@_zeroth_submatch_int } { \l_@@_start_pos_int }
- \fi:
- }
-\cs_new_protected:Npn \@@_extract_b:wn #1 < #2
- { \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray {#1} {#2} }
-\cs_new_protected:Npn \@@_extract_e:wn #1 > #2
- { \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray {#1} {#2} }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Replacement}
-%
-% \begin{macro}[aux]{\@@_replace_once:nnN}
-% Build the \textsc{nfa} and the replacement functions, then find a
-% single match. If the match failed, simply exit the
-% group. Otherwise, we do the replacement. Extract submatches. Compute
-% the brace balance corresponding to replacing this match by the
-% replacement (this depends on submatches). Prepare the replaced token
-% list: the replacement function produces the tokens from the start of
-% the query to the start of the match and the replacement text for
-% this match; we need to add the tokens from the end of the match to
-% the end of the query. Finally, store the result in the user's
-% variable after closing the group: this step involves an additional
-% \texttt{x}-expansion, and checks that braces are balanced in the
-% final result.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replace_once:nnN #1#2#3
- {
- \group_begin:
- \@@_single_match:
- #1
- \@@_replacement:n {#2}
- \exp_args:No \@@_match:n { #3 }
- \if_meaning:w \c_false_bool \g_@@_success_bool
- \group_end:
- \else:
- \@@_extract:
- \int_set:Nn \l_@@_balance_int
- {
- \@@_replacement_balance_one_match:n
- { \l_@@_zeroth_submatch_int }
- }
- \tl_set:Nx \l_@@_internal_a_tl
- {
- \@@_replacement_do_one_match:n { \l_@@_zeroth_submatch_int }
- \@@_query_range:nn
- {
- \__intarray_item_fast:Nn \g_@@_submatch_end_intarray
- { \l_@@_zeroth_submatch_int }
- }
- { \l_@@_max_pos_int }
- }
- \@@_group_end_replace:N #3
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_replace_all:nnN}
-% Match multiple times, and for every match, extract submatches and
-% additionally store the position at which the match attempt started.
-% The entries from \cs{l_@@_min_submatch_int} to
-% \cs{l_@@_submatch_int} hold information about submatches of every
-% match in order; each match corresponds to
-% \cs{l_@@_capturing_group_int} consecutive entries.
-% Compute the brace balance corresponding to doing all the
-% replacements: this is the sum of brace balances for replacing each
-% match. Join together the replacement texts for each match (including
-% the part of the query before the match), and the end of the query.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_replace_all:nnN #1#2#3
- {
- \group_begin:
- \@@_multi_match:n { \@@_extract: }
- #1
- \@@_replacement:n {#2}
- \exp_args:No \@@_match:n {#3}
- \int_set:Nn \l_@@_balance_int
- {
- 0
- \int_step_function:nnnN
- { \l_@@_min_submatch_int }
- \l_@@_capturing_group_int
- { \l_@@_submatch_int - 1 }
- \@@_replacement_balance_one_match:n
- }
- \tl_set:Nx \l_@@_internal_a_tl
- {
- \int_step_function:nnnN
- { \l_@@_min_submatch_int }
- \l_@@_capturing_group_int
- { \l_@@_submatch_int - 1 }
- \@@_replacement_do_one_match:n
- \@@_query_range:nn
- \l_@@_start_pos_int \l_@@_max_pos_int
- }
- \@@_group_end_replace:N #3
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\@@_group_end_replace:N}
-% If the brace balance is not $0$, raise an error. Then set the user's
-% variable |#1| to the \texttt{x}-expansion of
-% \cs{l_@@_internal_a_tl}, adding the appropriate braces to produce
-% a balanced result. And end the group.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_group_end_replace:N #1
- {
- \if_int_compare:w \l_@@_balance_int = 0 \exp_stop_f:
- \else:
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { replacing }
- { \int_max:nn { - \l_@@_balance_int } { 0 } }
- { \int_max:nn { \l_@@_balance_int } { 0 } }
- \fi:
- \use:x
- {
- \group_end:
- \tl_set:Nn \exp_not:N #1
- {
- \if_int_compare:w \l_@@_balance_int < 0 \exp_stop_f:
- \prg_replicate:nn { - \l_@@_balance_int }
- { { \if_false: } \fi: }
- \fi:
- \l_@@_internal_a_tl
- \if_int_compare:w \l_@@_balance_int > 0 \exp_stop_f:
- \prg_replicate:nn { \l_@@_balance_int }
- { \if_false: { \fi: } }
- \fi:
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsubsection{Storing and showing compiled patterns}
-%
-% \subsection{Messages}
-%
-% Messages for the preparsing phase.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { trailing-backslash }
- { Trailing~escape~character~'\iow_char:N\\'. }
- {
- A~regular~expression~or~its~replacement~text~ends~with~
- the~escape~character~'\iow_char:N\\'.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { x-missing-rbrace }
- { Missing~closing~brace~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{...#1'.~
- The~closing~brace~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { x-overflow }
- { Character~code~'#1'~too~large~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{\int_to_Hex:n{#1}\}'.~
- The~character~code~#1~is~larger~than~
- the~maximum~value~\int_use:N \c_max_char_int.
- }
-% \end{macrocode}
-%
-% Invalid quantifier.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { invalid-quantifier }
- { Braced~quantifier~'#1'~may~not~be~followed~by~'#2'. }
- {
- The~character~'#2'~is~invalid~in~the~braced~quantifier~'#1'.~
- The~only~valid~quantifiers~are~'*',~'?',~'+',~'{<int>}',~
- '{<min>,}'~and~'{<min>,<max>}',~optionally~followed~by~'?'.
- }
-% \end{macrocode}
-%
-% Messages for missing or extra closing brackets and parentheses, with
-% some fancy singular/plural handling for the case of parentheses.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { missing-rbrack }
- { Missing~right~bracket~inserted~in~regular~expression. }
- {
- LaTeX~was~given~a~regular~expression~where~a~character~class~
- was~started~with~'[',~but~the~matching~']'~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { missing-rparen }
- {
- Missing~right~
- \int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } ~
- inserted~in~regular~expression.
- }
- {
- LaTeX~was~given~a~regular~expression~with~\int_eval:n {#1} ~
- more~left~parentheses~than~right~parentheses.
- }
-\__msg_kernel_new:nnnn { regex } { extra-rparen }
- { Extra~right~parenthesis~ignored~in~regular~expression. }
- {
- LaTeX~came~across~a~closing~parenthesis~when~no~submatch~group~
- was~open.~The~parenthesis~will~be~ignored.
- }
-% \end{macrocode}
-%
-% Some escaped alphanumerics are not allowed everywhere.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { bad-escape }
- {
- Invalid~escape~'\iow_char:N\\#1'~
- \@@_if_in_cs:TF { within~a~control~sequence. }
- {
- \@@_if_in_class:TF
- { in~a~character~class. }
- { following~a~category~test. }
- }
- }
- {
- The~escape~sequence~'\iow_char:N\\#1'~may~not~appear~
- \@@_if_in_cs:TF
- {
- within~a~control~sequence~test~introduced~by~
- '\iow_char:N\\c\iow_char:N\{'.
- }
- {
- \@@_if_in_class:TF
- { within~a~character~class~ }
- { following~a~category~test~such~as~'\iow_char:N\\cL'~ }
- because~it~does~not~match~exactly~one~character.
- }
- }
-% \end{macrocode}
-%
-% Range errors.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { range-missing-end }
- { Invalid~end-point~for~range~'#1-#2'~in~character~class. }
- {
- The~end-point~'#2'~of~the~range~'#1-#2'~may~not~serve~as~an~
- end-point~for~a~range:~alphanumeric~characters~should~not~be~
- escaped,~and~non-alphanumeric~characters~should~be~escaped.
- }
-\__msg_kernel_new:nnnn { regex } { range-backwards }
- { Range~'[#1-#2]'~out~of~order~in~character~class. }
- {
- In~ranges~of~characters~'[x-y]'~appearing~in~character~classes,~
- the~first~character~code~must~not~be~larger~than~the~second.~
- Here,~'#1'~has~character~code~\int_eval:n {`#1},~while~
- '#2'~has~character~code~\int_eval:n {`#2}.
- }
-% \end{macrocode}
-%
-% Errors related to |\c| and |\u|.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { c-bad-mode }
- { Invalid~nested~'\iow_char:N\\c'~escape~in~regular~expression. }
- {
- The~'\iow_char:N\\c'~escape~cannot~be~used~within~
- a~control~sequence~test~'\iow_char:N\\c{...}'.~
- To~combine~several~category~tests,~use~'\iow_char:N\\c[...]'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrace }
- { Missing~right~brace~inserted~for~'\iow_char:N\\c'~escape. }
- {
- LaTeX~was~given~a~regular~expression~where~a~
- '\iow_char:N\\c\iow_char:N\{...'~construction~was~not~ended~
- with~a~closing~brace~'\iow_char:N\}'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrack }
- { Missing~right~bracket~inserted~for~'\iow_char:N\\c'~escape. }
- {
- A~construction~'\iow_char:N\\c[...'~appears~in~a~
- regular~expression,~but~the~closing~']'~is~not~present.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-category }
- { Invalid~character~'#1'~following~'\iow_char:N\\c'~escape. }
- {
- In~regular~expressions,~the~'\iow_char:N\\c'~escape~sequence~
- may~only~be~followed~by~a~left~brace,~a~left~bracket,~or~a~
- capital~letter~representing~a~character~category,~namely~
- one~of~'ABCDELMOPSTU'.
- }
-\__msg_kernel_new:nnnn { regex } { c-trailing }
- { Trailing~category~code~escape~'\iow_char:N\\c'... }
- {
- A~regular~expression~ends~with~'\iow_char:N\\c'~followed~
- by~a~letter.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-lbrace }
- { Missing~left~brace~following~'\iow_char:N\\u'~escape. }
- {
- The~'\iow_char:N\\u'~escape~sequence~must~be~followed~by~
- a~brace~group~with~the~name~of~the~variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-rbrace }
- { Missing~right~brace~inserted~for~'\iow_char:N\\u'~escape. }
- {
- LaTeX~
- \str_if_eq_x:nnTF { } {#2}
- { reached~the~end~of~the~string~ }
- { encountered~an~escaped~alphanumeric~character '\iow_char:N\\#2'~ }
- when~parsing~the~argument~of~an~'\iow_char:N\\u\iow_char:N\{...\}'~escape.
- }
-% \end{macrocode}
-%
-% Errors when encountering the \textsc{posix} syntax |[:...:]|.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { posix-unsupported }
- { POSIX~collating~element~'[#1 ~ #1]'~not~supported. }
- {
- The~'[.foo.]'~and~'[=bar=]'~syntaxes~have~a~special~meaning~
- in~POSIX~regular~expressions.~This~is~not~supported~by~LaTeX.~
- Maybe~you~forgot~to~escape~a~left~bracket~in~a~character~class?
- }
-\__msg_kernel_new:nnnn { regex } { posix-unknown }
- { POSIX~class~'[:#1:]'~unknown. }
- {
- '[:#1:]'~is~not~among~the~known~POSIX~classes~
- '[:alnum:]',~'[:alpha:]',~'[:ascii:]',~'[:blank:]',~
- '[:cntrl:]',~'[:digit:]',~'[:graph:]',~'[:lower:]',~
- '[:print:]',~'[:punct:]',~'[:space:]',~'[:upper:]',~
- '[:word:]',~and~'[:xdigit:]'.
- }
-\__msg_kernel_new:nnnn { regex } { posix-missing-close }
- { Missing~closing~':]'~for~POSIX~class. }
- { The~POSIX~syntax~'#1'~must~be~followed~by~':]',~not~'#2'. }
-% \end{macrocode}
-%
-% In various cases, the result of a \pkg{l3regex} operation can leave us
-% with an unbalanced token list, which we must re-balance by adding
-% begin-group or end-group character tokens.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { result-unbalanced }
- { Missing~brace~inserted~when~#1. }
- {
- LaTeX~was~asked~to~do~some~regular~expression~operation,~
- and~the~resulting~token~list~would~not~have~the~same~number~
- of~begin-group~and~end-group~tokens.~Braces~were~inserted:~
- #2~left,~#3~right.
- }
-% \end{macrocode}
-%
-% Error message for unknown options.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { unknown-option }
- { Unknown~option~'#1'~for~regular~expressions. }
- {
- The~only~available~option~is~'case-insensitive',~toggled~by~
- '(?i)'~and~'(?-i)'.
- }
-\__msg_kernel_new:nnnn { regex } { special-group-unknown }
- { Unknown~special~group~'#1~...'~in~a~regular~expression. }
- {
- The~only~valid~constructions~starting~with~'(?'~are~
- '(?:~...~)',~'(?|~...~)',~'(?i)',~and~'(?-i)'.
- }
-% \end{macrocode}
-%
-% Errors in the replacement text.
-% \begin{macrocode}
-\__msg_kernel_new:nnnn { regex } { replacement-c }
- { Misused~'\iow_char:N\\c'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~
- or~a~brace~group,~not~by~'#1'.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-u }
- { Misused~'\iow_char:N\\u'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\u'~escape~sequence~
- must~be~~followed~by~a~brace~group~holding~the~name~of~the~
- variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-g }
- {
- Missing~brace~for~the~'\iow_char:N\\g'~construction~
- in~a~replacement~text.
- }
- {
- In~the~replacement~text~for~a~regular~expression~search,~
- submatches~are~represented~either~as~'\iow_char:N \\g{dd..d}',~
- or~'\\d',~where~'d'~are~single~digits.~Here,~a~brace~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-end }
- {
- Missing~character~for~the~'\iow_char:N\\c<category><character>'~
- construction~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~representing~
- the~character~category.~Then,~a~character~must~follow.~LaTeX~
- reached~the~end~of~the~replacement~when~looking~for~that.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-in-cs }
- {
- Category~code~'\iow_char:N\\c#1#3'~ignored~inside~
- '\iow_char:N\\c\{...\}'~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~category~codes~of~the~argument~of~
- '\iow_char:N\\c\{...\}'~are~ignored~when~building~the~control~
- sequence~name.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-null-space }
- { TeX~cannot~build~a~space~token~with~character~code~0. }
- {
- You~asked~for~a~character~token~with~category~space,~
- and~character~code~0,~for~instance~through~
- '\iow_char:N\\cS\iow_char:N\\x00'.~
- This~specific~case~is~impossible~and~will~be~replaced~
- by~a~normal~space.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rbrace }
- { Missing~right~brace~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { brace } { braces } .
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rparen }
- { Missing~right~parenthesis~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } .
- }
-% \end{macrocode}
-%
-% \begin{macro}[aux]{\@@_msg_repeated:nnN}
-% This is not technically a message, but seems related enough to go
-% there. The arguments are: |#1| is the minimum number of repetitions;
-% |#2| is the number of allowed extra repetitions ($-1$ for infinite
-% number), and |#3| tells us about lazyness.
-% \begin{macrocode}
-\cs_new:Npn \@@_msg_repeated:nnN #1#2#3
- {
- \str_if_eq_x:nnF { #1 #2 } { 1 0 }
- {
- , ~ repeated ~
- \int_case:nnF {#2}
- {
- { -1 } { #1~or~more~times,~\bool_if:NTF #3 { lazy } { greedy } }
- { 0 } { #1~times }
- }
- {
- between~#1~and~\int_eval:n {#1+#2}~times,~
- \bool_if:NTF #3 { lazy } { greedy }
- }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Code for tracing}
-%
-% The tracing code is still very experimental, and is meant to be used
-% with the \pkg{l3trace} package, currently in \texttt{l3trial}.
-%
-% \begin{macro}[int]{\@@_trace_states:n}
-% This function lists the contents of all states of the \textsc{nfa},
-% stored in \tn{toks} from $0$ to \cs{l_@@_max_state_int}
-% (excluded).
-% \begin{macrocode}
-%<*trace>
-\cs_new_protected:Npn \@@_trace_states:n #1
- {
- \int_step_inline:nnnn
- \l_@@_min_state_int
- { 1 }
- { \l_@@_max_state_int - 1 }
- {
- \trace:nnx { regex } { #1 }
- { \iow_char:N \\toks ##1 = { \@@_toks_use:w ##1 } }
- }
- }
-%</trace>
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macrocode}
-%</initex|package>
-% \end{macrocode}
-%
-% \end{implementation}
-%
-% \PrintIndex
-% \endinput
-%^^A NOT IMPLEMENTED
-%^^A \p{xx} a character with the xx property
-%^^A \P{xx} a character without the xx property
-%^^A [[:xxx:]] positive POSIX named set
-%^^A [[:^xxx:]] negative POSIX named set
-%^^A (?=...) positive look ahead
-%^^A (?!...) negative look ahead
-%^^A (?<=...) positive look behind
-%^^A (?<!...) negative look behind
-%^^A (?<name>...) or (?'name'...) or (?P<name>...)
-%^^A named capturing group
-%^^A \R a newline sequence
-%^^A \X an extended Unicode sequence
-%^^A (?C) or (?Cn) callout with data n
-%^^A (?R) recurse whole pattern
-%^^A (?[+-]n) or \g<[+-]n> or (?&name) or (?P>name) or \g<name>
-%^^A call subpattern
-%^^A (?([+-]n)... or (?(<name>)...
-%^^A reference condition
-%^^A (?(R)... or (?(Rn)... or (?(R&name)...
-%^^A recursion condition
-%^^A (?(DEFINE)... define subpattern for reference
-%^^A (?(assert)... assertion condition
-%^^A (*ACCEPT) force successful match
-%^^A (*FAIL) force backtrack; synonym (*F)
-%^^A (*COMMIT) overall failure, no advance of starting point
-%^^A (*PRUNE) advance to next starting character
-%^^A (*SKIP) advance start to current matching position
-%^^A (*THEN) local failure, backtrack to next alternation
-%^^A (*CR) or (*LF) or (*CRLF) or (*ANYCRLF) or (*ANY)
-%^^A newline convention
-%^^A (*BSR_ANYCRLF) or (*BSR_UNICODE)
-%^^A change what \R matches.
-%^^A
-%^^A \cx "control-x", where x is any ASCII character
-%^^A \C one byte, even in UTF-8 mode (best avoided)
-%^^A + possessive quantifiers
-%^^A (?>...) atomic, non-capturing group
-%^^A (?#....) comment (not nestable)
-%^^A (?JmsUx) options (duplicate names; multiline; single line;
-%^^A ungreedy; extended)
-%^^A (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
-%^^A (*UTF8) set UTF-8 mode (PCRE_UTF8)
-%^^A (*UCP) set PCRE_UCP (use Unicode properties for \d etc)
-%^^A \n or \gn or \g{[-]n} or \g{name} or (?P=name)
-%^^A or \k<name> or \k'name' or \k{name}
-%^^A back-references
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-convert.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-convert.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-convert.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -47,7 +47,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -252,9 +252,8 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{l3str-convert}{2017/05/13}{}
+\ProvidesExplPackage{l3str-convert}{2017/05/29}{}
{L3 Experimental string encoding conversions}
-\RequirePackage{l3tl-analysis,l3tl-build}
% \end{macrocode}
%
% \subsection{Helpers}
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-format.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-format.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str-format.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -47,7 +47,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -163,7 +163,7 @@
%
% \begin{macrocode}
%<*package>
-\ProvidesExplPackage{l3str-format}{2017/05/13}{}
+\ProvidesExplPackage{l3str-format}{2017/05/29}{}
{L3 Experimental string formatting}
\RequirePackage{l3str}
%</package>
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str.ins
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str.ins 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3str.ins 2017-06-05 23:17:08 UTC (rev 44483)
@@ -51,13 +51,8 @@
\keepsilent
-\generate{\file{l3regex.sty} {\from{l3regex.dtx} {package}}}
\generate{\file{l3str-convert.sty} {\from{l3str-convert.dtx} {package}}}
\generate{\file{l3str-format.sty} {\from{l3str-format.dtx} {package}}}
-\generate{\file{l3tl-analysis.sty} {\from{l3tl-analysis.dtx} {package}}}
-\generate{\file{l3tl-build.sty} {\from{l3tl-build.dtx} {package}}}
-\generate{\file{l3regex-trace.sty} {\from{l3regex.dtx} {package,trace}}}
-\generate{\file{l3intarray.sty} {\from{l3intarray.dtx} {package}}}
% Escapings.
\generate{%
Deleted: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-analysis.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-analysis.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-analysis.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,1120 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: l3tl-analysis.dtx Copyright (C) 2011-2012,2015-2017 The LaTeX3 Project
-%
-% It may be distributed and/or modified under the conditions of the
-% LaTeX Project Public License (LPPL), either version 1.3c of this
-% license or (at your option) any later version. The latest version
-% of this license is in the file
-%
-% http://www.latex-project.org/lppl.txt
-%
-% This file is part of the "l3experimental bundle" (The Work in LPPL)
-% and all files in that bundle must be distributed together.
-%
-% -----------------------------------------------------------------------
-%
-% The development version of the bundle can be found at
-%
-% https://github.com/latex3/latex3
-%
-% for those people who are interested.
-%
-%<*driver|package>
-\RequirePackage{expl3}
-%</driver|package>
-%<*driver>
-\documentclass[full]{l3doc}
-\usepackage{amsmath}
-\begin{document}
- \DocInput{\jobname.dtx}
-\end{document}
-%</driver>
-% \fi
-%
-%
-% \title{^^A
-% The \textsf{l3tl-analysis} package: analysing token lists^^A
-% }
-%
-% \author{^^A
-% The \LaTeX3 Project\thanks
-% {^^A
-% E-mail:
-% \href{mailto:latex-team at latex-project.org}
-% {latex-team at latex-project.org}^^A
-% }^^A
-% }
-%
-% \date{Released 2017/05/13}
-%
-% \maketitle
-%
-% \begin{documentation}
-%
-% \section{\pkg{l3tl-analysis} documentation}
-%
-% This module mostly provides internal functions for use in the
-% \pkg{l3regex} module. However, it provides as a side-effect a user
-% debugging function, very similar to the \cs{ShowTokens} macro from the
-% \pkg{ted} package.
-%
-% \begin{function}{\tl_show_analysis:N, \tl_show_analysis:n}
-% \begin{syntax}
-% \cs{tl_show_analysis:n} \Arg{token list}
-% \end{syntax}
-% Displays to the terminal the detailed decomposition of the
-% \meta{token list} into tokens, showing the category code of each
-% character token, the meaning of control sequences and active
-% characters, and the value of registers.
-% \end{function}
-%
-% \subsection{Internal functions}
-%
-% \begin{variable}{\s__tl}
-% The format used to store token lists internally uses the scan mark
-% \cs{s__tl} as a delimiter.
-% \end{variable}
-%
-% \begin{function}{\__tl_analysis_map_inline:nn}
-% \begin{syntax}
-% \cs{__tl_analysis_map_inline:nn} \Arg{token list} \Arg{inline function}
-% \end{syntax}
-% Applies the \meta{inline function} to each individual \meta{token}
-% in the \meta{token list}. The \meta{inline function} receives three
-% arguments:
-% \begin{itemize}
-% \item \meta{tokens}, which both \texttt{o}-expand and
-% \texttt{x}-expand to the \meta{token}. The detailed form of
-% \meta{token} may change in later releases.
-% \item \meta{catcode}, a capital hexadecimal digit which denotes
-% the category code of the \meta{token} (0: control sequence, 1:
-% begin-group, 2: end-group, 3: math shift, 4: alignment tab, 6:
-% parameter, 7: superscript, 8: subscript, A: space, B: letter,
-% C: other, D: active).
-% \item \meta{char code}, a decimal representation of the character
-% code of the token, $-1$ if it is a control sequence (with
-% \meta{catcode} $0$).
-% \end{itemize}
-% \end{function}
-%
-% For optimizations in \pkg{l3regex} (when matching control sequences),
-% it may be useful to provide a \cs{__tl_analysis_from_str_map_inline:nn}
-% function, perhaps named \cs{__str_analysis_map_inline:nn}.
-%
-% \subsection{Internal format}
-%
-% The task of the \pkg{l3tl-analysis} module is to convert token lists
-% to an internal format which allows us to extract all the relevant
-% information about individual tokens (category code, character code),
-% as well as reconstruct the token list quickly. This internal format is
-% used in \pkg{l3regex} where we need to support arbitrary tokens, and
-% it is used in conversion functions in \pkg{l3str-convert}, where we wish to
-% support clusters of characters instead of single tokens.
-%
-% We thus need a way to encode any \meta{token} (even begin-group and
-% end-group character tokens) in a way amenable to manipulating tokens
-% individually. The best we can do is to find \meta{tokens} which both
-% \texttt{o}-expand and \texttt{x}-expand to the given
-% \meta{token}. Collecting more information about the category code and
-% character code is also useful for regular expressions, since most
-% regexes are catcode-agnostic. The internal format thus takes the form
-% of a succession of items of the form
-% \begin{quote}
-% \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
-% \end{quote}
-% The \meta{tokens} \texttt{o}- \emph{and} \texttt{x}-expand to the
-% original token in the token list or to the cluster of tokens
-% corresponding to one Unicode character in the given encoding (for
-% \pkg{l3str-convert}). The \meta{catcode} is given as a single hexadecimal
-% digit, $0$ for control sequences. The \meta{char code} is given as a
-% decimal number, $-1$ for control sequences.
-%
-% Using delimited arguments lets us build the \meta{tokens}
-% progressively when doing an encoding conversion in \pkg{l3str-convert}. On the
-% other hand, the delimiter \cs{s__tl} may not appear unbraced in
-% \meta{tokens}. This is not a problem because we are careful to wrap
-% control sequences in braces (as an argument to \cs{exp_not:n}) when
-% converting from a general token list to the internal format.
-%
-% The current rule for converting a \meta{token} to a balanced set of
-% \meta{tokens} which both \texttt{o}-expands and \texttt{x}-expands to
-% it is the following.
-% \begin{itemize}
-% \item A control sequence |\cs| becomes |\exp_not:n { \cs }|
-% \cs{s__tl} $0$ $-1$ \cs{s__tl}.
-% \item A begin-group character |{| becomes \cs{exp_after:wN} |{|
-% \cs{if_false:} |}| \cs{fi:} \cs{s__tl} $1$ \meta{char code}
-% \cs{s__tl}.
-% \item An end-group character |}| becomes \cs{if_false:} |{| \cs{fi:}
-% |}| \cs{s__tl} $2$ \meta{char code} \cs{s__tl}.
-% \item A character with any other category code becomes
-% \cs{exp_not:n} \Arg{character} \cs{s__tl} \meta{hex catcode}
-% \meta{char code} \cs{s__tl}.
-% \end{itemize}
-%
-% ^^A todo: ask LuaTeX list for an \ifx\undefined <active char>
-% ^^A which does not add the <active char> in memory.
-%
-% \end{documentation}
-%
-% \begin{implementation}
-%
-% \section{\pkg{l3tl-analysis} implementation}
-%
-% \begin{macrocode}
-%<*initex|package>
-% \end{macrocode}
-%
-% \begin{macrocode}
-%<@@=tl_analysis>
-% \end{macrocode}
-%
-% \begin{macrocode}
-\ProvidesExplPackage{l3tl-analysis}{2017/05/13}{}
- {L3 Experimental token list analysis}
-% \end{macrocode}
-%
-% \subsection{Variables and helper functions}
-%
-% \begin{variable}{\s__tl}
-% The scan mark \cs{s__tl} is used as a delimiter in the internal
-% format. This is more practical than using a quark, because we would
-% then need to control expansion much more carefully: compare
-% \cs{__int_value:w} |`#1| \cs{s__tl} with \cs{__int_value:w} |`#1|
-% \cs{exp_stop_f:} \cs{exp_not:N} \cs{q_mark} to extract a character
-% code followed by the delimiter in an \texttt{x}-expansion.
-% \begin{macrocode}
-\__scan_new:N \s__tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_internal_tl}
-% This token list variable is used to hand the argument of
-% \cs{tl_show_analysis:n} to \cs{tl_show_analysis:N}.
-% \begin{macrocode}
-\tl_new:N \l_@@_internal_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_token}
-% \begin{variable}{\l_@@_char_token}
-% The tokens in the token list are probed with the \TeX{} primitive
-% \tn{futurelet}. We use \cs{l_@@_token} in that
-% construction. In some cases, we convert the following token to a
-% string before probing it: then the token variable used is
-% \cs{l_@@_char_token}.
-% \begin{macrocode}
-\cs_new_eq:NN \l_@@_token ?
-\cs_new_eq:NN \l_@@_char_token ?
-% \end{macrocode}
-% \end{variable}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_normal_int}
-% The number of normal (\texttt{N}-type argument) tokens since the
-% last special token.
-% \begin{macrocode}
-\int_new:N \l_@@_normal_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_index_int}
-% During the first pass, this is the index in the array being built.
-% During the second pass, it is equal to the maximum index in the
-% array from the first pass.
-% \begin{macrocode}
-\int_new:N \l_@@_index_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_nesting_int}
-% Nesting depth of explicit begin-group and end-group characters
-% during the first pass. This lets us detect the end of the token list
-% without a reserved end-marker.
-% \begin{macrocode}
-\int_new:N \l_@@_nesting_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_type_int}
-% When encountering special characters, we record their \enquote{type}
-% in this integer.
-% \begin{macrocode}
-\int_new:N \l_@@_type_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\g_@@_result_tl}
-% The result of the conversion is stored in this token list, with a
-% succession of items of the form
-% \begin{quote}
-% \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
-% \end{quote}
-% \begin{macrocode}
-\tl_new:N \g_@@_result_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}[int, EXP]{\@@_extract_charcode:}
-% \begin{macro}[aux, EXP]{\@@_extract_charcode_aux:w}
-% Extracting the character code from the meaning of
-% \cs{l_@@_token}. This has no error checking, and should
-% only be assumed to work for begin-group and end-group character
-% tokens. It produces a number in the form |`|\meta{char}.
-% \begin{macrocode}
-\cs_new:Npn \@@_extract_charcode:
- {
- \exp_after:wN \@@_extract_charcode_aux:w
- \token_to_meaning:N \l_@@_token
- }
-\cs_new:Npn \@@_extract_charcode_aux:w #1 ~ #2 ~ { ` }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_cs_space_count:NN}
-% \begin{macro}[aux, EXP]{\@@_cs_space_count:w}
-% \begin{macro}[aux, EXP]{\@@_cs_space_count_end:w}
-% Counts the number of spaces in the string representation of its
-% second argument, as well as the number of characters following the
-% last space in that representation, and feeds the two numbers as
-% semicolon-delimited arguments to the first argument. When this
-% function is used, the escape character is printable and non-space.
-% \begin{macrocode}
-\cs_new:Npn \@@_cs_space_count:NN #1 #2
- {
- \exp_after:wN #1
- \__int_value:w \__int_eval:w 0
- \exp_after:wN \@@_cs_space_count:w
- \token_to_str:N #2
- \fi: \@@_cs_space_count_end:w ; ~ !
- }
-\cs_new:Npn \@@_cs_space_count:w #1 ~
- {
- \if_false: #1 #1 \fi:
- + 1
- \@@_cs_space_count:w
- }
-\cs_new:Npn \@@_cs_space_count_end:w ; #1 \fi: #2 !
- { \exp_after:wN ; \__int_value:w \str_count_ignore_spaces:n {#1} ; }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Plan of attack}
-%
-% Our goal is to produce a token list of the form roughly
-% \begin{quote}
-% \meta{token 1} \cs{s__tl} \meta{catcode 1} \meta{char code 1} \cs{s__tl} \\
-% \meta{token 2} \cs{s__tl} \meta{catcode 2} \meta{char code 2} \cs{s__tl} \\
-% \ldots{} \\
-% \meta{token N} \cs{s__tl} \meta{catcode N} \meta{char code N} \cs{s__tl}
-% \end{quote}
-% Most but not all tokens can be grabbed as an undelimited
-% (\texttt{N}-type) argument by \TeX{}. The plan is to have a two pass
-% system. In the first pass, locate special tokens, and store them in
-% various \tn{toks} registers. In the second pass, which is done within
-% an \texttt{x}-expanding assignment, normal tokens are taken in as
-% \texttt{N}-type arguments, and special tokens are retrieved from the
-% \tn{toks} registers, and removed from the input stream by some means.
-% The whole process takes linear time, because we avoid building the
-% result one item at a time.
-%
-% To ease the difficult first pass, we first do some setup with
-% \cs{@@_setup:n}. Active characters set equal to non-active
-% characters cause trouble, so we disable all active characters by
-% setting them equal to \texttt{undefined} locally. We also set there
-% the escape character to be printable (backslash, but this later
-% oscillates between slash and backslash): this makes it possible to
-% distinguish characters from control sequences.
-%
-% A token has two characteristics: its \tn{meaning}, and what it looks
-% like for \TeX{} when it is in scanning mode (\emph{e.g.}, when
-% capturing parameters for a macro). For our purposes, we distinguish
-% the following meanings:
-% \begin{itemize}
-% \item begin-group token (category code $1$), either space (character
-% code $32$), or non-space;
-% \item end-group token (category code $2$), either space (character
-% code $32$), or non-space;
-% \item space token (category code $10$, character code $32$);
-% \item anything else (then the token is always an \texttt{N}-type
-% argument).
-% \end{itemize}
-% The token itself can \enquote{look like} one of the following
-% \begin{itemize}
-% \item a non-active character, in which case its meaning is
-% automatically that associated to its character code and category
-% code, we call it \enquote{true} character;
-% \item an active character (we eliminate those in the setup step);
-% \item a control sequence.
-% \end{itemize}
-% The only tokens which are not valid \texttt{N}-type arguments are true
-% begin-group characters, true end-group characters, and true spaces.
-% We will detect those characters by scanning ahead with \tn{futurelet},
-% then distinguishing true characters from control sequences set equal
-% to them using the \tn{string} representation.
-%
-% The second pass is a simple exercise in expandable loops.
-%
-% \begin{macro}[int]{\@@:n}
-% Everything is done within a group, and all definitions will be
-% local. We use \cs{group_align_safe_begin/end:} to avoid problems in
-% case \cs{@@:n} is used within an alignment and its argument
-% contains alignment tab tokens.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@:n #1
- {
- \group_begin:
- \group_align_safe_begin:
- \@@_setup:n {#1}
- \@@_a:n {#1}
- \@@_b:n {#1}
- \group_align_safe_end:
- \group_end:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \subsection{Setup}
-%
-% \begin{macro}[int]{\@@_setup:n}
-% \begin{macro}[aux]{\@@_disable_loop:N}
-% Active characters can cause problems later on in the processing,
-% so the first step is to disable them, by setting them to
-% \texttt{undefined}. Since Unicode contains too many characters
-% to loop over all of them, we instead loop over the input token
-% list as a string: any active character in the token list
-% must appear in its string representation. The string is shortened
-% a little by making the escape character unprintable. The active
-% space must be disabled separately (the loop skips over it otherwise),
-% and we end the loop by feeding an odd non-\texttt{N}-type argument
-% to the looping macro. For \pTeX{} and \upTeX{} we skip characters
-% beyond $[0,255]$ because \tn{lccode} only allows those values.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_setup:n #1
- {
- \int_set:Nn \tex_escapechar:D { -1 }
- \exp_after:wN \@@_disable_loop:N
- \tl_to_str:n {#1} { ~ } { ? \__prg_break: }
- \__prg_break_point:
- \scan_stop:
- }
-\group_begin:
- \char_set_catcode_active:N \^^@
- \cs_new_protected:Npn \@@_disable_loop:N #1
- {
- \tex_lccode:D 0 = `#1 ~
- \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
- \@@_disable_loop:N
- }
- \bool_lazy_or:nnT
- { \sys_if_engine_ptex_p: }
- { \sys_if_engine_uptex_p: }
- {
- \cs_gset_protected:Npn \@@_disable_loop:N #1
- {
- \use_none:n #1 \scan_stop:
- \if_int_compare:w 256 > `#1 \exp_stop_f:
- \tex_lccode:D 0 = `#1 ~
- \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
- \fi:
- \@@_disable_loop:N
- }
- }
-\group_end:
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{First pass}
-%
-% The goal of this pass is to detect special (non-\texttt{N}-type) tokens,
-% and count how many \texttt{N}-type tokens lie between special tokens.
-% Also, we wish to store some representation of each special token
-% in a \tn{toks} register.
-%
-% After the setup step, we have $11$ types of tokens:
-% \begin{itemize}
-% \item[1.] a true non-space begin-group character;
-% \item[2.] a true space begin-group character;
-% \item[3.] a true non-space end-group character;
-% \item[4.] a true space end-group character;
-% \item[5.] a true space blank space character;
-% \item[6.] an undefined active character;
-% \item[7.] any other true character;
-% \item[8.] a control sequence equal to a begin-group token (category code $1$);
-% \item[9.] a control sequence equal to an end-group token (category code $2$);
-% \item[10.] a control sequence equal to a space token
-% (character code $32$, category code $10$);
-% \item[11.] any other control sequence.
-% \end{itemize}
-% Our first tool is \tn{futurelet}. This cannot distinguish
-% case $8$ from $1$ or $2$, nor case $9$ from $3$ or $4$,
-% nor case $10$ from case $5$. Those cases will be distinguished
-% by applying the \tn{string} primitive to the following token,
-% after possibly changing the escape character to ensure that
-% a control sequence's string representation cannot be mistaken
-% for the true character.
-%
-% In cases $6$, $7$, and $11$, the following token is a valid
-% \texttt{N}-type argument, so we grab it and distinguish the case
-% of a character from a control sequence: in the latter case,
-% \cs{str_tail:n} \Arg{token} is non-empty, because the
-% escape character is printable.
-%
-% \begin{macro}[int]{\@@_a:n}
-% We read tokens one by one using \tn{futurelet}.
-% While performing the loop, we keep track of the number of
-% true begin-group characters minus the number of
-% true end-group characters in \cs{l_@@_nesting_int}.
-% This reaches $-1$ when we read the closing brace.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a:n #1
- {
- \int_set:Nn \tex_escapechar:D { 92 }
- \int_zero:N \l_@@_normal_int
- \int_zero:N \l_@@_index_int
- \int_zero:N \l_@@_nesting_int
- \if_false: { \fi: \@@_a_loop:w #1 }
- \int_decr:N \l_@@_index_int
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_loop:w}
-% Read one character and check its type.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a_loop:w
- { \tex_futurelet:D \l_@@_token \@@_a_type:w }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_type:w}
-% At this point, \cs{l_@@_token} holds the meaning
-% of the following token. We store in \cs{l_@@_type_int}
-% the meaning of the token ahead:
-% \begin{itemize}
-% \item 0 space token;
-% \item 1 begin-group token;
-% \item -1 end-group token;
-% \item 2 other.
-% \end{itemize}
-% The values $0$, $1$, $-1$ correspond to how much a true such
-% character changes the nesting level ($2$ is used only here,
-% and is irrelevant later). Then call the auxiliary for each case.
-% Note that nesting conditionals here is safe because we only skip
-% over \cs{l_@@_token} if it matches with one of the
-% character tokens (hence is not a primitive conditional).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a_type:w
- {
- \l_@@_type_int =
- \if_meaning:w \l_@@_token \c_space_token
- 0
- \else:
- \if_catcode:w \exp_not:N \l_@@_token \c_group_begin_token
- 1
- \else:
- \if_catcode:w \exp_not:N \l_@@_token \c_group_end_token
- - 1
- \else:
- 2
- \fi:
- \fi:
- \fi:
- \exp_stop_f:
- \if_case:w \l_@@_type_int
- \exp_after:wN \@@_a_space:w
- \or: \exp_after:wN \@@_a_bgroup:w
- \or: \exp_after:wN \@@_a_safe:N
- \else: \exp_after:wN \@@_a_egroup:w
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_space:w}
-% \begin{macro}[aux]{\@@_a_space_test:w}
-% In this branch, the following token's meaning is a blank space.
-% Apply \tn{string} to that token: if it is a control sequence
-% the result starts with the escape character; otherwise it is
-% a true blank space, whose string representation is also a blank space.
-% We test for that in \cs{@@_a_space_test:w},
-% after grabbing as \cs{l_@@_char_token} the first character
-% of the string representation.
-% Also, since \cs{@@_a_store:} expects the special token to be
-% stored in the relevant \tn{toks} register, we do that. The extra
-% \cs{exp_not:n} is unnecessary of course, but it makes the treatment
-% of all tokens more homogeneous.
-% If we discover that the next token was actually a control sequence
-% instead of a true space, then we step the counter of normal tokens.
-% We now have in front of us the whole string representation of
-% the control sequence, including potential spaces; those will appear
-% to be true spaces later in this pass. Hence, all other branches of
-% the code in this first pass need to consider the string representation,
-% so that the second pass does not need to test the meaning of tokens,
-% only strings.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a_space:w
- {
- \tex_afterassignment:D \@@_a_space_test:w
- \exp_after:wN \cs_set_eq:NN
- \exp_after:wN \l_@@_char_token
- \token_to_str:N
- }
-\cs_new_protected:Npn \@@_a_space_test:w
- {
- \if_meaning:w \l_@@_char_token \c_space_token
- \tex_toks:D \l_@@_index_int { \exp_not:n { ~ } }
- \@@_a_store:
- \else:
- \int_incr:N \l_@@_normal_int
- \fi:
- \@@_a_loop:w
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_bgroup:w, \@@_a_egroup:w}
-% \begin{macro}[aux]{\@@_a_group:nw}
-% \begin{macro}[aux]{\@@_a_group_test:w}
-% The token might be either a true character token with
-% catcode $1$ or $2$, or it could be a control sequence.
-% The only tricky case is if the character code happens
-% to be equal to the escape character: then we change
-% the escape character from backslash to solidus or back,
-% so that the string representation of the true character
-% and of a control sequence set equal to it start differently.
-% Then probe what the first character of that string
-% representation is: this is the place where we need
-% \cs{l_@@_char_token} to be a separate control
-% sequence from \cs{l_@@_token}, to compare them.
-% \begin{macrocode}
-\group_begin:
- \char_set_catcode_group_begin:N \^^@
- \char_set_catcode_group_end:N \^^E
- \cs_new_protected:Npn \@@_a_bgroup:w
- { \@@_a_group:nw { \exp_after:wN ^^@ \if_false: ^^E \fi: } }
- \char_set_catcode_group_begin:N \^^B
- \char_set_catcode_group_end:N \^^@
- \cs_new_protected:Npn \@@_a_egroup:w
- { \@@_a_group:nw { \if_false: ^^B \fi: ^^@ } }
-\group_end:
-\cs_new_protected:Npn \@@_a_group:nw #1
- {
- \tex_lccode:D 0 = \@@_extract_charcode: \scan_stop:
- \tex_lowercase:D { \tex_toks:D \l_@@_index_int {#1} }
- \if_int_compare:w \tex_lccode:D 0 = \tex_escapechar:D
- \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D }
- \fi:
- \tex_afterassignment:D \@@_a_group_test:w
- \exp_after:wN \cs_set_eq:NN
- \exp_after:wN \l_@@_char_token
- \token_to_str:N
- }
-\cs_new_protected:Npn \@@_a_group_test:w
- {
- \if_charcode:w \l_@@_token \l_@@_char_token
- \@@_a_store:
- \else:
- \int_incr:N \l_@@_normal_int
- \fi:
- \@@_a_loop:w
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_store:}
-% This function is called each time we meet a special token;
-% at this point, the \tn{toks} register \cs{l_@@_index_int}
-% holds a token list which expands to the given special token.
-% Also, the value of \cs{l_@@_type_int} indicates which case
-% we are in:
-% \begin{itemize}
-% \item -1 end-group character;
-% \item 0 space character;
-% \item 1 begin-group character.
-% \end{itemize}
-% We need to distinguish further the case of a space character
-% (code $32$) from other character codes, because those will
-% behave differently in the second pass. Namely, after testing
-% the \tn{lccode} of $0$ (which holds the present character code)
-% we change the cases above to
-% \begin{itemize}
-% \item -2 space end-group character;
-% \item -1 non-space end-group character;
-% \item 0 space blank space character;
-% \item 1 non-space begin-group character;
-% \item 2 space begin-group character.
-% \end{itemize}
-% This has the property that non-space characters correspond to odd
-% values of \cs{l_@@_type_int}.
-% The number of normal tokens, and the type of special token,
-% are packed into a \tn{skip} register.
-% Finally, we check whether we reached the last closing brace, in which
-% case we stop by disabling the looping function (locally).
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a_store:
- {
- \tex_advance:D \l_@@_nesting_int \l_@@_type_int
- \if_int_compare:w \tex_lccode:D 0 = `\ \exp_stop_f:
- \tex_multiply:D \l_@@_type_int 2 \exp_stop_f:
- \fi:
- \tex_skip:D \l_@@_index_int
- = \l_@@_normal_int sp plus \l_@@_type_int sp \scan_stop:
- \int_incr:N \l_@@_index_int
- \int_zero:N \l_@@_normal_int
- \if_int_compare:w \l_@@_nesting_int = -1 \exp_stop_f:
- \cs_set_eq:NN \@@_a_loop:w \scan_stop:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int]{\@@_a_safe:N}
-% \begin{macro}[aux]{\@@_a_cs:ww}
-% This should be the simplest case: since the upcoming token is safe,
-% we can simply grab it in a second pass. However, other branches of
-% the code must pass their tokens through \tn{string}, hence we do it
-% here as well, with some optimizations. If the token is a single
-% character (including space), the \cs{if_charcode:w} test yields
-% true, and we simply count one \enquote{normal} token. On the other
-% hand, if the token is a control sequence, we should replace it by
-% its string representation for compatibility with other code
-% branches. Instead of slowly looping through the characters with
-% the main code, we use the knowledge of how the second pass works:
-% if the control sequence name contains no space, count that token
-% as a number of normal tokens equal to its string length. If the
-% control sequence contains spaces, they should be registered as
-% special characters by increasing \cs{l_@@_index_int}
-% (no need to carefully count characters between spaces), and
-% all characters after the last space should be counted in the
-% following sequence of \enquote{normal} tokens.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_a_safe:N #1
- {
- \if_charcode:w
- \scan_stop:
- \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
- \scan_stop:
- \int_incr:N \l_@@_normal_int
- \else:
- \@@_cs_space_count:NN \@@_a_cs:ww #1
- \fi:
- \@@_a_loop:w
- }
-\cs_new_protected:Npn \@@_a_cs:ww #1; #2;
- {
- \if_int_compare:w #1 > 0 \exp_stop_f:
- \tex_skip:D \l_@@_index_int
- = \__int_eval:w \l_@@_normal_int + 1 sp \scan_stop:
- \tex_advance:D \l_@@_index_int #1 \exp_stop_f:
- \l_@@_normal_int #2 \exp_stop_f:
- \else:
- \tex_advance:D \l_@@_normal_int #2 \exp_stop_f:
- \fi:
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Second pass}
-%
-% The second pass is an exercise in expandable loops.
-% All the necessary information is stored in \tn{skip}
-% and \tn{toks} registers.
-%
-% \begin{macro}[int]{\@@_b:n}
-% \begin{macro}[int, EXP]{\@@_b_loop:w}
-% Start the loop with the index $0$. No need for an end-marker:
-% the loop will stop by itself when the last index is read.
-% We will repeatedly oscillate between reading long stretches
-% of normal tokens, and reading special tokens.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_b:n #1
- {
- \tl_gset:Nx \g_@@_result_tl
- {
- \@@_b_loop:w 0; #1
- \__prg_break_point:
- }
- }
-\cs_new:Npn \@@_b_loop:w #1;
- {
- \exp_after:wN \@@_b_normals:ww
- \__int_value:w \tex_skip:D #1 ; #1 ;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_b_normals:ww}
-% \begin{macro}[aux, EXP]{\@@_b_normal:wwN}
-% The first argument is the number of normal tokens which remain
-% to be read, and the second argument is the index in the array
-% produced in the first step.
-% A character's string representation is always one character long,
-% while a control sequence is always longer (we have set the escape
-% character to a printable value). In both cases, we leave
-% \cs{exp_not:n} \Arg{token} \cs{s__tl} in the input stream
-% (after \texttt{x}-expansion). Here, \cs{exp_not:n} is used
-% rather than \cs{exp_not:N} because |#3| could be \cs{s__tl},
-% hence must be hidden behind braces in the result.
-% \begin{macrocode}
-\cs_new:Npn \@@_b_normals:ww #1;
- {
- \if_int_compare:w #1 = 0 \exp_stop_f:
- \@@_b_special:w
- \fi:
- \@@_b_normal:wwN #1;
- }
-\cs_new:Npn \@@_b_normal:wwN #1; #2; #3
- {
- \exp_not:n { \exp_not:n { #3 } } \s__tl
- \if_charcode:w
- \scan_stop:
- \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
- \scan_stop:
- \exp_after:wN \@@_b_char:Nww
- \else:
- \exp_after:wN \@@_b_cs:Nww
- \fi:
- #3 #1; #2;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_b_char:Nww}
-% If the normal token we grab is a character, leave
-% \meta{catcode} \meta{charcode} followed by \cs{s__tl}
-% in the input stream, and call \cs{@@_b_normals:ww}
-% with its first argument decremented.
-% \begin{macrocode}
-\cs_new:Npx \@@_b_char:Nww #1
- {
- \exp_not:N \if_meaning:w #1 \exp_not:N \tex_undefined:D
- \token_to_str:N D \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_other_token
- \token_to_str:N C \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_letter_token
- \token_to_str:N B \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_toggle_token 3 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_alignment_token 4 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_superscript_token 7 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_subscript_token 8 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_space_token
- \token_to_str:N A \exp_not:N \else:
- 6
- \exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: }
- \exp_not:N \__int_value:w `#1 \s__tl
- \exp_not:N \exp_after:wN \exp_not:N \@@_b_normals:ww
- \exp_not:N \__int_value:w \exp_not:N \__int_eval:w - 1 +
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_b_cs:Nww}
-% \begin{macro}[aux, EXP]{\@@_b_cs_test:ww}
-% If the token we grab is a control sequence, leave
-% |0 -1| (as category code and character code) in the input stream,
-% followed by \cs{s__tl},
-% and call \cs{@@_b_normals:ww} with updated arguments.
-% \begin{macrocode}
-\cs_new:Npn \@@_b_cs:Nww #1
- {
- 0 -1 \s__tl
- \@@_cs_space_count:NN \@@_b_cs_test:ww #1
- }
-\cs_new:Npn \@@_b_cs_test:ww #1 ; #2 ; #3 ; #4 ;
- {
- \exp_after:wN \@@_b_normals:ww
- \__int_value:w \__int_eval:w
- \if_int_compare:w #1 = 0 \exp_stop_f:
- #3
- \else:
- \tex_skip:D \__int_eval:w #4 + #1 \__int_eval_end:
- \fi:
- - #2
- \exp_after:wN ;
- \__int_value:w \__int_eval:w #4 + #1 ;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[int, EXP]{\@@_b_special:w}
-% \begin{macro}[aux, EXP]{\@@_b_special_char:wN}
-% \begin{macro}[aux, EXP]{\@@_b_special_space:w}
-% Here, |#1| is the current index in the array built in the first pass.
-% Check now whether we reached the end (we shouldn't keep the trailing
-% end-group character that marked the end of the token list in the
-% first pass).
-% Unpack the \tn{toks} register: when \texttt{x}-expanding again,
-% we will get the special token.
-% Then leave the category code in the input stream, followed by
-% the character code, and call \cs{@@_b_loop:w} with the next index.
-% \begin{macrocode}
-\group_begin:
- \char_set_catcode_other:N A
- \cs_new:Npn \@@_b_special:w
- \fi: \@@_b_normal:wwN 0 ; #1 ;
- {
- \fi:
- \if_int_compare:w #1 = \l_@@_index_int
- \exp_after:wN \__prg_break:
- \fi:
- \tex_the:D \tex_toks:D #1 \s__tl
- \if_case:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
- A
- \or: 1
- \or: 1
- \else: 2
- \fi:
- \if_int_odd:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
- \exp_after:wN \@@_b_special_char:wN \__int_value:w
- \else:
- \exp_after:wN \@@_b_special_space:w \__int_value:w
- \fi:
- \__int_eval:w 1 + #1 \exp_after:wN ;
- \token_to_str:N
- }
-\group_end:
-\cs_new:Npn \@@_b_special_char:wN #1 ; #2
- {
- \__int_value:w `#2 \s__tl
- \@@_b_loop:w #1 ;
- }
-\cs_new:Npn \@@_b_special_space:w #1 ; ~
- {
- 32 \s__tl
- \@@_b_loop:w #1 ;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Mapping through the analysis}
-%
-% \begin{macro}[int]{\@@_map_inline:nn}
-% \begin{macro}[aux]{\@@_map_inline_aux:Nn}
-% First obtain the analysis of the token list into
-% \cs{g_@@_result_tl}. To allow nested mappings, increase the
-% nesting depth \cs{g__prg_map_int} (shared between all modules), then
-% define the looping macro, which has a name specific to that nesting
-% depth. That looping grabs the \meta{tokens}, \meta{catcode} and
-% \meta{char code}; it checks for the end of the loop with
-% \cs{use_none:n} |##2|, normally empty, but which becomes
-% \cs{tl_map_break:} at the end; it then performs the user's code
-% |#2|, and loops by calling itself. When the loop ends, remember to
-% decrease the nesting depth.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_map_inline:nn #1
- {
- \@@:n {#1}
- \int_gincr:N \g__prg_map_int
- \exp_args:Nc \@@_map_inline_aux:Nn
- { @@_map_inline_ \int_use:N \g__prg_map_int :wNw }
- }
-\cs_new_protected:Npn \@@_map_inline_aux:Nn #1#2
- {
- \cs_gset_protected:Npn #1 ##1 \s__tl ##2 ##3 \s__tl
- {
- \use_none:n ##2
- #2
- #1
- }
- \exp_after:wN #1
- \g_@@_result_tl
- \s__tl { ? \tl_map_break: } \s__tl
- \__prg_break_point:Nn \tl_map_break: { \int_gdecr:N \g__prg_map_int }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Showing the results}
-%
-% \begin{macro}{\tl_show_analysis:N, \tl_show_analysis:n}
-% \begin{macro}[int]{\@@_show:}
-% Add to \cs{@@:n} a third pass to display tokens to the terminal.
-% If the token list variable is not defined, throw the same error
-% as \cs{tl_show:N} by simply calling that function.
-% \begin{macrocode}
-\cs_new_protected:Npn \tl_show_analysis:N #1
- {
- \tl_if_exist:NTF #1
- {
- \exp_args:No \@@:n {#1}
- \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
- { \token_to_str:N #1 } { \tl_if_empty:NTF #1 { } { ? } } { } { }
- \@@_show:
- }
- { \tl_show:N #1 }
- }
-\cs_new_protected:Npn \tl_show_analysis:n #1
- {
- \@@:n {#1}
- \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
- { } { \tl_if_empty:nTF {#1} { } { ? } } { } { }
- \@@_show:
- }
-\cs_new_protected:Npn \@@_show:
- {
- \group_begin:
- \exp_args:NNx
- \group_end:
- \__msg_show_wrap:n
- {
- \exp_after:wN \@@_show_loop:wNw \g_@@_result_tl
- \s__tl { ? \__prg_break: } \s__tl
- \__prg_break_point:
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_show_loop:wNw}
-% Here, |#1| \texttt{o}- and \texttt{x}-expands to the token;
-% |#2| is the category code (one uppercase hexadecimal digit),
-% $0$ for control sequences;
-% |#3| is the character code, which we ignore.
-% In the cases of control sequences and active characters,
-% the meaning may overflow one line, and we want to truncate
-% it. Those cases are thus separated out.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_loop:wNw #1 \s__tl #2 #3 \s__tl
- {
- \use_none:n #2
- \exp_not:n { \\ > \ \ }
- \if_int_compare:w "#2 = 0 \exp_stop_f:
- \exp_after:wN \@@_show_cs:n
- \else:
- \if_int_compare:w "#2 = 13 \exp_stop_f:
- \exp_after:wN \exp_after:wN
- \exp_after:wN \@@_show_active:n
- \else:
- \exp_after:wN \exp_after:wN
- \exp_after:wN \@@_show_normal:n
- \fi:
- \fi:
- {#1}
- \@@_show_loop:wNw
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_show_normal:n}
-% Non-active characters are a simple matter of printing
-% the character, and its meaning. Our test suite checks that
-% begin-group and end-group characters do not mess up
-% \TeX{}'s alignment status.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_normal:n #1
- {
- \exp_after:wN \token_to_str:N #1 ~
- ( \exp_after:wN \token_to_meaning:N #1 )
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[EXP]{\@@_show_value:N}
-% This expands to the value of |#1| if it has any.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_value:N #1
- {
- \token_if_expandable:NF #1
- {
- \token_if_chardef:NTF #1 \__prg_break: { }
- \token_if_mathchardef:NTF #1 \__prg_break: { }
- \token_if_dim_register:NTF #1 \__prg_break: { }
- \token_if_int_register:NTF #1 \__prg_break: { }
- \token_if_skip_register:NTF #1 \__prg_break: { }
- \token_if_toks_register:NTF #1 \__prg_break: { }
- \use_none:nnn
- \__prg_break_point:
- \use:n { \exp_after:wN = \tex_the:D #1 }
- }
- }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux, rEXP]{\@@_show_cs:n}
-% \begin{macro}[aux, rEXP]{\@@_show_active:n}
-% \begin{macro}[aux, rEXP]{\@@_show_long:nn}
-% \begin{macro}[aux, rEXP]{\@@_show_long_aux:nnnn}
-% Control sequences and active characters are printed in the same way,
-% making sure not to go beyond the \cs{l_iow_line_count_int}. In case
-% of an overflow, we replace the last characters by
-% \cs{c_@@_show_etc_str}.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_cs:n #1
- { \exp_args:No \@@_show_long:nn {#1} { control~sequence= } }
-\cs_new:Npn \@@_show_active:n #1
- { \exp_args:No \@@_show_long:nn {#1} { active~character= } }
-\cs_new:Npn \@@_show_long:nn #1
- {
- \@@_show_long_aux:oofn
- { \token_to_str:N #1 }
- { \token_to_meaning:N #1 }
- { \@@_show_value:N #1 }
- }
-\cs_new:Npn \@@_show_long_aux:nnnn #1#2#3#4
- {
- \int_compare:nNnTF
- { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
- > { \l_iow_line_count_int - 3 }
- {
- \str_range:nnn { #1 ~ ( #4 #2 #3 ) } { 1 }
- {
- \l_iow_line_count_int - 3
- - \str_count:N \c_@@_show_etc_str
- }
- \c_@@_show_etc_str
- }
- { #1 ~ ( #4 #2 #3 ) }
- }
-\cs_generate_variant:Nn \@@_show_long_aux:nnnn { oof }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Messages}
-%
-% \begin{variable}{\c_@@_show_etc_str}
-% When a control sequence (or active character)
-% and its meaning are too long to fit in one line
-% of the terminal, the end is replaced by this token list.
-% \begin{macrocode}
-\tl_const:Nx \c_@@_show_etc_str % (
- { \token_to_str:N \ETC.) }
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macrocode}
-\__msg_kernel_new:nnn { kernel } { show-tl-analysis }
- {
- The~token~list~ \tl_if_empty:nF {#1} { #1 ~ }
- \tl_if_empty:nTF {#2}
- { is~empty }
- { contains~the~tokens: }
- }
-% \end{macrocode}
-%
-% \begin{macrocode}
-%</initex|package>
-% \end{macrocode}
-%
-% \end{implementation}
-%
-% \PrintIndex
Deleted: trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-build.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-build.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/l3str/l3tl-build.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,303 +0,0 @@
-% \iffalse meta-comment
-%
-%% File: l3tl-build.dtx Copyright (C) 2011-2017 The LaTeX3 Project
-%
-% It may be distributed and/or modified under the conditions of the
-% LaTeX Project Public License (LPPL), either version 1.3c of this
-% license or (at your option) any later version. The latest version
-% of this license is in the file
-%
-% http://www.latex-project.org/lppl.txt
-%
-% This file is part of the "l3experimental bundle" (The Work in LPPL)
-% and all files in that bundle must be distributed together.
-%
-% -----------------------------------------------------------------------
-%
-% The development version of the bundle can be found at
-%
-% https://github.com/latex3/latex3
-%
-% for those people who are interested.
-%
-%<*driver|package>
-% The version of expl3 required is tested as early as possible, as
-% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
-%<package> {}
-%<package> {%
-%<package> \PackageError{l3tl-build}{Support package l3kernel too old}
-%<package> {%
-%<package> Please install an up to date version of l3kernel\MessageBreak
-%<package> using your TeX package manager or from CTAN.\MessageBreak
-%<package> \MessageBreak
-%<package> Loading l3tl-build will abort!%
-%<package> }%
-%<package> \endinput
-%<package> }
-%</driver|package>
-%<*driver>
-\documentclass[full]{l3doc}
-\usepackage{amsmath}
-\begin{document}
- \DocInput{\jobname.dtx}
-\end{document}
-%</driver>
-% \fi
-%
-%
-% \title{^^A
-% The \textsf{l3tl-build} package: building token lists^^A
-% }
-%
-% \author{^^A
-% The \LaTeX3 Project\thanks
-% {^^A
-% E-mail:
-% \href{mailto:latex-team at latex-project.org}
-% {latex-team at latex-project.org}^^A
-% }^^A
-% }
-%
-% \date{Released 2017/05/13}
-%
-% \maketitle
-%
-% \begin{documentation}
-%
-% \section{\pkg{l3tl-build} documentation}
-%
-% This module provides no user function: it is meant for kernel use
-% only.
-%
-% There are two main ways of building token lists from individual
-% tokens. Either in one go within an \texttt{x}-expanding assignment, or
-% by repeatedly using \cs{tl_put_right:Nn}. The first method takes
-% linear time, but only allows expandable operations. The second method
-% takes time quadratic in the length of the token list, but allows
-% expandable and non-expandable operations.
-%
-% The goal of this module is to provide functions to build a token list
-% piece by piece in linear time, while allowing non-expandable
-% operations. This is achieved by abusing \tn{toks}: adding some tokens
-% to the token list is done by storing them in a free token register
-% (time $O(1)$ for each such operation). Those token registers are only
-% put together at the end, within an \texttt{x}-expanding assignment,
-% which takes a linear time.\footnote{If we run out of token registers,
-% then the currently filled-up \tn{toks} are put together in a
-% temporary token list, and cleared, and we ultimately use
-% \cs{tl_put_right:Nx} to put those chunks together. Hence the true
-% asymptotic is quadratic, with a very small constant.} Of course,
-% all this must be done in a group: we can't go and clobber the values
-% of legitimate \tn{toks} used by \LaTeXe{}.
-%
-% Since none of the current applications need the ability to insert
-% material on the left of the token list, I have not implemented
-% that. This could be done for instance by using odd-numbered \tn{toks}
-% for the left part, and even-numbered \tn{toks} for the right part.
-%
-% \subsection{Internal functions}
-%
-% \begin{function}
-% {
-% \__tl_build:Nw, \__tl_gbuild:Nw,
-% \__tl_build_x:Nw, \__tl_gbuild_x:Nw
-% }
-% \begin{syntax}
-% \cs{__tl_build:Nw} \meta{tl~var} \texttt{\ldots{}}
-% \cs{__tl_build_one:n} \Arg{tokens_1} \texttt{\ldots{}}
-% \cs{__tl_build_one:n} \Arg{tokens_2} \texttt{\ldots{}}
-% \ldots{}
-% \cs{__tl_build_end:}
-% \end{syntax}
-% Defines the \meta{tl~var} to contain the contents of \meta{tokens_1}
-% followed by \meta{tokens_2}, \emph{etc.} This is built in such a way
-% to be more efficient than repeatedly using \cs{tl_put_right:Nn}. The
-% code in \enquote{\texttt{\ldots{}}} does not need to be
-% expandable. The commands \cs{__tl_build:Nw} and \cs{__tl_build_end:}
-% start and end a group. The assignment to the \meta{tl~var} occurs
-% just after the end of that group, using \cs{tl_set:Nn},
-% \cs{tl_gset:Nn}, \cs{tl_set:Nx}, or \cs{tl_gset:Nx}.
-% \end{function}
-%
-% \begin{function}{\__tl_build_one:n, \__tl_build_one:o, \__tl_build_one:x}
-% \begin{syntax}
-% \cs{__tl_build_one:n} \Arg{tokens}
-% \end{syntax}
-% This function may only be used within the scope of a
-% \cs{__tl_build:Nw} function. It adds the \meta{tokens} on the
-% right of the current token list.
-% \end{function}
-%
-% \begin{function}{\__tl_build_end:}
-% Ends the scope started by \cs{__tl_build:Nw}, and performs the
-% relevant assignment.
-% \end{function}
-%
-% \end{documentation}
-%
-% \begin{implementation}
-%
-% \section{\pkg{l3tl-build} implementation}
-%
-% \begin{macrocode}
-%<*initex|package>
-% \end{macrocode}
-%
-% \begin{macrocode}
-%<@@=tl_build>
-% \end{macrocode}
-%
-% \begin{macrocode}
-\ProvidesExplPackage{l3tl-build}{2017/05/13}{}
- {L3 Experimental token list construction}
-% \end{macrocode}
-%
-% \subsection{Variables and helper functions}
-%
-% \begin{variable}{\l_@@_start_index_int, \l_@@_index_int}
-% Integers pointing to the starting index (currently always starts at
-% zero), and the current index. The corresponding \tn{toks} are
-% accessed directly by number.
-% \begin{macrocode}
-\int_new:N \l_@@_start_index_int
-\int_new:N \l_@@_index_int
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{variable}{\l_@@_result_tl}
-% The resulting token list is normally built in one go by unpacking
-% all \tn{toks} in some range. In the rare cases where there are too
-% many \cs{@@_one:n} commands, leading to the depletion of
-% registers, the contents of the current set of \tn{toks} is unpacked
-% into \cs{l_@@_result_tl}. This prevents overflow from
-% affecting the end-user (beyond an obvious performance hit).
-% \begin{macrocode}
-\tl_new:N \l_@@_result_tl
-% \end{macrocode}
-% \end{variable}
-%
-% \begin{macro}{\@@_unpack:}
-% \begin{macro}[aux, EXP]{\@@_unpack_loop:w}
-% The various pieces of the token list are built in \tn{toks} from the
-% \texttt{start_index} (inclusive) to the (current) \texttt{index}
-% (excluded). Those \tn{toks} are unpacked and stored in order in the
-% \texttt{result} token list. Optimizations would be possible here,
-% for instance, unpacking $10$ \tn{toks} at a time with a macro
-% expanding to |\the\toks#10...\the\toks#19|, but this should be kept
-% for much later.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_unpack:
- {
- \tl_put_right:Nx \l_@@_result_tl
- {
- \exp_after:wN \@@_unpack_loop:w
- \int_use:N \l_@@_start_index_int ;
- \__prg_break_point:
- }
- }
-\cs_new:Npn \@@_unpack_loop:w #1 ;
- {
- \if_int_compare:w #1 = \l_@@_index_int
- \exp_after:wN \__prg_break:
- \fi:
- \tex_the:D \tex_toks:D #1 \exp_stop_f:
- \exp_after:wN \@@_unpack_loop:w
- \int_use:N \__int_eval:w #1 + 1 ;
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \subsection{Building the token list}
-%
-% \begin{macro}
-% {
-% \@@:Nw , \@@_x:Nw ,
-% \__tl_gbuild:Nw , \__tl_gbuild_x:Nw
-% }
-% \begin{macro}[aux]{\@@_aux:NNw}
-% Similar to what is done for coffins: redefine some command, here
-% \cs{@@_end_assignment:n} to hold the relevant assignment (see
-% \cs{@@_end:} for details). Then initialize the start index and
-% the current index at zero, and empty the \texttt{result} token list.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@:Nw
- { \@@_aux:NNw \tl_set:Nn }
-\cs_new_protected:Npn \@@_x:Nw
- { \@@_aux:NNw \tl_set:Nx }
-\cs_new_protected:Npn \__tl_gbuild:Nw
- { \@@_aux:NNw \tl_gset:Nn }
-\cs_new_protected:Npn \__tl_gbuild_x:Nw
- { \@@_aux:NNw \tl_gset:Nx }
-\cs_new_protected:Npn \@@_aux:NNw #1#2
- {
- \group_begin:
- \cs_set:Npn \@@_end_assignment:n
- { \group_end: #1 #2 }
- \int_zero:N \l_@@_start_index_int
- \int_zero:N \l_@@_index_int
- \tl_clear:N \l_@@_result_tl
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}{\@@_end:}
-% \begin{macro}[aux]{\@@_end_assignment:n}
-% When we are done building a token list, unpack all \tn{toks} into
-% the \texttt{result} token list, and expand this list before closing
-% the group. The \cs{@@_end_assignment:n} function is defined by
-% \cs{@@_aux:NNw} to end the group and hold the relevant
-% assignment. Its value outside is irrelevant, but just in case, we
-% set it to a function which would clean up the contents of
-% \cs{l_@@_result_tl}.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_end:
- {
- \@@_unpack:
- \exp_args:No
- \@@_end_assignment:n \l_@@_result_tl
- }
-\cs_new_eq:NN \@@_end_assignment:n \use_none:n
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}{\@@_one:n, \@@_one:o, \@@_one:x}
-% Store the tokens in a free \tn{toks}, then move the pointer to the
-% next one. If we overflow, unpack the current \tn{toks}, and reset
-% the current index, preparing to fill more \tn{toks}. This could be
-% optimized by using \tn{afterassignment} to avoid reading |#1|.
-% \begin{macrocode}
-\cs_new_protected:Npn \@@_one:n #1
- {
- \tex_toks:D \l_@@_index_int {#1}
- \int_incr:N \l_@@_index_int
- \if_int_compare:w \l_@@_index_int > \c_max_register_int
- \@@_unpack:
- \l_@@_index_int \l_@@_start_index_int
- \fi:
- }
-\cs_new_protected:Npn \@@_one:o #1
- {
- \tex_toks:D \l_@@_index_int \exp_after:wN {#1}
- \int_incr:N \l_@@_index_int
- \if_int_compare:w \l_@@_index_int > \c_max_register_int
- \@@_unpack:
- \l_@@_index_int \l_@@_start_index_int
- \fi:
- }
-\cs_new_protected:Npn \@@_one:x #1
- { \use:x { \@@_one:n {#1} } }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macrocode}
-%</initex|package>
-% \end{macrocode}
-%
-% \end{implementation}
-%
-% \PrintIndex
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/xcoffins/xcoffins.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/xcoffins/xcoffins.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/xcoffins/xcoffins.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -54,7 +54,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -673,7 +673,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xcoffins}{2017/05/13}{}
+\ProvidesExplPackage{xcoffins}{2017/05/29}{}
{L3 Experimental design level coffins}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/l3galley.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/l3galley.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/l3galley.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -24,8 +24,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{l3galley}{Support package l3kernel too old}
@@ -59,7 +59,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -685,7 +685,7 @@
%
% \begin{macrocode}
%<*package>
-\ProvidesExplPackage{l3galley}{2017/05/13}{}
+\ProvidesExplPackage{l3galley}{2017/05/29}{}
{L3 Experimental galley code}
%</package>
% \end{macrocode}
Modified: trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/xgalley.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/xgalley.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3experimental/xgalley/xgalley.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -45,7 +45,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -732,7 +732,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xgalley}{2017/05/13}{}
+\ProvidesExplPackage{xgalley}{2017/05/29}{}
{L3 Experimental galley}
\RequirePackage{xparse,xtemplate,l3galley}
% \end{macrocode}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/expl3.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/expl3.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/expl3.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -21,7 +21,7 @@
% for those people who are interested.
%
%<*driver|generic|package>
-\def\ExplFileDate{2017/05/13}%
+\def\ExplFileDate{2017/05/29}%
%</driver|generic|package>
%<*driver>
\documentclass[full]{l3doc}
@@ -49,7 +49,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3.ins
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3.ins 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3.ins 2017-06-05 23:17:08 UTC (rev 44483)
@@ -67,6 +67,7 @@
\from{l3str.dtx} {package}
\from{l3seq.dtx} {package}
\from{l3int.dtx} {package}
+ \from{l3intarray.dtx} {package}
\from{l3flag.dtx} {package}
\from{l3quark.dtx} {package}
\from{l3prg.dtx} {package}
@@ -91,6 +92,9 @@
\from{l3fp-random.dtx} {package}
\from{l3fp-assign.dtx} {package}
\from{l3sort.dtx} {package}
+ \from{l3tl-build.dtx} {package}
+ \from{l3tl-analysis.dtx}{package}
+ \from{l3regex.dtx} {package}
\from{l3box.dtx} {package}
\from{l3coffins.dtx} {package}
\from{l3color.dtx} {package}
@@ -138,11 +142,13 @@
\generate{\file{l3prg.sty} {\from{l3oldmodules.dtx} {l3prg,oldmodules}}}
\generate{\file{l3prop.sty} {\from{l3oldmodules.dtx} {l3prop,oldmodules}}}
\generate{\file{l3quark.sty} {\from{l3oldmodules.dtx} {l3quark,oldmodules}}}
+\generate{\file{l3regex.sty} {\from{l3oldmodules.dtx} {l3regex,oldmodules}}}
\generate{\file{l3seq.sty} {\from{l3oldmodules.dtx} {l3seq,oldmodules}}}
\generate{\file{l3skip.sty} {\from{l3oldmodules.dtx} {l3skip,oldmodules}}}
-\generate{\file{l3sort.sty} {\from{l3oldmodules.dtx} {l3sort,oldmodules}}}
+\generate{\file{l3sort.sty} {\from{l3oldmodules.dtx} {l3sort,oldmodules}}}
\generate{\file{l3str.sty} {\from{l3oldmodules.dtx} {l3str,oldmodules}}}
\generate{\file{l3tl.sty} {\from{l3oldmodules.dtx} {l3tl,oldmodules}}}
+\generate{\file{l3tl-analysis.sty}{\from{l3oldmodules.dtx} {l3tl-analysis,oldmodules}}}
\generate{\file{l3token.sty} {\from{l3oldmodules.dtx} {l3token,oldmodules}}}
% Lua code
@@ -150,13 +156,18 @@
\def\MetaPrefix{--}
\preamble
-EXPERIMENTAL CODE
+Copyright (C) 1990-2017 The LaTeX3 Project
-Do not distribute this file without also distributing the
-source files specified above.
+It may be distributed and/or modified under the conditions of
+the LaTeX Project Public License (LPPL), either version 1.3c of
+this license or (at your option) any later version. The latest
+version of this license is in the file:
-Do not distribute a modified version of this file.
+ http://www.latex-project.org/lppl.txt
+This file is part of the "l3kernel bundle" (The Work in LPPL)
+and all files in that bundle must be distributed together.
+
\endpreamble
\nopostamble
\generate{\file{expl3.lua}{\from{l3luatex.dtx}{package,lua}}}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3alloc.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3alloc.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3alloc.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3basics.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3basics.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3basics.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3bootstrap.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3bootstrap.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3bootstrap.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -139,7 +139,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3box.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3box.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3box.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3candidates.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3candidates.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3candidates.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -42,7 +42,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -201,89 +201,6 @@
% an error if the file is not found, in contrast to \cs{file_input:n}.
% \end{function}
%
-% \begin{function}[added = 2012-02-11]{\ior_map_inline:Nn}
-% \begin{syntax}
-% \cs{ior_map_inline:Nn} \meta{stream} \Arg{inline function}
-% \end{syntax}
-% Applies the \meta{inline function} to \meta{lines} obtained by
-% reading one or more lines (until an equal number of left and right
-% braces are found) from the \meta{stream}. The \meta{inline function}
-% should consist of code which will receive the \meta{line} as |#1|.
-% Note that \TeX{} removes trailing space and tab characters
-% (character codes 32 and 9) from every line upon input. \TeX{} also
-% ignores any trailing new-line marker from the file it reads.
-% \end{function}
-%
-% \begin{function}[added = 2012-02-11]{\ior_str_map_inline:Nn}
-% \begin{syntax}
-% \cs{ior_str_map_inline:Nn} \Arg{stream} \Arg{inline function}
-% \end{syntax}
-% Applies the \meta{inline function} to every \meta{line}
-% in the \meta{stream}. The material is read from the \meta{stream}
-% as a series of tokens with category code $12$ (other), with the
-% exception of space characters which are given category code $10$
-% (space). The \meta{inline function} should consist of code which
-% will receive the \meta{line} as |#1|.
-% Note that \TeX{} removes trailing space and tab characters
-% (character codes 32 and 9) from every line upon input. \TeX{} also
-% ignores any trailing new-line marker from the file it reads.
-% \end{function}
-%
-% \begin{function}[added = 2012-06-29]{\ior_map_break:}
-% \begin{syntax}
-% \cs{ior_map_break:}
-% \end{syntax}
-% Used to terminate a \cs[no-index]{ior_map_\ldots} function before all
-% lines from the \meta{stream} have been processed. This will
-% normally take place within a conditional statement, for example
-% \begin{verbatim}
-% \ior_map_inline:Nn \l_my_ior
-% {
-% \str_if_eq:nnTF { #1 } { bingo }
-% { \ior_map_break: }
-% {
-% % Do something useful
-% }
-% }
-% \end{verbatim}
-% Use outside of a \cs[no-index]{ior_map_\ldots} scenario will lead to low
-% level \TeX{} errors.
-% \begin{texnote}
-% When the mapping is broken, additional tokens may be inserted by the
-% internal macro \cs{__prg_break_point:Nn} before further items are taken
-% from the input stream. This will depend on the design of the mapping
-% function.
-% \end{texnote}
-% \end{function}
-%
-% \begin{function}[added = 2012-06-29]{\ior_map_break:n}
-% \begin{syntax}
-% \cs{ior_map_break:n} \Arg{tokens}
-% \end{syntax}
-% Used to terminate a \cs[no-index]{ior_map_\ldots} function before all
-% lines in the \meta{stream} have been processed, inserting
-% the \meta{tokens} after the mapping has ended. This will
-% normally take place within a conditional statement, for example
-% \begin{verbatim}
-% \ior_map_inline:Nn \l_my_ior
-% {
-% \str_if_eq:nnTF { #1 } { bingo }
-% { \ior_map_break:n { <tokens> } }
-% {
-% % Do something useful
-% }
-% }
-% \end{verbatim}
-% Use outside of a \cs[no-index]{ior_map_\ldots} scenario will lead to low
-% level \TeX{} errors.
-% \begin{texnote}
-% When the mapping is broken, additional tokens may be inserted by the
-% internal macro \cs{__prg_break_point:Nn} before the \meta{tokens} are
-% inserted into the input stream.
-% This will depend on the design of the mapping function.
-% \end{texnote}
-% \end{function}
-%
% \begin{function}[added = 2014-08-22]
% {\ior_log_streams:, \iow_log_streams:}
% \begin{syntax}
@@ -475,7 +392,7 @@
%
% \section{Additions to \pkg{l3sys}}
%
-% \begin{function}[added = 2017-04-12, EXP, pTF]{\sys_if_rand_exist:}
+% \begin{function}[added = 2017-05-27, EXP, pTF]{\sys_if_rand_exist:}
% \begin{syntax}
% \cs{sys_if_rand_exist_p:}
% \cs{sys_if_rand_exist:TF} \Arg{true code} \Arg{false code}
@@ -484,7 +401,7 @@
% this is the case in \pdfTeX{} and \LuaTeX{}.
% \end{function}
%
-% \begin{function}[added = 2017-04-12, EXP]{\sys_rand_seed:}
+% \begin{function}[added = 2017-05-27, EXP]{\sys_rand_seed:}
% \begin{syntax}
% \cs{sys_rand_seed:}
% \end{syntax}
@@ -493,7 +410,7 @@
% expands to $0$.
% \end{function}
%
-% \begin{function}[added = 2017-04-12]{\sys_gset_rand_seed:n}
+% \begin{function}[added = 2017-05-27]{\sys_gset_rand_seed:n}
% \begin{syntax}
% \cs{sys_gset_rand_seed:n} \Arg{intexpr}
% \end{syntax}
@@ -506,6 +423,61 @@
% random number support this produces an error.
% \end{function}
%
+% \begin{variable}[added = 2017-05-27]{\c_sys_shell_escape_int}
+% This variable exposes the internal three-valued shell escape
+% status. The possible values are
+% \begin{description}
+% \item[0] Shell escape is disabled
+% \item[1] Unrestricted shell escape is enabled
+% \item[2] Restricted shell escape is enabled
+% \end{description}
+% \end{variable}
+%
+% \begin{function}[added = 2017-05-27, EXP, pTF]{\sys_if_shell:}
+% \begin{syntax}
+% \cs{sys_if_shell_p:}
+% \cs{sys_if_shell:TF} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Performs a check for whether shell escape is enabled. This will
+% return true if either restricted or unrestricted shell escape
+% is enabled.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-27, EXP, pTF]{\sys_if_shell_unrestricted:}
+% \begin{syntax}
+% \cs{sys_if_shell_unrestricted_p:}
+% \cs{sys_if_shell_unrestricted:TF} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Performs a check for whether \emph{unrestricted} shell escape is
+% enabled.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-27, EXP, pTF]{\sys_if_shell_restricted:}
+% \begin{syntax}
+% \cs{sys_if_shell_restricted_p:}
+% \cs{sys_if_shell_restricted:TF} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Performs a check for whether \emph{restricted} shell escape is
+% enabled. This will return false if unrestricted shell escape is
+% enabled. Unrestricted shell escape is not considered a superset
+% of restricted shell escape in this case. To find whether any
+% shell escape is enabled use \cs{sys_if_shell:}.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-27]{\sys_shell_now:n, \sys_shell_now:x}
+% \begin{syntax}
+% \cs{sys_shell_now:n} \Arg{tokens}
+% \end{syntax}
+% Execute \meta{tokens} through shell escape immediately.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-27]{\sys_shell_shipout:n, \sys_shell_shipout:x}
+% \begin{syntax}
+% \cs{sys_shell_shipout:n} \Arg{tokens}
+% \end{syntax}
+% Execute \meta{tokens} through shell escape at shipout.
+% \end{function}
+%
% \section{Additions to \pkg{l3tl}}
%
% \begin{function}[EXP,pTF]{\tl_if_single_token:n}
@@ -1523,67 +1495,6 @@
}
% \end{macrocode}
% \end{macro}
-%
-% \begin{macrocode}
-%<@@=ior>
-% \end{macrocode}
-%
-% \begin{macro}[EXP]{\ior_map_break:, \ior_map_break:n}
-% Usual map breaking functions. Those are not yet in \pkg{l3kernel}
-% proper since the mapping below is the first of its kind.
-% \begin{macrocode}
-\cs_new:Npn \ior_map_break:
- { \__prg_map_break:Nn \ior_map_break: { } }
-\cs_new:Npn \ior_map_break:n
- { \__prg_map_break:Nn \ior_map_break: }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}{\ior_map_inline:Nn, \ior_str_map_inline:Nn}
-% \begin{macro}[aux]{\@@_map_inline:NNn}
-% \begin{macro}[aux]{\@@_map_inline:NNNn}
-% \begin{macro}[aux]{\@@_map_inline_loop:NNN}
-% \begin{variable}{\l_@@_internal_tl}
-% Mapping to an input stream can be done on either a token or a string
-% basis, hence the set up. Within that, there is a check to avoid reading
-% past the end of a file, hence the two applications of \cs{ior_if_eof:N}.
-% This mapping cannot be nested with twice the same stream, as the
-% stream has only one \enquote{current line}.
-% \begin{macrocode}
-\cs_new_protected:Npn \ior_map_inline:Nn
- { \@@_map_inline:NNn \ior_get:NN }
-\cs_new_protected:Npn \ior_str_map_inline:Nn
- { \@@_map_inline:NNn \ior_str_get:NN }
-\cs_new_protected:Npn \@@_map_inline:NNn
- {
- \int_gincr:N \g__prg_map_int
- \exp_args:Nc \@@_map_inline:NNNn
- { __prg_map_ \int_use:N \g__prg_map_int :n }
- }
-\cs_new_protected:Npn \@@_map_inline:NNNn #1#2#3#4
- {
- \cs_gset_protected:Npn #1 ##1 {#4}
- \ior_if_eof:NF #3 { \@@_map_inline_loop:NNN #1#2#3 }
- \__prg_break_point:Nn \ior_map_break:
- { \int_gdecr:N \g__prg_map_int }
- }
-\cs_new_protected:Npn \@@_map_inline_loop:NNN #1#2#3
- {
- #2 #3 \l_@@_internal_tl
- \ior_if_eof:NF #3
- {
- \exp_args:No #1 \l_@@_internal_tl
- \@@_map_inline_loop:NNN #1#2#3
- }
- }
-\tl_new:N \l_@@_internal_tl
-% \end{macrocode}
-% \end{variable}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
% \begin{macro}{\ior_log_streams:}
% Redirect output of \cs{ior_list_streams:} to the log.
% \begin{macrocode}
@@ -1592,10 +1503,6 @@
% \end{macrocode}
% \end{macro}
%
-% \begin{macrocode}
-%<@@=iow>
-% \end{macrocode}
-%
% \begin{macro}{\iow_log_streams:}
% Redirect output of \cs{iow_list_streams:} to the log.
% \begin{macrocode}
@@ -1976,6 +1883,10 @@
%
% \subsection{Additions to \pkg{l3sys}}
%
+% \begin{macrocode}
+%<@@=sys>
+% \end{macrocode}
+%
% \begin{macro}[EXP, pTF]{\sys_if_rand_exist:}
% Currently, randomness exists under \pdfTeX{} and \LuaTeX{}.
% \begin{macrocode}
@@ -2006,6 +1917,133 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{variable}{\c_sys_shell_escape_int}
+% Expose the engine's shell escape status to the user.
+% \begin{macrocode}
+\int_const:Nn \c_sys_shell_escape_int
+ {
+ \sys_if_engine_luatex:TF
+ {
+ \luatex_directlua:D
+ {
+ tex.sprint((status.shell_escape~or~os.execute()) .. " ")
+ }
+ }
+ {
+ \pdftex_shellescape:D
+ }
+ }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[EXP, pTF]{\sys_if_shell:}
+% Performs a check for whether shell escape is enabled. This will
+% return true if either of restricted or unrestricted shell escape
+% is enabled.
+% \begin{macrocode}
+\prg_new_conditional:Nnn \sys_if_shell: { p , T , F , TF }
+ {
+ \if_int_compare:w \c_sys_shell_escape_int = 0 ~
+ \prg_return_false:
+ \else:
+ \prg_return_true:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[EXP, pTF]{\sys_if_shell_unrestricted:}
+% Performs a check for whether \emph{unrestricted} shell escape is
+% enabled.
+% \begin{macrocode}
+\prg_new_conditional:Nnn \sys_if_shell_unrestricted: { p , T , F , TF }
+ {
+ \if_int_compare:w \c_sys_shell_escape_int = 1 ~
+ \prg_return_true:
+ \else:
+ \prg_return_false:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[EXP, pTF]{\sys_if_shell_restricted:}
+% Performs a check for whether \emph{restricted} shell escape is
+% enabled. This will return false if unrestricted shell escape is
+% enabled. Unrestricted shell escape is not considered a superset
+% of restricted shell escape in this case. To find whether any
+% shell escape is enabled use \cs{sys_if_shell:}.
+% \begin{macrocode}
+\prg_new_conditional:Nnn \sys_if_shell_restricted: { p , T , F , TF }
+ {
+ \if_int_compare:w \c_sys_shell_escape_int = 2 ~
+ \prg_return_true:
+ \else:
+ \prg_return_false:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{variable}{\c_@@_shell_stream_int}
+% This is not needed for \LuaTeX{}: shell escape there isn't done using
+% a \TeX{} interface
+% \begin{macrocode}
+\sys_if_engine_luatex:F
+ { \int_const:Nn \c_@@_shell_stream_int { 18 } }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}{\sys_shell_now:n}
+% Execute commands through shell escape immediately.
+% \begin{macrocode}
+\sys_if_engine_luatex:TF
+ {
+ \cs_new_protected:Npn \sys_shell_now:n #1
+ {
+ \luatex_directlua:D
+ {
+ os.execute("
+ \luatex_luaescapestring:D { \etex_detokenize:D {#1} }
+ ")
+ }
+ }
+ }
+ {
+ \cs_new_protected:Npn \sys_shell_now:n #1
+ {
+ \iow_now:Nn \c_@@_shell_stream_int { #1 }
+ }
+ }
+\cs_generate_variant:Nn \sys_shell_now:n { x }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\sys_shell_shipout:n}
+% Execute commands through shell escape at shipout.
+% \begin{macrocode}
+\sys_if_engine_luatex:TF
+ {
+ \cs_new_protected:Npn \sys_shell_shipout:n #1
+ {
+ \luatex_latelua:D
+ {
+ os.execute("
+ \luatex_luaescapestring:D { \etex_detokenize:D {#1} }
+ ")
+ }
+ }
+ }
+ {
+ \cs_new_protected:Npn \sys_shell_shipout:n #1
+ {
+ \iow_shipout:Nn \c_@@_shell_stream_int { #1 }
+ }
+ }
+\cs_generate_variant:Nn \sys_shell_shipout:n { x }
+% \end{macrocode}
+% \end{macro}
+%
% \subsection{Additions to \pkg{l3tl}}
%
% \begin{macrocode}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3clist.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3clist.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3clist.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -43,7 +43,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3coffins.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3coffins.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3coffins.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3color.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3color.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3color.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3deprecation.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3deprecation.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3deprecation.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3doc.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3doc.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3doc.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -70,7 +70,7 @@
% This isn't included in the typeset documentation because it's a bit
% ugly:
%<*class>
-\ProvidesExplClass{l3doc}{2017/05/13}{}
+\ProvidesExplClass{l3doc}{2017/05/29}{}
{L3 Experimental documentation class}
%</class>
% \fi
@@ -77,7 +77,7 @@
%
% \title{The \cls{l3doc} class}
% \author{\Team}
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
% \maketitle
% \tableofcontents
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3docstrip.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3docstrip.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3docstrip.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -61,7 +61,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3drivers.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3drivers.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3drivers.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -45,7 +45,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -640,17 +640,17 @@
%
% \subsubsection{Color}
%
-% \begin{variable}{\l_@@_current_color_tl}
+% \begin{variable}{\l_@@_color_current_tl}
% The current color in driver-dependent format: pick up the package-mode
% data if available.
% \begin{macrocode}
-\tl_new:N \l_@@_current_color_tl
-\tl_set:Nn \l_@@_current_color_tl { 0~g~0~G }
+\tl_new:N \l_@@_color_current_tl
+\tl_set:Nn \l_@@_color_current_tl { 0~g~0~G }
%<*package>
\AtBeginDocument
{
\@ifpackageloaded { color }
- { \tl_set:Nn \l_@@_current_color_tl { \current@color } }
+ { \tl_set:Nn \l_@@_color_current_tl { \current@color } }
{ }
}
%</package>
@@ -676,7 +676,7 @@
{ \luatex_pdfextension:D colorstack }
{ \pdftex_pdfcolorstack:D }
\exp_not:N \l_@@_color_stack_int push
- { \exp_not:N \l_@@_current_color_tl }
+ { \exp_not:N \l_@@_color_current_tl }
\group_insert_after:N \exp_not:N \@@_color_reset:
}
\cs_new_protected:Npx \@@_color_reset:
@@ -690,7 +690,109 @@
% \end{macro}
% \end{macro}
%
+% \subsection{Images}
+%
+% \begin{variable}{\l_@@_image_attr_tl}
+% In PDF mode, additional attributes of an image (such as page number) are
+% needed both to obtain the bounding box and when inserting the image: this
+% occurs as the image dictionary approach means they are read as part of
+% the bounding box operation. As such, it is easier to track additional
+% attributes using a dedicated |tl| rather than build up the same data
+% twice.
% \begin{macrocode}
+\tl_new:N \l_@@_image_attr_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[int]
+% {\@@_image_getbb_jpg:n, \@@_image_getbb_pdf:n, \@@_image_getbb_png:n}
+% \begin{macro}[aux]
+% {\@@_image_getbb_auxi:n, \@@_image_getbb_auxii:n}
+% Getting the bounding box here requires us to box up the image and
+% measure it. To deal with the difference in feature support in bitmap
+% and vector images but keeping the common parts, there is a little work
+% to do in terms of auxiliaries. The key here is to notice that we need
+% two forms of the attributes: a \enquote{short} set to allow us to
+% track for caching, and the full form to pass to the primitive. Note that
+% in |pdftex.def| the short reference is stored to be used in the inclusion
+% stage: may be required when there are more aspects to track.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_getbb_jpg:n #1
+ {
+ \int_zero:N \l__image_page_int
+ \tl_set:Nx \l_@@_image_attr_tl
+ {
+ \bool_if:NT \l__image_interpolate_bool
+ { :I }
+ }
+ \@@_image_getbb_auxi:n {#1}
+ }
+\cs_new_eq:NN \@@_image_getbb_png:n \@@_image_getbb_jpg:n
+\cs_new_protected:Npn \@@_image_getbb_pdf:n #1
+ {
+ \bool_set_false:N \l__image_interpolate_bool
+ \tl_set:Nx \l_@@_image_attr_tl
+ {
+ \int_compare:nNnT \l__image_page_int > 0
+ { :P \int_use:N \l__image_page_int }
+ }
+ \@@_image_getbb_auxi:n {#1}
+ }
+\cs_new_protected:Npn \@@_image_getbb_auxi:n #1
+ {
+ \dim_if_exist:cTF { c__image_ #1 \l_@@_image_attr_tl _ht_dim }
+ {
+ \dim_set_eq:Nc \l__image_ht_dim
+ { c__image_ #1 \l_@@_image_attr_tl _ht_dim }
+ \dim_set_eq:Nc \l__image_wd_dim
+ { c__image_ #1 \l_@@_image_attr_tl _wd_dim }
+ }
+ { \@@_image_getbb_auxii:n {#1} }
+ }
+% \end{macrocode}
+% Measuring the image is done by boxing up: for PDF images we could
+% use |\pdftex_pdfximagebbox:D|, but it doesn't work for other types.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_getbb_auxii:n #1
+ {
+ \tex_immediate:D \pdftex_pdfximage:D
+ \bool_if:NT \l__image_interpolate_bool
+ { attr ~ { /Interpolate~true } }
+ \int_compare:nNnT \l__image_page_int > 0
+ { page ~ \int_use:N \l__image_page_int }
+ {#1}
+ \hbox_set:Nn \l__image_tmp_box
+ { \pdftex_pdfrefximage:D \pdftex_pdflastximage:D }
+ \dim_set:Nn \l__image_ht_dim { \box_ht:N \l__image_tmp_box }
+ \dim_set:Nn \l__image_wd_dim { \box_wd:N \l__image_tmp_box }
+ \int_const:cn { c__image_ #1 \l_@@_image_attr_tl _int }
+ { \tex_the:D \pdftex_pdflastximage:D }
+ \dim_const:cn { c__image_ #1 \l_@@_image_attr_tl _ht_dim }
+ { \l__image_ht_dim }
+ \dim_const:cn { c__image_ #1 \l_@@_image_attr_tl _wd_dim }
+ { \l__image_wd_dim }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]
+% {\@@_image_include_jpg:n, \@@_image_include_pdf:n, \@@_image_include_png:n}
+% Images are already loaded for the measurement part of the code, so
+% inclusion is straight-forward, with only any attributes to worry about. The
+% latter carry through from determination of the bounding box.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_include_jpg:n #1
+ {
+ \pdftex_pdfrefximage:D
+ \int_use:c { c__image_ #1 \l_@@_image_attr_tl _int }
+ }
+\cs_new_eq:NN \@@_image_include_pdf:n \@@_image_include_jpg:n
+\cs_new_eq:NN \@@_image_include_png:n \@@_image_include_jpg:n
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macrocode}
%</pdfmode>
% \end{macrocode}
%
@@ -799,16 +901,16 @@
%
% \subsubsection{Color}
%
-% \begin{variable}{\l_@@_current_color_tl}
+% \begin{variable}{\l_@@_color_current_tl}
% The current color in driver-dependent format.
% \begin{macrocode}
-\tl_new:N \l_@@_current_color_tl
-\tl_set:Nn \l_@@_current_color_tl { [ 0 ] }
+\tl_new:N \l_@@_color_current_tl
+\tl_set:Nn \l_@@_color_current_tl { [ 0 ] }
%<*package>
\AtBeginDocument
{
\@ifpackageloaded { color }
- { \tl_set:Nn \l_@@_current_color_tl { \current@color } }
+ { \tl_set:Nn \l_@@_color_current_tl { \current@color } }
{ }
}
%</package>
@@ -821,7 +923,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_color_ensure_current:
{
- \tex_special:D { pdf:bcolor~\l_@@_current_color_tl }
+ \tex_special:D { pdf:bcolor~\l_@@_color_current_tl }
\group_insert_after:N \@@_color_reset:
}
\cs_new_protected:Npn \@@_color_reset:
@@ -830,7 +932,108 @@
% \end{macro}
% \end{macro}
%
+% \subsection{Images}
+%
+% \begin{macro}[int]
+% {
+% \@@_image_getbb_eps:n, \@@_image_getbb_jpg:n,
+% \@@_image_getbb_pdf:n, \@@_image_getbb_png:n
+% }
+% Simply use the generic functions: only for \texttt{dvipdfmx} in the
+% extraction cases.
% \begin{macrocode}
+\cs_new_eq:NN \@@_image_getbb_eps:n \__image_read_bb:n
+%<*dvipdfmx>
+\cs_new_protected:Npn \@@_image_getbb_jpg:n #1
+ {
+ \int_zero:N \l__image_page_int
+ \__image_extract_bb:n {#1}
+ }
+\cs_new_eq:NN \@@_image_getbb_png:n \@@_image_getbb_jpg:n
+\cs_new_protected:Npn \@@_image_getbb_pdf:n #1
+ {
+ \bool_set_false:N \l__image_interpolate_bool
+ \__image_extract_bb:n {#1}
+ }
+%</dvipdfmx>
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{variable}[aux]{\g_@@_image_int}
+% Used to track the object number associated with each image.
+% \begin{macrocode}
+\int_new:N \g_@@_image_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[int]
+% {
+% \@@_image_include_eps:n, \@@_image_include_jpg:n,
+% \@@_image_include_pdf:n, \@@_image_include_png:n
+% }
+% \begin{macro}[aux]{\@@_image_include_auxi:nn}
+% \begin{macro}[aux]{\@@_image_include_auxii:nnn, \@@_image_include_auxii:xnn}
+% \begin{macro}[aux]{\@@_image_include_auxiii:nnn}
+% The special syntax depends on the file type.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_include_eps:n #1
+ {
+ \tex_special:D { PSfile = #1 }
+ }
+\cs_new_protected:Npn \@@_image_include_jpg:n #1
+ { \@@_image_include_auxi:nn {#1} { image } }
+\cs_new_eq:NN \@@_image_include_png:n \@@_image_include_jpg:n
+\cs_new_protected:Npn \@@_image_include_pdf:n #1
+ { \@@_image_include_auxi:nn {#1} { epdf } }
+% \end{macrocode}
+% Image inclusion is set up to use the fact that each image is stored in
+% the PDF as an XObject. This means that we can include repeated images
+% only once and refer to them. To allow that, track the nature of each
+% image: much the same as for the direct PDF mode case.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_include_auxi:nn #1#2
+ {
+ \@@_image_include_auxii:xnn
+ {
+ \int_compare:nNnT \l__image_page_int > 0
+ { :P \int_use:N \l__image_page_int }
+ \bool_if:NT \l__image_interpolate_bool
+ { :I }
+ }
+ {#1} {#2}
+ }
+\cs_new_protected:Npn \@@_image_include_auxii:nnn #1#2#3
+ {
+ \int_if_exist:cTF { c__image_ #2#1 _int }
+ {
+ \tex_special:D
+ { pdf:usexobj~@image \int_use:c { c__image_ #2#1 _int } }
+ }
+ { \@@_image_include_auxiii:nnn {#2} {#1} {#3} }
+ }
+\cs_generate_variant:Nn \@@_image_include_auxii:nnn { x }
+\cs_new_protected:Npn \@@_image_include_auxiii:nnn #1#2#3
+ {
+ \int_gincr:N \g_@@_image_int
+ \int_const:cn { c__image_ #1#2 _int } { \g_@@_image_int }
+ \tex_special:D
+ {
+ pdf:#3~
+ @image \int_use:c { c__image_ #1#2 _int }
+ \int_compare:nNnT \l__image_page_int > 0
+ { page ~ \int_use:N \l__image_page_int \c_space_tl }
+ (#1)
+ \bool_if:NT \l__image_interpolate_bool
+ { <</Interpolate~true>> }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macrocode}
%</dvipdfmx|xdvipdfmx>
% \end{macrocode}
%
@@ -844,8 +1047,9 @@
%
% \begin{macro}[int]{\@@_color_ensure_current:}
% \begin{macro}[aux]{\@@_color_reset:}
-% The \LaTeXe{} driver uses \texttt{dvips}-like specials so there has to
-% be a change of set up if \pkg{color} is loaded.
+% Older \LaTeXe{} drivers use \texttt{dvips}-like specials so there has to
+% be a change of set up if \pkg{color} is loaded and if the current color
+% doesn't match the pattern expected for |dvipdfmx|.
% \begin{macrocode}
%<*package>
\AtBeginDocument
@@ -852,13 +1056,20 @@
{
\@ifpackageloaded { color }
{
- \cs_set_protected:Npn \@@_color_ensure_current:
+ \cs_set_protected:Npn \@@_tmp:w #1 [ #2 ] #3 \q_stop
{
- \tex_special:D { color~push~\l_@@_current_color_tl }
- \group_insert_after:N \@@_color_reset:
+ \tl_if_empty:nT {#2}
+ {
+ \cs_set_protected:Npn \@@_color_ensure_current:
+ {
+ \tex_special:D { color~push~\l_@@_color_current_tl }
+ \group_insert_after:N \@@_color_reset:
+ }
+ \cs_set_protected:Npn \@@_color_reset:
+ { \tex_special:D { color~pop } }
+ }
}
- \cs_set_protected:Npn \@@_color_reset:
- { \tex_special:D { color~pop } }
+ \exp_after:wN \@@_tmp:w \current@color [ ] \q_stop
}
{ }
}
@@ -867,7 +1078,59 @@
% \end{macro}
% \end{macro}
%
+% \subsection{Images}
+%
+% \begin{macro}[int]
+% {\@@_image_getbb_jpg:n, \@@_image_getbb_pdf:n, \@@_image_getbb_png:n}
+% \begin{macro}[aux]{\@@_image_getbb_auxi:nN}
+% \begin{macro}[aux]{\@@_image_getbb_auxii:nnN, \@@_image_getbb_auxii:VnN}
+% \begin{macro}[aux]{\@@_image_getbb_auxiii:nNnn}
+% For \texttt{xdvipdfmx}, there are two primitives that allow us to obtain
+% the bounding box without needing \texttt{extractbb}.
% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_getbb_jpg:n #1
+ {
+ \int_zero:N \l__image_page_int
+ \@@_image_getbb_auxi:nN {#1} \xetex_picfile:D
+ }
+\cs_new_eq:NN \@@_image_getbb_png:n \@@_image_getbb_jpg:n
+\cs_new_protected:Npn \@@_image_getbb_pdf:n #1
+ { \@@_image_getbb_auxi:nN {#1} \xetex_pdffile:D }
+\cs_new_protected:Npn \@@_image_getbb_auxi:nN #1#2
+ {
+ \int_compare:nNnTF \l__image_page_int > 0
+ { \@@_image_getbb_auxii:VnN \l__image_page_int {#1} #2 }
+ { \@@_image_getbb_auxiii:nNnn {#1} #2 }
+ }
+\cs_new_protected:Npn \@@_image_getbb_auxii:nnN #1#2#3
+ { \@@_image_getbb_auxiii:nNnn {#2} #3 { :P #1 } { page #1 } }
+\cs_generate_variant:Nn \@@_image_getbb_auxii:nnN { V }
+\cs_new_protected:Npn \@@_image_getbb_auxiii:nNnn #1#2#3#4
+ {
+ \dim_if_exist:cTF { c__image_ #1#3 _ht_dim }
+ {
+ \dim_set_eq:Nc \l__image_ht_dim { c__image_ #1#3 _ht_dim }
+ \dim_set_eq:Nc \l__image_wd_dim { c__image_ #1#3 _wd_dim }
+ }
+ { \@@_image_getbb_auxvi:nNnn {#1} #2 {#3} {#4} }
+ }
+\cs_new_protected:Npn \@@_image_getbb_auxvi:nNnn #1#2#3#4
+ {
+ \hbox_set:Nn \l__image_tmp_box { #2 #1 ~ #4 }
+ \dim_set:Nn \l__image_ht_dim { \box_ht:N \l__image_tmp_box }
+ \dim_set:Nn \l__image_wd_dim { \box_wd:N \l__image_tmp_box }
+ \dim_const:cn { c__image_ #1#3 _ht_dim }
+ { \l__image_ht_dim }
+ \dim_const:cn { c__image_ #1#3 _wd_dim }
+ { \l__image_wd_dim }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macrocode}
%</xdvipdfmx>
% \end{macrocode}
%
@@ -874,7 +1137,7 @@
% \subsection{Drawing commands: \texttt{pdfmode} and \texttt{(x)dvipdfmx}}
%
% Both \texttt{pdfmode} and \texttt{(x)dvipdfmx} directly produce PDF output
-% and undertand a shared set of specials for drawing commands.
+% and understand a shared set of specials for drawing commands.
%
% \begin{macrocode}
%<*dvipdfmx|pdfmode|xdvipdfmx>
@@ -954,7 +1217,7 @@
% \end{macro}
%
% \begin{macro}[int]{\@@_draw_evenodd_rule:, \@@_draw_nonzero_rule:}
-% \begin{variable}[aux]{\g_@@_draw_eor_bool}
+% \begin{variable}[int]{\g_@@_draw_eor_bool}
% The even-odd rule here can be implemented as a simple switch.
% \begin{macrocode}
\cs_new_protected:Npn \@@_draw_evenodd_rule:
@@ -1341,16 +1604,16 @@
%
% \subsubsection{Color}
%
-% \begin{variable}{\l_@@_current_color_tl}
+% \begin{variable}{\l_@@_color_current_tl}
% The current color in driver-dependent format.
% \begin{macrocode}
-\tl_new:N \l_@@_current_color_tl
-\tl_set:Nn \l_@@_current_color_tl { gray~0 }
+\tl_new:N \l_@@_color_current_tl
+\tl_set:Nn \l_@@_color_current_tl { gray~0 }
%<*package>
\AtBeginDocument
{
\@ifpackageloaded { color }
- { \tl_set:Nn \l_@@_current_color_tl { \current@color } }
+ { \tl_set:Nn \l_@@_color_current_tl { \current@color } }
{ }
}
%</package>
@@ -1363,7 +1626,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_color_ensure_current:
{
- \tex_special:D { color~push~\l_@@_current_color_tl }
+ \tex_special:D { color~push~\l_@@_color_current_tl }
\group_insert_after:N \@@_color_reset:
}
\cs_new_protected:Npn \@@_color_reset:
@@ -1372,6 +1635,26 @@
% \end{macro}
% \end{macro}
%
+% \subsection{Images}
+%
+% \begin{macro}[int]{\@@_image_getbb_eps:n}
+% Simply use the generic function.
+% \begin{macrocode}
+\cs_new_eq:NN \@@_image_getbb_eps:n \__image_read_bb:n
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_image_include_eps:n}
+% The special syntax is relatively clear here: remember we need PostScript
+% sizes here.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_image_include_eps:n #1
+ {
+ \tex_special:D { PSfile = #1 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \subsection{Drawing}
%
% \begin{macro}[aux]{\@@_draw_literal:n, \@@_draw_literal:x}
@@ -1972,17 +2255,17 @@
%
% \subsubsection{Color}
%
-% \begin{variable}{\l_@@_current_color_tl}
+% \begin{variable}{\l_@@_color_current_tl}
% The current color in driver-dependent format: the same as for
% \texttt{dvips}.
% \begin{macrocode}
-\tl_new:N \l_@@_current_color_tl
-\tl_set:Nn \l_@@_current_color_tl { gray~0 }
+\tl_new:N \l_@@_color_current_tl
+\tl_set:Nn \l_@@_color_current_tl { gray~0 }
%<*package>
\AtBeginDocument
{
\@ifpackageloaded { color }
- { \tl_set:Nn \l_@@_current_color_tl { \current@color } }
+ { \tl_set:Nn \l_@@_color_current_tl { \current@color } }
{ }
}
%</package>
@@ -1995,7 +2278,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_color_ensure_current:
{
- \tex_special:D { color~push~\l_@@_current_color_tl }
+ \tex_special:D { color~push~\l_@@_color_current_tl }
\group_insert_after:N \@@_color_reset:
}
\cs_new_protected:Npn \@@_color_reset:
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3expan.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3expan.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3expan.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3file.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3file.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3file.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -232,28 +232,34 @@
% and right braces are found) from the input \meta{stream} and stores
% the result locally in the \meta{token list} variable. If the
% \meta{stream} is not open, input is requested from the terminal.
-% The material read from the \meta{stream} will be tokenized by
-% \TeX{} according to the category codes in force when the function
-% is used. Note that any blank lines will be converted to the token
-% \cs{par}. Therefore, if skipping blank lines is requires a test such as
+% The material read from the \meta{stream} will be tokenized by \TeX{}
+% according to the category codes and \tn{endlinechar} in force when
+% the function is used. Assuming normal settings, any lines which do
+% not end in a comment character~|%| will have the line ending
+% converted to a space, so for example input
% \begin{verbatim}
+% a b c
+% \end{verbatim}
+% will result in a token list \verb*|a b c |. Any blank line is
+% converted to the token \cs{par}. Therefore, blank lines can be
+% skipped by using a test such as
+% \begin{verbatim}
% \ior_get:NN \l_my_stream \l_tmpa_tl
% \tl_set:Nn \l_tmpb_tl { \par }
% \tl_if_eq:NNF \l_tmpa_tl \l_tmpb_tl
% ...
% \end{verbatim}
-% may be used. Also notice that if multiple lines are read to match braces
-% then the resulting token list will contain \cs{par} tokens. As normal
-% \TeX{} tokenization is in force, any lines which do not end in a comment
-% character (usually |%|) will have the line ending converted to a space,
-% so for example input
-% \begin{verbatim}
-% a b c
-% \end{verbatim}
-% will result in a token list |a b c |.
+% Also notice that if multiple lines are read to match braces
+% then the resulting token list can contain \cs{par} tokens.
% \begin{texnote}
-% This protected macro expands to the \TeX{} primitive \tn{read}
-% along with the |to| keyword.
+% This protected macro is a wrapper around the \TeX{} primitive
+% \tn{read}. Regardless of settings, \TeX{} replaces trailing space
+% and tab characters (character codes 32 and~9) in each line by an
+% end-of-line character (character code \tn{endlinechar}, omitted if
+% \tn{endlinechar} is negative or too large) before turning
+% characters into tokens according to current category codes. With
+% default settings, spaces appearing at the beginning of lines are
+% also ignored.
% \end{texnote}
% \end{function}
%
@@ -279,12 +285,94 @@
% having category code~12.
% \begin{texnote}
% This protected macro is a wrapper around the \eTeX{} primitive
-% \tn{readline}. However, the end-line character normally added by
-% this primitive is not included in the result of
-% \cs{ior_str_get:NN}.
+% \tn{readline}. Regardless of settings, \TeX{} removes trailing
+% space and tab characters (character codes 32 and~9). However, the
+% end-line character normally added by this primitive is not
+% included in the result of \cs{ior_str_get:NN}.
% \end{texnote}
% \end{function}
%
+% \begin{function}[added = 2012-02-11]{\ior_map_inline:Nn}
+% \begin{syntax}
+% \cs{ior_map_inline:Nn} \meta{stream} \Arg{inline function}
+% \end{syntax}
+% Applies the \meta{inline function} to each set of \meta{lines}
+% obtained by calling \cs{ior_get:NN} until reaching the end of the
+% file. \TeX{} ignores any trailing new-line marker from the file it
+% reads. The \meta{inline function} should consist of code which will
+% receive the \meta{line} as |#1|.
+% \end{function}
+%
+% \begin{function}[added = 2012-02-11]{\ior_str_map_inline:Nn}
+% \begin{syntax}
+% \cs{ior_str_map_inline:Nn} \meta{stream} \Arg{inline function}
+% \end{syntax}
+% Applies the \meta{inline function} to every \meta{line}
+% in the \meta{stream}. The material is read from the \meta{stream}
+% as a series of tokens with category code $12$ (other), with the
+% exception of space characters which are given category code $10$
+% (space). The \meta{inline function} should consist of code which
+% will receive the \meta{line} as |#1|.
+% Note that \TeX{} removes trailing space and tab characters
+% (character codes 32 and 9) from every line upon input. \TeX{} also
+% ignores any trailing new-line marker from the file it reads.
+% \end{function}
+%
+% \begin{function}[added = 2012-06-29]{\ior_map_break:}
+% \begin{syntax}
+% \cs{ior_map_break:}
+% \end{syntax}
+% Used to terminate a \cs[no-index]{ior_map_\ldots} function before all
+% lines from the \meta{stream} have been processed. This will
+% normally take place within a conditional statement, for example
+% \begin{verbatim}
+% \ior_map_inline:Nn \l_my_ior
+% {
+% \str_if_eq:nnTF { #1 } { bingo }
+% { \ior_map_break: }
+% {
+% % Do something useful
+% }
+% }
+% \end{verbatim}
+% Use outside of a \cs[no-index]{ior_map_\ldots} scenario will lead to low
+% level \TeX{} errors.
+% \begin{texnote}
+% When the mapping is broken, additional tokens may be inserted by the
+% internal macro \cs{__prg_break_point:Nn} before further items are taken
+% from the input stream. This will depend on the design of the mapping
+% function.
+% \end{texnote}
+% \end{function}
+%
+% \begin{function}[added = 2012-06-29]{\ior_map_break:n}
+% \begin{syntax}
+% \cs{ior_map_break:n} \Arg{tokens}
+% \end{syntax}
+% Used to terminate a \cs[no-index]{ior_map_\ldots} function before all
+% lines in the \meta{stream} have been processed, inserting
+% the \meta{tokens} after the mapping has ended. This will
+% normally take place within a conditional statement, for example
+% \begin{verbatim}
+% \ior_map_inline:Nn \l_my_ior
+% {
+% \str_if_eq:nnTF { #1 } { bingo }
+% { \ior_map_break:n { <tokens> } }
+% {
+% % Do something useful
+% }
+% }
+% \end{verbatim}
+% Use outside of a \cs[no-index]{ior_map_\ldots} scenario will lead to low
+% level \TeX{} errors.
+% \begin{texnote}
+% When the mapping is broken, additional tokens may be inserted by the
+% internal macro \cs{__prg_break_point:Nn} before the \meta{tokens} are
+% inserted into the input stream.
+% This will depend on the design of the mapping function.
+% \end{texnote}
+% \end{function}
+%
%\begin{function}[updated = 2012-02-10, EXP, pTF]{\ior_if_eof:N}
% \begin{syntax}
% \cs{ior_if_eof_p:N} \meta{stream} \\
@@ -1183,6 +1271,61 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}[EXP]{\ior_map_break:, \ior_map_break:n}
+% Usual map breaking functions. Both hand over to
+% \cs{__prg_map_break:Nn}, tagged with \cs{ior_map_break:} so as to
+% match the \cs{__prg_break_point:Nn} which \cs{@@_map_inline:NNNn}
+% places after the mapping loop. The first form passes an empty set of
+% tokens; the second supplies no second argument itself, so that the
+% \meta{tokens} following it in the input are used instead.
+% \begin{macrocode}
+\cs_new:Npn \ior_map_break:
+ { \__prg_map_break:Nn \ior_map_break: { } }
+\cs_new:Npn \ior_map_break:n
+ { \__prg_map_break:Nn \ior_map_break: }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\ior_map_inline:Nn, \ior_str_map_inline:Nn}
+% \begin{macro}[aux]{\@@_map_inline:NNn}
+% \begin{macro}[aux]{\@@_map_inline:NNNn}
+% \begin{macro}[aux]{\@@_map_inline_loop:NNN}
+% \begin{variable}{\l_@@_internal_tl}
+% Mapping to an input stream can be done on either a token or a string
+% basis, hence the set up: the two documented functions differ only in
+% the reading function (\cs{ior_get:NN} or \cs{ior_str_get:NN}) that
+% they hand to the common auxiliary. Within that, there is a check to
+% avoid reading past the end of a file, hence the two applications of
+% \cs{ior_if_eof:N}. This mapping cannot be nested on the same stream,
+% as the stream has only one \enquote{current line}.
+% \begin{macrocode}
+\cs_new_protected:Npn \ior_map_inline:Nn
+ { \@@_map_inline:NNn \ior_get:NN }
+\cs_new_protected:Npn \ior_str_map_inline:Nn
+ { \@@_map_inline:NNn \ior_str_get:NN }
+% \end{macrocode}
+% The inline code is stored in a function whose name is built from the
+% current value of \cs{g__prg_map_int}, which is incremented first. The
+% resulting control sequence is passed on as the first argument of
+% \cs{@@_map_inline:NNNn}, ahead of the reading function, the stream
+% and the inline code.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_map_inline:NNn
+ {
+ \int_gincr:N \g__prg_map_int
+ \exp_args:Nc \@@_map_inline:NNNn
+ { __prg_map_ \int_use:N \g__prg_map_int :n }
+ }
+% \end{macrocode}
+% Here |#1| is the function named above, |#2| the reading function,
+% |#3| the stream and |#4| the inline code. The function |#1| is
+% (globally) defined to take one argument and carry out |#4|. The loop
+% is only started if the stream is not already at its end. The
+% \cs{__prg_break_point:Nn} tagged with \cs{ior_map_break:} follows the
+% loop and holds the decrement of \cs{g__prg_map_int} matching the
+% increment made earlier.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_map_inline:NNNn #1#2#3#4
+ {
+ \cs_gset_protected:Npn #1 ##1 {#4}
+ \ior_if_eof:NF #3 { \@@_map_inline_loop:NNN #1#2#3 }
+ \__prg_break_point:Nn \ior_map_break:
+ { \int_gdecr:N \g__prg_map_int }
+ }
+% \end{macrocode}
+% One step of the loop: read with |#2| from the stream |#3| into
+% \cs{l_@@_internal_tl}. If the stream does not report end-of-file
+% after that read, apply |#1| to the contents of the token list
+% (expanded once) and loop. Otherwise the loop stops and what was read
+% last is not passed to the inline code.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_map_inline_loop:NNN #1#2#3
+ {
+ #2 #3 \l_@@_internal_tl
+ \ior_if_eof:NF #3
+ {
+ \exp_args:No #1 \l_@@_internal_tl
+ \@@_map_inline_loop:NNN #1#2#3
+ }
+ }
+\tl_new:N \l_@@_internal_tl
+% \end{macrocode}
+% \end{variable}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
% \begin{variable}{\g__file_internal_ior}
% Needed by the higher-level code, but cannot be created until here.
% \begin{macrocode}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3final.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3final.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3final.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3flag.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3flag.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3flag.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3format.ins
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3format.ins 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3format.ins 2017-06-05 23:17:08 UTC (rev 44483)
@@ -67,6 +67,7 @@
\from{l3alloc.dtx} {initex}
% ==============================
\from{l3int.dtx} {initex}
+ \from{l3intarray.dtx} {initex}
\from{l3flag.dtx} {initex}
\from{l3quark.dtx} {initex}
\from{l3prg.dtx} {initex}
@@ -91,6 +92,9 @@
\from{l3fp-random.dtx} {initex}
\from{l3fp-assign.dtx} {initex}
\from{l3sort.dtx} {initex}
+ \from{l3tl-build.dtx} {initex}
+ \from{l3tl-analysis.dtx}{initex}
+ \from{l3regex.dtx} {initex}
\from{l3box.dtx} {initex}
\from{l3coffins.dtx} {initex}
\from{l3color.dtx} {initex}
@@ -117,13 +121,18 @@
\def\MetaPrefix{--}
\preamble
-EXPERIMENTAL CODE
+Copyright (C) 1990-2017 The LaTeX3 Project
-Do not distribute this file without also distributing the
-source files specified above.
+It may be distributed and/or modified under the conditions of
+the LaTeX Project Public License (LPPL), either version 1.3c of
+this license or (at your option) any later version. The latest
+version of this license is in the file:
-Do not distribute a modified version of this file.
+ http://www.latex-project.org/lppl.txt
+This file is part of the "l3kernel bundle" (The Work in LPPL)
+and all files in that bundle must be distributed together.
+
\endpreamble
\nopostamble
\generate{\file{expl3.lua} {\from{l3luatex.dtx}{lua,package}}}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-assign.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-assign.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-assign.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
% \maketitle
%
% \begin{documentation}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-aux.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-aux.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-aux.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-basics.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-basics.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-basics.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-convert.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-convert.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-convert.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-expo.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-expo.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-expo.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-extended.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-extended.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-extended.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-logic.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-logic.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-logic.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-parse.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-parse.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-parse.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-random.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-random.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-random.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-round.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-round.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-round.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-traps.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-traps.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-traps.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
% \maketitle
%
% \begin{documentation}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-trig.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-trig.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp-trig.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -38,7 +38,7 @@
% {latex-team at latex-project.org}^^A
% }^^A
% }
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3fp.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3fp.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3fp.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -47,7 +47,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3int.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3int.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3int.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Added: trunk/Master/texmf-dist/source/latex/l3kernel/l3intarray.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3intarray.dtx (rev 0)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3intarray.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -0,0 +1,231 @@
+% \iffalse meta-comment
+%
+%% File: l3intarray.dtx Copyright (C) 2017 The LaTeX3 Project
+%
+% It may be distributed and/or modified under the conditions of the
+% LaTeX Project Public License (LPPL), either version 1.3c of this
+% license or (at your option) any later version. The latest version
+% of this license is in the file
+%
+% http://www.latex-project.org/lppl.txt
+%
+% This file is part of the "l3kernel bundle" (The Work in LPPL)
+% and all files in that bundle must be distributed together.
+%
+% -----------------------------------------------------------------------
+%
+% The development version of the bundle can be found at
+%
+% https://github.com/latex3/latex3
+%
+% for those people who are interested.
+%
+%<*driver>
+\documentclass[full]{l3doc}
+\begin{document}
+ \DocInput{\jobname.dtx}
+\end{document}
+%</driver>
+% \fi
+%
+%
+% \title{^^A
+% The \textsf{l3intarray} package: low-level arrays of small integers^^A
+% }
+%
+% \author{^^A
+% The \LaTeX3 Project\thanks
+% {^^A
+% E-mail:
+% \href{mailto:latex-team at latex-project.org}
+% {latex-team at latex-project.org}^^A
+% }^^A
+% }
+%
+% \date{Released 2017/05/29}
+%
+% \maketitle
+%
+% \begin{documentation}
+%
+% \section{\pkg{l3intarray} documentation}
+%
+% This module provides no user function: at present it is meant for
+% kernel use only.
+%
+% It is a wrapper around the \tn{fontdimen} primitive, used to store
+% arrays of integers (with a restricted range: absolute value at most
+% $2^{30}-1$). In contrast to \pkg{l3seq} sequences the access to
+% individual entries is done in constant time rather than linear time,
+% but only integers can be stored. More precisely, the primitive
+% \tn{fontdimen} stores dimensions but the \pkg{l3intarray} package
+% transparently converts these from/to integers. Assignments are always
+% global.
+%
+% While \LuaTeX{}'s memory is extensible, other engines can
+% \enquote{only} deal with a bit less than $4\times 10^6$ entries in all
+% \tn{fontdimen} arrays combined (with default \TeX{}Live settings).
+%
+% \subsection{Internal functions}
+%
+% \begin{function}{\__intarray_new:Nn}
+% \begin{syntax}
+% \cs{__intarray_new:Nn} \meta{intarray~var} \Arg{size}
+% \end{syntax}
+% Evaluates the integer expression \meta{size} and allocates an
+% \meta{integer array variable} with that number of (zero) entries.
+% \end{function}
+%
+% \begin{function}[EXP]{\__intarray_count:N}
+% \begin{syntax}
+% \cs{__intarray_count:N} \meta{intarray~var}
+% \end{syntax}
+% Expands to the number of entries in the \meta{integer array variable}.
+% Contrary to \cs{seq_count:N} this is performed in constant time.
+% \end{function}
+%
+% \begin{function}{\__intarray_gset:Nnn, \__intarray_gset_fast:Nnn}
+% \begin{syntax}
+% \cs{__intarray_gset:Nnn} \meta{intarray~var} \Arg{position} \Arg{value}
+% \cs{__intarray_gset_fast:Nnn} \meta{intarray~var} \Arg{position} \Arg{value}
+% \end{syntax}
+% Stores the result of evaluating the integer expression \meta{value}
+% into the \meta{integer array variable} at the (integer expression)
+% \meta{position}. While \cs{__intarray_gset:Nnn} checks that the
+% \meta{position} is between $1$ and the \cs{__intarray_count:N} and that
+% the \meta{value}'s absolute value is at most $2^{30}-1$, the
+% \enquote{fast} function performs no such bound check.
+% Assignments are always global.
+% \end{function}
+%
+% \begin{function}[EXP]{\__intarray_item:Nn, \__intarray_item_fast:Nn}
+% \begin{syntax}
+% \cs{__intarray_item:Nn} \meta{intarray~var} \Arg{position}
+% \cs{__intarray_item_fast:Nn} \meta{intarray~var} \Arg{position}
+% \end{syntax}
+% Expands to the integer entry stored at the (integer expression)
+% \meta{position} in the \meta{integer array variable}. While
+% \cs{__intarray_item:Nn} checks that the \meta{position} is between $1$
+% and the \cs{__intarray_count:N}, the \enquote{fast} function performs
+% no such bound check.
+% \end{function}
+%
+% \end{documentation}
+%
+% \begin{implementation}
+%
+% \section{\pkg{l3intarray} implementation}
+%
+% \begin{macrocode}
+%<*initex|package>
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<@@=intarray>
+% \end{macrocode}
+%
+% \subsection{Allocating arrays}
+%
+% \begin{variable}{\g_@@_font_int}
+% Used to assign one font per array.
+% \begin{macrocode}
+\int_new:N \g_@@_font_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[int]{\@@_new:Nn}
+% After checking that |#1| is free, declare it to be a font
+% (arbitrarily |cmr10| at a never-used size: the size in scaled points
+% is the value of \cs{g_@@_font_int}, which is incremented for each new
+% array). Store the array's size, the result of evaluating the integer
+% expression |#2|, as the \tn{hyphenchar} of that font
+% and make sure enough \tn{fontdimen} are allocated, by setting the
+% last one (this is only done when the size is positive).
+% Then clear any \tn{fontdimen} that |cmr10| starts with.
+% It seems \LuaTeX{}'s |cmr10| has an extra \tn{fontdimen} parameter
+% number $8$ compared to other engines (for a math font we would
+% replace $8$ by $22$ or some such).
+% Note for reviewers: presumably the \tn{fontdimen} assignments have to
+% directly follow the \tn{font} assignment, as \TeX{} only lets the
+% number of parameters grow for the most recently loaded font.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_new:Nn #1#2
+ {
+ \__chk_if_free_cs:N #1
+ \int_gincr:N \g_@@_font_int
+ \tex_global:D \tex_font:D #1 = cmr10~at~ \g_@@_font_int sp \scan_stop:
+ \tex_hyphenchar:D #1 = \int_eval:n {#2} \scan_stop:
+ \int_compare:nNnT { \tex_hyphenchar:D #1 } > 0
+ { \tex_fontdimen:D \tex_hyphenchar:D #1 #1 = 0 sp \scan_stop: }
+ \int_step_inline:nnnn { 1 } { 1 } { 8 }
+ { \tex_fontdimen:D ##1 #1 = 0 sp \scan_stop: }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_count:N}
+% Size of an array: this expands to the \tn{hyphenchar} of the font
+% |#1|, which is where \cs{@@_new:Nn} stored the size.
+% \begin{macrocode}
+\cs_new:Npn \@@_count:N #1 { \tex_the:D \tex_hyphenchar:D #1 }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Array items}
+%
+% \begin{macro}[int]{\@@_gset:Nnn, \@@_gset_fast:Nnn}
+% \begin{macro}[aux]{\@@_gset_aux:Nnn}
+% Set the appropriate \tn{fontdimen}. The fast version evaluates the
+% position |#2| and the value |#3| and performs the assignment without
+% any test. The slow version first evaluates both integer expressions
+% fully, so that the auxiliary works with explicit numbers in its tests
+% and error messages. The auxiliary checks that the position lies
+% between $1$ and the \cs{@@_count:N} of the array; failing that, an
+% \texttt{out-of-bounds} error is raised and nothing is stored. It then
+% checks that the absolute value of the value does not exceed
+% \cs{c_max_dim}; otherwise an \texttt{overflow} error is raised and
+% \cs{c_max_dim}, with the sign of the requested value, is stored
+% instead. Since \cs{int_abs:n} never gives a negative result, a single
+% comparison against \cs{c_max_dim} is enough for that test.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_gset_fast:Nnn #1#2#3
+  { \tex_fontdimen:D \int_eval:n {#2} #1 = \int_eval:n {#3} sp \scan_stop: }
+\cs_new_protected:Npn \@@_gset:Nnn #1#2#3
+  {
+    \exp_args:Nff \@@_gset_aux:Nnn #1
+      { \int_eval:n {#2} } { \int_eval:n {#3} }
+  }
+\cs_new_protected:Npn \@@_gset_aux:Nnn #1#2#3
+  {
+    \int_compare:nTF { 1 <= #2 <= \@@_count:N #1 }
+      {
+        \int_compare:nNnTF { \int_abs:n {#3} } > { \c_max_dim }
+          {
+            \__msg_kernel_error:nnxxxx { kernel } { overflow }
+              { \token_to_str:N #1 } {#2} {#3}
+              { \int_compare:nNnT {#3} < 0 { - } \__int_value:w \c_max_dim }
+            \@@_gset_fast:Nnn #1 {#2}
+              { \int_compare:nNnT {#3} < 0 { - } \c_max_dim }
+          }
+          { \@@_gset_fast:Nnn #1 {#2} {#3} }
+      }
+      {
+        \__msg_kernel_error:nnxxx { kernel } { out-of-bounds }
+          { \token_to_str:N #1 } {#2} { \@@_count:N #1 }
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[EXP]{\@@_item:Nn, \@@_item_fast:Nn}
+% \begin{macro}[aux]{\@@_item_aux:Nn}
+% Get the appropriate \tn{fontdimen} and perform bound checks if requested.
+% The fast version applies \cs{__int_value:w} to the \tn{fontdimen} of
+% |#1| at the evaluated position |#2|. The slow version evaluates the
+% position once, then the auxiliary tests that it lies between $1$ and
+% the \cs{@@_count:N} of the array. If it does not, an expandable
+% \texttt{out-of-bounds} error is raised and |0| is left in the input
+% in place of the item.
+% \begin{macrocode}
+\cs_new:Npn \@@_item_fast:Nn #1#2
+ { \__int_value:w \tex_fontdimen:D \int_eval:n {#2} #1 }
+\cs_new:Npn \@@_item:Nn #1#2
+ { \exp_args:Nf \@@_item_aux:Nn #1 { \int_eval:n {#2} } }
+\cs_new:Npn \@@_item_aux:Nn #1#2
+ {
+ \int_compare:nTF { 1 <= #2 <= \@@_count:N #1 }
+ { \@@_item_fast:Nn #1 {#2} }
+ {
+ \__msg_kernel_expandable_error:nnnnn { kernel } { out-of-bounds }
+ { \token_to_str:N #1 } {#2} { \@@_count:N #1 }
+ 0
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macrocode}
+%</initex|package>
+% \end{macrocode}
+%
+% \end{implementation}
+%
+% \PrintIndex
Property changes on: trunk/Master/texmf-dist/source/latex/l3kernel/l3intarray.dtx
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3keys.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3keys.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3keys.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -656,7 +656,7 @@
% }
% \end{verbatim}
%
-% \begin{function}[added = 2011-08-23, updated = 2015-11-07]
+% \begin{function}[added = 2011-08-23, updated = 2017-05-27]
% {
% \keys_set_known:nnN, \keys_set_known:nVN,
% \keys_set_known:nvN, \keys_set_known:noN,
@@ -720,7 +720,7 @@
% groups to be made \enquote{active}, or by marking one or more groups to
% be ignored in key setting.
%
-% \begin{function}[added = 2013-07-14, updated = 2015-11-07]
+% \begin{function}[added = 2013-07-14, updated = 2017-05-27]
% {
% \keys_set_filter:nnnN, \keys_set_filter:nnVN,
% \keys_set_filter:nnvN, \keys_set_filter:nnoN,
@@ -742,7 +742,7 @@
% \meta{keyval list} returned at each stage.
% \end{function}
%
-% \begin{function}[added = 2013-07-14, updated = 2015-11-07]
+% \begin{function}[added = 2013-07-14, updated = 2017-05-27]
% {
% \keys_set_groups:nnn, \keys_set_groups:nnV,
% \keys_set_groups:nnv, \keys_set_groups:nno
@@ -2022,6 +2022,7 @@
% \keys_set_known:nn, \keys_set_known:nV,
% \keys_set_known:nv, \keys_set_known:no
% }
+% \begin{macro}[aux]{\@@_set_known:nn}
% Setting known keys simply means setting the appropriate flag, then
% running the standard code. To allow for nested setting, any existing
% value of \cs{l_@@_unused_clist} is saved on the stack and reset
@@ -2041,15 +2042,23 @@
\cs_generate_variant:Nn \@@_set_known:nnnN { o }
\cs_new_protected:Npn \keys_set_known:nn #1#2
{
+ \bool_if:NTF \l_@@_only_known_bool
+ { \keys_set:nn }
+ { \@@_set_known:nn }
+ {#1} {#2}
+ }
+\cs_generate_variant:Nn \keys_set_known:nn { nV , nv , no }
+\cs_new_protected:Npn \@@_set_known:nn #1#2
+ {
\bool_set_true:N \l_@@_only_known_bool
\keys_set:nn {#1} {#2}
\bool_set_false:N \l_@@_only_known_bool
}
-\cs_generate_variant:Nn \keys_set_known:nn { nV , nv , no }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
+% \end{macro}
%
% \begin{macro}
% {
@@ -2062,14 +2071,20 @@
% \keys_set_filter:nnn, \keys_set_filter:nnV, \keys_set_filter:nnv,
% \keys_set_filter:nno
% }
+% \begin{macro}[aux]{\@@_set_filter:nnn}
% \begin{macro}
% {
% \keys_set_groups:nnn, \keys_set_groups:nnV, \keys_set_groups:nnv,
% \keys_set_groups:nno
% }
+% \begin{macro}[aux]{\@@_set_groups:nnn}
+% \begin{macro}[aux]{\@@_set_selective:nnn}
+% \begin{macro}[aux]{\@@_set_selective:nnnn, \@@_set_selective:onnn}
+% \begin{macro}[aux]{\@@_set_selective:nn}
% The idea of setting keys in a selective manner again uses flags
% wrapped around the basic code. The comments on \cs{keys_set_known:nnN}
-% also apply here.
+% also apply here. We have a bit more shuffling to do to keep everything
+% nestable.
% \begin{macrocode}
\cs_new_protected:Npn \keys_set_filter:nnnN
{ \@@_set_filter:onnnN \l_@@_unused_clist }
@@ -2084,27 +2099,60 @@
\cs_generate_variant:Nn \@@_set_filter:nnnnN { o }
\cs_new_protected:Npn \keys_set_filter:nnn #1#2#3
{
- \bool_set_true:N \l_@@_selective_bool
+ \bool_if:NTF \l_@@_filtered_bool
+ { \@@_set_selective:nnn }
+ { \@@_set_filter:nnn }
+ {#1} {#2} {#3}
+ }
+\cs_generate_variant:Nn \keys_set_filter:nnn { nnV , nnv , nno }
+\cs_new_protected:Npn \@@_set_filter:nnn #1#2#3
+ {
\bool_set_true:N \l_@@_filtered_bool
- \seq_set_from_clist:Nn \l_@@_selective_seq {#2}
- \keys_set:nn {#1} {#3}
- \bool_set_false:N \l_@@_selective_bool
+ \@@_set_selective:nnn {#1} {#2} {#3}
+ \bool_set_false:N \l_@@_filtered_bool
}
-\cs_generate_variant:Nn \keys_set_filter:nnn { nnV , nnv , nno }
\cs_new_protected:Npn \keys_set_groups:nnn #1#2#3
{
- \bool_set_true:N \l_@@_selective_bool
+ \bool_if:NTF \l_@@_filtered_bool
+ { \@@_set_groups:nnn }
+ { \@@_set_selective:nnn }
+ {#1} {#2} {#3}
+ }
+\cs_generate_variant:Nn \keys_set_groups:nnn { nnV , nnv , nno }
+\cs_new_protected:Npn \@@_set_groups:nnn #1#2#3
+ {
\bool_set_false:N \l_@@_filtered_bool
- \seq_set_from_clist:Nn \l_@@_selective_seq {#2}
- \keys_set:nn {#1} {#3}
+ \@@_set_selective:nnn {#1} {#2} {#3}
+ \bool_set_true:N \l_@@_filtered_bool
+ }
+\cs_new_protected:Npn \@@_set_selective:nnn
+ { \@@_set_selective:onnn \l_@@_selective_seq }
+\cs_new_protected:Npn \@@_set_selective:nnnn #1#2#3#4
+ {
+ \seq_set_from_clist:Nn \l_@@_selective_seq {#3}
+ \bool_if:NTF \l_@@_selective_bool
+ { \keys_set:nn }
+ { \@@_set_selective:nn }
+ {#2} {#4}
+ \tl_set:Nn \l_@@_selective_seq {#1}
+ }
+\cs_generate_variant:Nn \@@_set_selective:nnnn { o }
+\cs_new_protected:Npn \@@_set_selective:nn #1#2
+ {
+ \bool_set_true:N \l_@@_selective_bool
+ \keys_set:nn {#1} {#2}
\bool_set_false:N \l_@@_selective_bool
}
-\cs_generate_variant:Nn \keys_set_groups:nnn { nnV , nnv , nno }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
%
% \begin{macro}[int]{\@@_set:n, \@@_set:nn}
% \begin{macro}[aux]{\@@_set_aux:nnn, \@@_set_aux:onn}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3luatex.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3luatex.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3luatex.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -186,7 +186,7 @@
}
}
\clist_map_inline:nn
- { \lua_shipout_x :n , \lua_shipout:n }
+ { \lua_shipout_x:n , \lua_shipout:n }
{
\cs_set_protected:Npn #1 ##1
{
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3msg.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3msg.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3msg.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -1902,6 +1902,18 @@
needed~when~defining~conditionals~or~variants,~or~when~building~a~
parameter~text~from~the~number~of~arguments~of~the~function.
}
+\@@_kernel_new:nnnn { kernel } { overflow }
+ { Integers~larger~than~2^{30}-1~cannot~be~stored~in~arrays. }
+ {
+ An~attempt~was~made~to~store~#3~at~position~#2~in~the~array~'#1'.~
+ The~largest~allowed~value~#4~will~be~used~instead.
+ }
+\@@_kernel_new:nnnn { kernel } { out-of-bounds }
+ { Access~to~an~entry~beyond~an~array's~bounds. }
+ {
+ An~attempt~was~made~to~access~or~store~data~at~position~#2~of~the~
+ array~'#1',~but~this~array~has~entries~at~positions~from~1~to~#3.
+ }
\@@_kernel_new:nnnn { kernel } { protected-predicate }
{ Predicate~'#1'~must~be~expandable. }
{
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3names.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3names.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3names.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3oldmodules.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3oldmodules.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3oldmodules.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -42,7 +42,7 @@
% }
%
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -103,11 +103,13 @@
%<l3prg>{l3prg}
%<l3prop>{l3prop}
%<l3quark>{l3quark}
+%<l3regex>{l3regex}
%<l3seq>{l3seq}
%<l3skip>{l3skip}
%<l3sort>{l3sort}
%<l3str>{l3str}
%<l3tl>{l3tl}
+%<l3tl-analysis>{l3tl-analysis}
%<l3token>{l3token}
% \end{macrocode}
%
@@ -127,20 +129,20 @@
\typeout{** }
\typeout{** Its functionality is now only provided as part of the expl3 package.}
\typeout{** }
-%<!l3sort>\typeout{** After showing you an error message you can hit <return> we will continue}
-%<!l3sort>\typeout{** for now by loading expl3 for you. However, the old packages will be}
-%<!l3sort>\typeout{** removed entirely at the end of 2017.}
-%<l3sort>\typeout{** The old packages will be removed entirely at the end of 2018.}
+%<!l3regex|l3sort|l3tl-analysis>\typeout{** After showing you an error message you can hit <return> we will continue}
+%<!l3regex|l3sort|l3tl-analysis>\typeout{** for now by loading expl3 for you. However, the old packages will be}
+%<!l3regex|l3sort|l3tl-analysis>\typeout{** removed entirely at the end of 2017.}
+%<l3regex|l3sort|l3tl-analysis>\typeout{** The old packages will be removed entirely at the end of 2018.}
\typeout{** }
\typeout{** Therefore, please replace '\string\usepackage{\old at liii@module at name}'}
\typeout{** with '\string\usepackage{expl3}' in your documents as soon as possible.}
\typeout{** }
\typeout{*******************************************************************}
-%<!l3sort>\PackageError
-%<l3sort>\PackageWarning
+%<!l3regex|l3sort|l3tl-analysis>\PackageError
+%<l3regex|l3sort|l3tl-analysis>\PackageWarning
\old at liii@module at name{This package is obsolete ---
use 'expl3' instead}
-%<!l3sort> \@ehc
+%<!l3regex|l3sort|l3tl-analysis> \@ehc
% \end{macrocode}
% Finally load \texttt{expl3} so that the user can continue for now.
% \begin{macrocode}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3prg.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3prg.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3prg.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3prop.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3prop.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3prop.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3quark.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3quark.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3quark.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Added: trunk/Master/texmf-dist/source/latex/l3kernel/l3regex.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3regex.dtx (rev 0)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3regex.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -0,0 +1,6395 @@
+% \iffalse meta-comment
+%
+%% File: l3regex.dtx Copyright (C) 2011-2017 The LaTeX3 Project
+%
+% It may be distributed and/or modified under the conditions of the
+% LaTeX Project Public License (LPPL), either version 1.3c of this
+% license or (at your option) any later version. The latest version
+% of this license is in the file
+%
+% http://www.latex-project.org/lppl.txt
+%
+% This file is part of the "l3kernel bundle" (The Work in LPPL)
+% and all files in that bundle must be distributed together.
+%
+% -----------------------------------------------------------------------
+%
+% The development version of the bundle can be found at
+%
+% https://github.com/latex3/latex3
+%
+% for those people who are interested.
+%
+%<*driver>
+\documentclass[full]{l3doc}
+\begin{document}
+ \DocInput{\jobname.dtx}
+\end{document}
+%</driver>
+% \fi
+%
+% \title{^^A
+% The \textsf{l3regex} package: regular expressions in \TeX{}^^A
+% }
+%
+% \author{^^A
+% The \LaTeX3 Project\thanks
+% {^^A
+% E-mail:
+% \href{mailto:latex-team@latex-project.org}
+% {latex-team@latex-project.org}^^A
+% }^^A
+% }
+%
+% \date{Released 2017/05/29}
+%
+% \maketitle
+%
+% \begin{documentation}
+% \newenvironment{l3regex-syntax}
+% {\begin{itemize}\def\\{\char`\\}\def\makelabel##1{\hss\llap{\ttfamily##1}}}
+% {\end{itemize}}
+%
+% \section{Regular expressions}
+%
+% The \pkg{l3regex} package provides regular expression testing,
+% extraction of submatches, splitting, and replacement, all acting
+% on token lists. The syntax of regular expressions is mostly a subset
+% of the \textsc{pcre} syntax (and very close to \textsc{posix}),
+% with some additions
+% due to the fact that \TeX{} manipulates tokens rather than characters.
+% For performance reasons, only a limited set of features are implemented.
+% Notably, back-references are not supported.
+%
+% Let us give a few examples. After
+% \begin{verbatim}
+% \tl_set:Nn \l_my_tl { That~cat. }
+% \regex_replace_once:nnN { at } { is } \l_my_tl
+% \end{verbatim}
+% the token list variable \cs{l_my_tl} holds the text
+% \enquote{\texttt{This cat.}}, where the first
+% occurrence of \enquote{\texttt{at}} was replaced
+% by \enquote{\texttt{is}}. A more complicated example is
+% a pattern to add a comma at the end of each word:
+% \begin{verbatim}
+% \regex_replace_all:nnN { \w+ } { \0 , } \l_my_tl
+% \end{verbatim}
+% The |\w| sequence represents any \enquote{word} character,
+% and |+| indicates that the |\w| sequence should be repeated
+% as many times as possible (at least once), hence matching a word in the
+% input token list. In the replacement text, |\0| denotes the full match
+% (here, a word).
+%
+% If a regular expression is to be used several times,
+% it can be compiled once, and stored in a regex
+% variable using \cs{regex_const:Nn}. For example,
+% \begin{verbatim}
+% \regex_const:Nn \c_foo_regex { \c{begin} \cB. (\c[^BE].*) \cE. }
+% \end{verbatim}
+% stores in \cs{c_foo_regex} a regular expression which matches the
+% starting marker for an environment: \cs{begin}, followed by a
+% begin-group token (|\cB.|), then any number of tokens which are
+% neither begin-group nor end-group character tokens (|\c[^BE].*|),
+% ending with an end-group token (|\cE.|). As explained in the next
+% section, the parentheses \enquote{capture} the result of |\c[^BE].*|,
+% giving us access to the name of the environment when doing
+% replacements.
+%
+% \subsection{Syntax of regular expressions}
+%
+% Most characters match exactly themselves,
+% with an arbitrary category code. Some characters are
+% special and must be escaped with a backslash (\emph{e.g.}, |\*|
+% matches a star character). Some escape sequences of
+% the form backslash--letter also have a special meaning
+% (for instance |\d| matches any digit). As a rule,
+% \begin{itemize}
+% \item every alphanumeric character (\texttt{A}--\texttt{Z},
+% \texttt{a}--\texttt{z}, \texttt{0}--\texttt{9}) matches
+% exactly itself, and should not be escaped, because
+% |\A|, |\B|, \ldots{} have special meanings;
+% \item non-alphanumeric printable ascii characters can (and should)
+% always be escaped: many of them have special meanings (\emph{e.g.},
+% use |\(|, |\)|, |\?|, |\.|);
+% \item spaces should always be escaped (even in character
+% classes);
+% \item any other character may be escaped or not, without any
+% effect: both versions will match exactly that character.
+% \end{itemize}
+% Note that these rules play nicely with the fact that many
+% non-alphanumeric characters are difficult to input into \TeX{}
+% under normal category codes. For instance, |\\abc\%|
+% matches the characters |\abc%| (with arbitrary category codes),
+% but does not match the control sequence |\abc| followed by a
+% percent character. Matching control sequences can be done
+% using the |\c|\Arg{regex} syntax (see below).
+%
+% Any special character which appears at a place where its special
+% behaviour cannot apply matches itself instead (for instance, a
+% quantifier appearing at the beginning of a string), after raising a
+% warning.
+%
+% Characters.
+% \begin{l3regex-syntax}
+% \item[\\x\{hh\ldots{}\}] Character with hex code \texttt{hh\ldots{}}.
+% \item[\\xhh] Character with hex code \texttt{hh}.
+% \item[\\a] Alarm (hex 07).
+% \item[\\e] Escape (hex 1B).
+% \item[\\f] Form-feed (hex 0C).
+% \item[\\n] New line (hex 0A).
+% \item[\\r] Carriage return (hex 0D).
+% \item[\\t] Horizontal tab (hex 09).
+% \end{l3regex-syntax}
+%
+% Character types.
+% \begin{l3regex-syntax}
+% \item[.] A single period matches any token.
+% \item[\\d] Any decimal digit.
+% \item[\\h] Any horizontal space character,
+% equivalent to |[\ \^^I]|: space and tab.
+% \item[\\s] Any space character,
+% equivalent to |[\ \^^I\^^J\^^L\^^M]|.
+% \item[\\v] Any vertical space character,
+% equivalent to |[\^^J\^^K\^^L\^^M]|. Note that |\^^K| is a vertical space,
+% but not a space, for compatibility with Perl.
+% \item[\\w] Any word character, \emph{i.e.},
+% alpha-numerics and underscore, equivalent to |[A-Za-z0-9\_]|.
+% \item[\\D] Any token not matched by |\d|.
+% \item[\\H] Any token not matched by |\h|.
+% \item[\\N] Any token other than the |\n| character (hex 0A).
+% \item[\\S] Any token not matched by |\s|.
+% \item[\\V] Any token not matched by |\v|.
+% \item[\\W] Any token not matched by |\w|.
+% \end{l3regex-syntax}
+% Of those, |.|, |\D|, |\H|, |\N|, |\S|, |\V|, and |\W| will match arbitrary
+% control sequences.
+%
+% Character classes match exactly one token in the subject.
+% \begin{l3regex-syntax}
+% \item[{[\ldots{}]}] Positive character class.
+% Matches any of the specified tokens.
+% \item[{[\char`\^\ldots{}]}] Negative character class.
+% Matches any token other than the specified characters.
+% \item[{x-y}] Within a character class, this denotes a range (can be
+% used with escaped characters).
+% \item[{[:\meta{name}:]}] Within a character class (one more set of
+% brackets), this denotes the \textsc{posix} character class
+% \meta{name}, which can be \texttt{alnum}, \texttt{alpha},
+% \texttt{ascii}, \texttt{blank}, \texttt{cntrl}, \texttt{digit},
+% \texttt{graph}, \texttt{lower}, \texttt{print}, \texttt{punct},
+% \texttt{space}, \texttt{upper}, \texttt{word}, or \texttt{xdigit}.
+% \item[{[:\char`\^\meta{name}:]}] Negative \textsc{posix} character class.
+% \end{l3regex-syntax}
+% For instance, |[a-oq-z\cC.]| matches any lowercase latin letter
+% except |p|, as well as control sequences (see below for a description
+% of |\c|).
+%
+% Quantifiers (repetition).
+% \begin{l3regex-syntax}
+% \item[?] $0$ or $1$, greedy.
+% \item[??] $0$ or $1$, lazy.
+% \item[*] $0$ or more, greedy.
+% \item[*?] $0$ or more, lazy.
+% \item[+] $1$ or more, greedy.
+% \item[+?] $1$ or more, lazy.
+% \item[\{$n$\}] Exactly $n$.
+% \item[\{$n,$\}] $n$ or more, greedy.
+% \item[\{$n,$\}?] $n$ or more, lazy.
+% \item[\{$n,m$\}] At least $n$, no more than $m$, greedy.
+% \item[\{$n,m$\}?] At least $n$, no more than $m$, lazy.
+% \end{l3regex-syntax}
+%
+% Anchors and simple assertions.
+% \begin{l3regex-syntax}
+% \item[\\b] Word boundary: either the previous token is matched by
+% |\w| and the next by |\W|, or the opposite. For this purpose,
+% the ends of the token list are considered as |\W|.
+% \item[\\B] Not a word boundary: between two |\w| tokens
+% or two |\W| tokens (including the boundary).
+% \item[\char`^ \textrm{or} \\A]
+% Start of the subject token list.
+% \item[\char`$\textrm{,} \\Z \textrm{or} \\z]
+% End of the subject token list.
+% \item[\\G] Start of the current match. This is only different from |^|
+% in the case of multiple matches: for instance
+% |\regex_count:nnN { \G a } { aaba } \l_tmpa_int| yields $2$, but
+% replacing |\G| by |^| would result in \cs{l_tmpa_int} holding the
+% value $1$.
+% \end{l3regex-syntax}
+%
+% Alternation and capturing groups.
+% \begin{l3regex-syntax}
+% \item[A\char`|B\char`|C] Either one of \texttt{A}, \texttt{B},
+% or \texttt{C}.
+% \item[(\ldots{})] Capturing group.
+% \item[(?:\ldots{})] Non-capturing group.
+% \item[(?\char`|\ldots{})] Non-capturing group which resets
+% the group number for capturing groups in each alternative.
+% The following group will be numbered with the first unused
+% group number.
+% \end{l3regex-syntax}
+%
+% The |\c| escape sequence allows to test the category code of tokens,
+% and match control sequences. Each character category is represented
+% by a single uppercase letter:
+% \begin{itemize}
+% \item |C| for control sequences;
+% \item |B| for begin-group tokens;
+% \item |E| for end-group tokens;
+% \item |M| for math shift;
+% \item |T| for alignment tab tokens;
+% \item |P| for macro parameter tokens;
+% \item |U| for superscript tokens (up);
+% \item |D| for subscript tokens (down);
+% \item |S| for spaces;
+% \item |L| for letters;
+% \item |O| for others; and
+% \item |A| for active characters.
+% \end{itemize}
+% The |\c| escape sequence is used as follows.
+% \begin{l3regex-syntax}
+% \item[\\c\Arg{regex}] A control sequence whose csname matches the
+% \meta{regex}, anchored at the beginning and end, so that |\c{begin}|
+% matches exactly \cs{begin}, and nothing else.
+% \item[\\cX] Applies to the next object, which can be a character,
+% character property, class, or group, and forces this object to
+% only match tokens with category |X| (any of |CBEMTPUDSLOA|). For
+% instance, |\cL[A-Z\d]| matches uppercase letters and digits of
+% category code letter, |\cC.| matches any control sequence, and
+% |\cO(abc)| matches |abc| where each character has category other.
+% \item[{\\c[XYZ]}] Applies to the next object, and forces it to only
+% match tokens with category |X|, |Y|, or |Z| (each being any of
+% |CBEMTPUDSLOA|). For instance, |\c[LSO](..)| matches two tokens of
+% category letter, space, or other.
+% \item[{\\c[\char`\^XYZ]}] Applies to the next object and prevents it
+% from matching any token with category |X|, |Y|, or |Z| (each being
+% any of |CBEMTPUDSLOA|). For instance, |\c[^O]\d| matches digits
+% which have any category different from other.
+% \end{l3regex-syntax}
+% The category code tests can be used inside classes; for instance,
+% |[\cO\d \c[LO][A-F]]| matches what \TeX{} considers as hexadecimal
+% digits, namely digits with category other, or uppercase letters from
+% |A| to |F| with category either letter or other. Within a group
+% affected by a category code test, the outer test can be overridden by
+% a nested test: for instance, |\cL(ab\cO\*cd)| matches |ab*cd| where
+% all characters are of category letter, except |*| which has category
+% other.
+%
+% The |\u| escape sequence allows to insert the contents of a token list
+% directly into a regular expression or a replacement, avoiding the need
+% to escape special characters. Namely, |\u|\Arg{tl~var~name} matches
+% the exact contents of the token list \meta{tl~var}. Within a |\c{...}|
+% control sequence matching, the |\u| escape sequence only expands its
+% argument once, in effect performing \cs{tl_to_str:v}. Quantifiers are
+% not supported directly: use a group.
+%
+% The option |(?i)| makes the match case insensitive (identifying
+% \texttt{A}--\texttt{Z} with \texttt{a}--\texttt{z}; no Unicode support
+% yet). This applies until the end of the group in which it appears, and
+% can be reverted using |(?-i)|. For instance, in
+% \verb"(?i)(a(?-i)b|c)d", the letters |a| and |d| are affected by the
+% |i| option. Characters within ranges and classes are affected
+% individually: |(?i)[Y-\\]| is equivalent to |[YZ\[\\yz]|, and
+% |(?i)[^aeiou]| matches any character which is not a vowel. Neither
+% character properties, nor |\c{...}| nor |\u{...}| are affected by the
+% |i| option.
+% ^^A \]
+%
+% In character classes, only |[|, |^|, |-|, |]|, |\| and spaces are
+% special, and should be escaped. Other non-alphanumeric characters can
+% still be escaped without harm. Any escape sequence which matches a
+% single character (|\d|, |\D|, \emph{etc.}) is supported in character
+% classes. If the first character is |^|, then
+% the meaning of the character class is inverted; |^| appearing anywhere
+% else in the range is not special. If the first character (possibly
+% following a leading |^|) is |]| then it does not need to be escaped
+% since ending the range there would make it empty.
+% Ranges of characters
+% can be expressed using |-|, for instance, |[\D 0-5]| and |[^6-9]| are
+% equivalent.
+%
+% Capturing groups are a means of extracting information about the
+% match. Parenthesized groups are labelled in the order of their
+% opening parenthesis, starting at $1$. The contents of those groups
+% corresponding to the \enquote{best} match (leftmost longest)
+% can be extracted and stored in a sequence of token lists using for
+% instance \cs{regex_extract_once:nnNTF}.
+%
+% The |\K| escape sequence resets the beginning of the match to the
+% current position in the token list. This only affects what is reported
+% as the full match. For instance,
+% \begin{verbatim}
+% \regex_extract_all:nnN { a \K . } { a123aaxyz } \l_foo_seq
+% \end{verbatim}
+% results in \cs{l_foo_seq} containing the items |{1}| and |{a}|: the
+% true matches are |{a1}| and |{aa}|, but they are trimmed by the use of
+% |\K|. The |\K| command does not affect capturing groups: for instance,
+% \begin{verbatim}
+% \regex_extract_once:nnN { (. \K c)+ \d } { acbc3 } \l_foo_seq
+% \end{verbatim}
+% results in \cs{l_foo_seq} containing the items |{c3}| and |{bc}|: the
+% true match is |{acbc3}|, with first submatch |{bc}|, but |\K| resets
+% the beginning of the match to the last position where it appears.
+%
+% \subsection{Syntax of the replacement text}
+%
+% Most of the features described in regular expressions do not make
+% sense within the replacement text. Backslash introduces various
+% special constructions:
+% \begin{itemize}
+% \item |\0| is the whole match;
+% \item |\1|, |\2|, \ldots{}, |\9| or |\g{|\meta{number}|}| are the
+% submatches (empty if there are fewer than \meta{number} capturing
+% groups);
+% \item \verb*|\ | inserts a space (spaces are ignored when not
+% escaped);
+% \item |\a|, |\e|, |\f|, |\n|, |\r|, |\t|, |\xhh|, |\x{hhh}|
+% correspond to single characters as in regular expressions;
+% \item |\c|\Arg{cs~name} inserts a control sequence;
+% \item |\c|\meta{category}\meta{character} (see below);
+% \item |\u|\Arg{tl~var~name} inserts the contents of the
+% \meta{tl~var} (see below).
+% \end{itemize}
+% Characters other than backslash and space are simply inserted in the
+% result (but since the replacement text is first converted to a string,
+% one should also escape characters that are special for \TeX{}, for
+% instance use~|\#|). Non-alphanumeric characters can always be safely
+% escaped with a backslash.
+%
+% For instance,
+% \begin{verbatim}
+% \tl_set:Nn \l_my_tl { Hello,~world! }
+% \regex_replace_all:nnN { ([er]?l|o) . } { (\0--\1) } \l_my_tl
+% \end{verbatim}
+% results in \cs{l_my_tl} holding |H(ell--el)(o,--o) w(or--o)(ld--l)!|
+%
+% Submatches always keep the same category codes as in the original
+% token list.
+% The characters inserted by the replacement have category code $12$
+% (other) by default, with the exception of space characters. Spaces
+% inserted through \verb*|\ | have category code $10$, while spaces
+% inserted through |\x20| or |\x{20}| have category code $12$.
+% The escape sequence |\c| allows to insert characters
+% with arbitrary category codes, as well as control sequences.
+% \begin{l3regex-syntax}
+% \item[\\cX(\ldots{})] Produces the characters \enquote{\ldots{}} with
+% category~|X|, which must be one of |CBEMTPUDSLOA| as in regular
+% expressions. Parentheses are optional for a single character (which
+% can be an escape sequence). This can be nested, for instance
+% |\cL(Hello\cS\ world)!|
+% \item[\\c\Arg{text}] Produces the control sequence with csname
+% \meta{text}. The \meta{text} may contain references to the
+% submatches |\0|, |\1|, and so on, as in the example for |\u| below.
+% \end{l3regex-syntax}
+%
+% The escape sequence |\u|\Arg{tl~var~name} allows to insert the
+% contents of the token list with name \meta{tl~var~name} directly into
+% the replacement, giving an easier control of category codes.
+% Within |\c{|\ldots{}|}| and |\u{|\ldots{}|}| constructions, the |\u|
+% and |\c|~escape sequences perform \cs{tl_to_str:v}, namely extract the
+% value of the control sequence and turn it into a string.
+%
+% Matches can be used within the arguments of |\c| and |\u|. For
+% instance,
+% \begin{verbatim}
+% \tl_set:Nn \l_my_one_tl { first }
+% \tl_set:Nn \l_my_two_tl { \emph{second} }
+% \tl_set:Nn \l_my_tl { one , two , one , one }
+% \regex_replace_all:nnN { [^,]+ } { \u{l_my_\0_tl} } \l_my_tl
+% \end{verbatim}
+% results in \cs{l_my_tl} holding |first,\emph{second},first,first|.
+%
+% \subsection{Pre-compiling regular expressions}
+%
+% If a regular expression is to be used several times,
+% it is better to compile it once rather than doing it
+% each time the regular expression is used. The compiled
+% regular expression is stored in a variable. All
+% of the \pkg{l3regex} module's functions can be given their
+% regular expression argument either as an explicit string
+% or as a compiled regular expression.
+%
+% \begin{function}[added = 2017-05-26]{\regex_new:N}
+% \begin{syntax}
+% \cs{regex_new:N} \meta{regex~var}
+% \end{syntax}
+% Creates a new \meta{regex~var} or raises an error if the
+% name is already taken. The declaration is global. The
+% \meta{regex~var} will initially be such that it never matches.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-26]
+% {\regex_set:Nn, \regex_gset:Nn, \regex_const:Nn}
+% \begin{syntax}
+% \cs{regex_set:Nn} \meta{regex~var} \Arg{regex}
+% \end{syntax}
+% Stores a compiled version of the \meta{regular expression}
+% in the \meta{regex~var}. For instance, this function can be used
+% as
+% \begin{verbatim}
+% \regex_new:N \l_my_regex
+% \regex_set:Nn \l_my_regex { my\ (simple\ )? reg(ex|ular\ expression) }
+% \end{verbatim}
+% The assignment is local for \cs{regex_set:Nn} and global for
+% \cs{regex_gset:Nn}. Use \cs{regex_const:Nn} for compiled expressions
+% which will never change.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-26]{\regex_show:n, \regex_show:N}
+% \begin{syntax}
+% \cs{regex_show:n} \Arg{regex}
+% \end{syntax}
+% Shows how \pkg{l3regex} interprets the \meta{regex}. For instance,
+% \cs{regex_show:n} \verb+{\A X|Y}+ shows
+% \begin{verbatim}
+% +-branch
+% anchor at start (\A)
+% char code 88
+% +-branch
+% char code 89
+% \end{verbatim}
+% indicating that the anchor |\A| only applies to the first branch:
+% the second branch is not anchored to the beginning of the match.
+% \end{function}
+%
+% \subsection{Matching}
+%
+% All regular expression functions are available in both |:n| and |:N|
+% variants. The former require a \enquote{standard} regular expression,
+% while the latter require a compiled expression as generated by
+% \cs{regex_(g)set:Nn}.
+%
+% \begin{function}[TF, added = 2017-05-26]{\regex_match:nn, \regex_match:Nn}
+% \begin{syntax}
+% \cs{regex_match:nnTF} \Arg{regex} \Arg{token list} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Tests whether the \meta{regular expression} matches any part
+% of the \meta{token list}. For instance,
+% \begin{verbatim}
+% \regex_match:nnTF { b [cde]* } { abecdcx } { TRUE } { FALSE }
+% \regex_match:nnTF { [b-dq-w] } { example } { TRUE } { FALSE }
+% \end{verbatim}
+% leaves \texttt{TRUE} then \texttt{FALSE} in the input stream.
+% \end{function}
+%
+% \begin{function}[added = 2017-05-26]{\regex_count:nnN, \regex_count:NnN}
+% \begin{syntax}
+% \cs{regex_count:nnN} \Arg{regex} \Arg{token list} \meta{int var}
+% \end{syntax}
+% Sets \meta{int var} within the current \TeX{} group level
+% equal to the number of times
+% \meta{regular expression} appears in \meta{token list}.
+% The search starts by finding the left-most longest match,
+% respecting greedy and ungreedy operators. Then the search
+% starts again from the character following the last character
+% of the previous match, until reaching the end of the token list.
+% Infinite loops are prevented in the case where the regular expression
+% can match an empty token list: then we count one match between each
+% pair of characters.
+% For instance,
+% \begin{verbatim}
+% \int_new:N \l_foo_int
+% \regex_count:nnN { (b+|c) } { abbababcbb } \l_foo_int
+% \end{verbatim}
+% results in \cs{l_foo_int} taking the value $5$.
+% \end{function}
+%
+% \subsection{Submatch extraction}
+%
+% \begin{function}[TF, added = 2017-05-26]
+% {\regex_extract_once:nnN, \regex_extract_once:NnN}
+% \begin{syntax}
+% \cs{regex_extract_once:nnN} \Arg{regex} \Arg{token list} \meta{seq~var}
+% \cs{regex_extract_once:nnNTF} \Arg{regex} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Finds the first match of the \meta{regular expression}
+% in the \meta{token list}. If it exists, the match is stored
+% as the zeroeth item of the \meta{seq~var}, and further
+% items are the contents of capturing groups, in the order
+% of their opening parenthesis. The \meta{seq~var}
+% is assigned locally. If there is no match,
+% the \meta{seq~var} is cleared.
+% The testing versions insert the \meta{true code} into the input
+% stream if a match was found, and the \meta{false code} otherwise.
+% For instance, assume that you type
+% \begin{verbatim}
+% \regex_extract_once:nnNTF { \A(La)?TeX(!*)\Z } { LaTeX!!! } \l_foo_seq
+% { true } { false }
+% \end{verbatim}
+% Then the regular expression (anchored at the start with |\A| and
+% at the end with |\Z|) will match the whole token list. The first
+% capturing group, |(La)?|, matches |La|, and the second capturing
+% group, |(!*)|, matches |!!!|. Thus, |\l_foo_seq| will contain
+% the items |{LaTeX!!!}|, |{La}|, and |{!!!}|, and the \texttt{true}
+% branch is left in the input stream.
+% \end{function}
+%
+% \begin{function}[TF, added = 2017-05-26]
+% {\regex_extract_all:nnN, \regex_extract_all:NnN}
+% \begin{syntax}
+% \cs{regex_extract_all:nnN} \Arg{regex} \Arg{token list} \meta{seq~var}
+% \cs{regex_extract_all:nnNTF} \Arg{regex} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Finds all matches of the \meta{regular expression}
+% in the \meta{token list}, and stores all the submatch information
+% in a single sequence (concatenating the results of
+% multiple \cs{regex_extract_once:nnN} calls).
+% The \meta{seq~var} is assigned locally. If there is no match,
+% the \meta{seq~var} is cleared.
+% The testing versions insert the \meta{true code} into the input
+% stream if a match was found, and the \meta{false code} otherwise.
+% For instance, assume that you type
+% \begin{verbatim}
+% \regex_extract_all:nnNTF { \w+ } { Hello,~world! } \l_foo_seq
+% { true } { false }
+% \end{verbatim}
+% Then the regular expression will match twice, and the resulting
+% sequence contains the two items |{Hello}| and |{world}|,
+% and the \texttt{true} branch is left in the input stream.
+% \end{function}
+%
+% \begin{function}[TF, added = 2017-05-26]{\regex_split:nnN, \regex_split:NnN}
+% \begin{syntax}
+% \cs{regex_split:nnN} \Arg{regular expression} \Arg{token list} \meta{seq~var}
+% \cs{regex_split:nnNTF} \Arg{regular expression} \Arg{token list} \meta{seq~var} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Splits the \meta{token list} into a sequence of parts, delimited by
+% matches of the \meta{regular expression}. If the \meta{regular expression}
+% has capturing groups, then the token lists that they match are stored as
+% items of the sequence as well. The assignment to \meta{seq~var} is local.
+% If no match is found the resulting \meta{seq~var} has the
+% \meta{token list} as its sole item. If the \meta{regular expression}
+% matches the empty token list, then the \meta{token list} is split
+% into single tokens.
+% The testing versions insert the \meta{true code} into the input
+% stream if a match was found, and the \meta{false code} otherwise.
+% For example, after
+% \begin{verbatim}
+% \seq_new:N \l_path_seq
+% \regex_split:nnNTF { / } { the/path/for/this/file.tex } \l_path_seq
+% { true } { false }
+% \end{verbatim}
+% the sequence |\l_path_seq| contains the items |{the}|, |{path}|,
+% |{for}|, |{this}|, and |{file.tex}|, and the \texttt{true} branch
+% is left in the input stream.
+% \end{function}
+%
+% \subsection{Replacement}
+%
+% \begin{function}[TF, added = 2017-05-26]
+% {\regex_replace_once:nnN,\regex_replace_once:NnN}
+% \begin{syntax}
+% \cs{regex_replace_once:nnN} \Arg{regular expression} \Arg{replacement} \meta{tl~var}
+% \cs{regex_replace_once:nnNTF} \Arg{regular expression} \Arg{replacement} \meta{tl~var} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Searches for the \meta{regular expression} in the contents of the
+% \meta{tl~var} and replaces the first match with the
+% \meta{replacement}. The result is assigned locally to
+% \meta{tl~var}. In the \meta{replacement}, |\0| represents the full
+% match, |\1| represents the contents of the first capturing group,
+% |\2| of the second, \emph{etc.}
+% \end{function}
+%
+% \begin{function}[TF, added = 2017-05-26]
+% {\regex_replace_all:nnN, \regex_replace_all:NnN}
+% \begin{syntax}
+% \cs{regex_replace_all:nnN} \Arg{regular expression} \Arg{replacement} \meta{tl~var}
+% \cs{regex_replace_all:nnNTF} \Arg{regular expression} \Arg{replacement} \meta{tl~var} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% Replaces all occurrences of the \meta{regular expression} in the
+% contents of the \meta{tl~var} by the \meta{replacement}, where |\0|
+% represents the full match, |\1| represents the contents of the first
+% capturing group, |\2| of the second, \emph{etc.} Every match is
+% treated independently, and matches cannot overlap. The result is
+% assigned locally to \meta{tl~var}.
+% \end{function}
+%
+% \subsection{Bugs, misfeatures, future work, and other possibilities}
+%
+% The following need to be done now.
+% \begin{itemize}
+% \item Change user function names!
+% \item Clean up the use of messages.
+% \item Rewrite the documentation in a more ordered way, perhaps add a
+% \textsc{bnf}?
+% \end{itemize}
+%
+% Additional error-checking to come.
+% \begin{itemize}
+% \item Currently, |a{\x34}| is recognized as |a{4}|.
+% \item Cleaner error reporting in the replacement phase.
+% \item Add tracing information.
+% \item Detect attempts to use back-references and other
+% non-implemented syntax.
+% \item Test for the maximum register \cs{c_max_register_int}.
+% \item Find out whether the fact that |\W| and friends match the
+% end-marker leads to bugs. Possibly update \cs{__regex_item_reverse:n}.
+% \item Enforce that |\cC| can only be followed by a match-all dot.
+% \item The empty cs should be matched by |\c{}|, not by
+% |\c{csname.?endcsname\s?}|.
+% \end{itemize}
+%
+% Code improvements to come.
+% \begin{itemize}
+% \item Shift arrays so that the useful information starts at
+% position~$1$.
+% \item Only build |\c{...}| once.
+% \item Use arrays for the left and right state stacks when
+% compiling a regex.
+% \item Should \cs{__regex_action_free_group:n} only be used for greedy
+% |{n,}| quantifier? (I think not.)
+% \item Quantifiers for |\u| and assertions.
+% \item When matching, keep track of an explicit stack of
+% \texttt{current_state} and \texttt{current_submatches}.
+% \item If possible, when a state is reused by the same thread, kill
+% other subthreads.
+% \item Use an array rather than \cs{l__regex_balance_tl}
+% to build \cs{__regex_replacement_balance_one_match:n}.
+% \item Reduce the number of epsilon-transitions in alternatives.
+% \item Optimize simple strings: use fewer states (|abcade| should give
+% two states, for |abc| and |ade|). [Does that really make sense?]
+% \item Optimize groups with no alternative.
+% \item Optimize states with a single \cs{__regex_action_free:n}.
+% \item Optimize the use of \cs{__regex_action_success:} by inserting it
+% in state $2$ directly instead of having an extra transition.
+% \item Optimize the use of \cs{int_step_...} functions.
+% \item Groups don't capture within regexes for csnames; optimize and
+% document.
+% \item Better \enquote{show} for anchors, properties, and catcode tests.
+% \item Does |\K| really need a new state for itself?
+% \item When compiling, use a boolean \texttt{in_cs} and less magic
+% numbers.
+% \item Instead of checking whether the character is special or
+% alphanumeric using its character code, check if it is special in
+% regexes with \cs{cs_if_exist} tests.
+% \end{itemize}
+%
+% The following features are likely to be implemented at some point
+% in the future.
+% \begin{itemize}
+% \item General look-ahead/behind assertions.
+% \item Regex matching on external files.
+% \item Conditional subpatterns with look ahead/behind: \enquote{if
+% what follows is [\ldots{}], then [\ldots{}]}.
+% \item |(*..)| and |(?..)| sequences to set some options.
+% \item UTF-8 mode for pdf\TeX{}.
+% \item Newline conventions are not done.
+% In particular, we should have an option for |.| not to match newlines.
+% Also, |\A| should differ from |^|, and |\Z|, |\z| and |$| should
+% differ.
+% \item Unicode properties: |\p{..}| and |\P{..}|;
+% |\X| which should match any \enquote{extended} Unicode sequence.
+% This requires to manipulate a lot of data, probably using tree-boxes.
+% \end{itemize}
+%
+% The following features of \textsc{pcre} or Perl may or may not be
+% implemented.
+% \begin{itemize}
+% \item |\ddd|, matching the character with octal code \texttt{ddd};
+% \item Callout with |(?C...)|;
+% \item Conditional subpatterns (other than with a look-ahead or
+% look-behind condition): this is non-regular, isn't it?
+% \item Named subpatterns: \TeX{} programmers have lived so far
+% without any need for named macro parameters.
+% \end{itemize}
+%
+% The following features of \textsc{pcre} or Perl will definitely not be
+% implemented.
+% \begin{itemize}
+% \item |\cx|, similar to \TeX{}'s own |\^^x|;
+% \item Comments: \TeX{} already has its own system for comments.
+% \item |\Q...\E| escaping: this would require to read the argument
+% verbatim, which is not in the scope of this module.
+% \item Atomic grouping, possessive quantifiers: those tools, mostly
+% meant to fix catastrophic backtracking, are unnecessary in a
+% non-backtracking algorithm, and difficult to implement.
+% \item Subroutine calls: this syntactic sugar is difficult to include
+% in a non-backtracking algorithm, in particular because the
+% corresponding group should be treated as atomic.
+% \item Recursion: this is a non-regular feature.
+% \item Back-references: non-regular feature, this requires
+% backtracking, which is prohibitively slow.
+% \item Backtracking control verbs: intrinsically tied to
+% backtracking.
+% \item |\C| single byte in UTF-8 mode: Xe\TeX{} and Lua\TeX{} serve
+% us characters directly, and splitting those into bytes is tricky,
+% encoding dependent, and most likely not useful anyways.
+% \end{itemize}
+%
+% \end{documentation}
+%
+% \begin{implementation}
+%
+% \section{\pkg{l3regex} implementation}
+%
+% \begin{macrocode}
+%<*initex|package>
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<@@=regex>
+% \end{macrocode}
+%
+% \subsection{Plan of attack}
+%
+% Most regex engines use backtracking. This makes it possible to provide very
+% powerful features (back-references come to mind first), but it is
+% costly, and raises the problem of catastrophic backtracking. Since
+% \TeX{} is not first and foremost a programming language, complicated
+% code tends to run slowly, and we must use faster, albeit slightly more
+% restrictive, techniques, coming from automata theory.
+%
+% Given a regular expression of $n$ characters, we do the following:
+% \begin{itemize}
+% \item (Compiling.) Analyse the regex, finding invalid input, and
+% convert it to an internal representation.
+% \item (Building.) Convert the compiled regex to a non-deterministic
+% finite automaton (\textsc{nfa}) with $O(n)$ states which
+% accepts precisely token lists matching that regex.
+% \item (Matching.) Loop through the query token list one token (one
+% \enquote{position}) at a time, exploring in parallel every
+% possible path (\enquote{active thread}) through the \textsc{nfa},
+% considering active threads in an order determined by the
+% quantifiers' greediness.
+% \end{itemize}
+%
+% We use the following vocabulary in the code comments (and in variable
+% names).
+% \begin{itemize}
+% \item \emph{Group}: index of the capturing group, $-1$ for
+% non-capturing groups.
+% \item \emph{Position}: each token in the query is labelled by an
+% integer \meta{position}, with $\texttt{min_pos} - 1 \leq
+% \meta{position} \leq \texttt{max_pos}$. The lowest and highest
+% positions correspond to imaginary begin and end markers (with
+% inaccessible category code and character code).
+% \item \emph{Query}: the token list to which we apply the regular
+% expression.
+% \item \emph{State}: each state of the \textsc{nfa} is labelled by an
+% integer \meta{state} with $\texttt{min_state} \leq \meta{state} <
+% \texttt{max_state}$.
+% \item \emph{Active thread}: state of the \textsc{nfa} that is reached
+% when reading the query token list for the matching. Those threads
+% are ordered according to the greediness of quantifiers.
+% \item \emph{Step}: used when matching, starts at $0$, incremented
+% every time a character is read, and is not reset when searching
+% for repeated matches. The integer \cs{l_@@_step_int} is a
+% unique id for all the steps of the matching algorithm.
+% \end{itemize}
+%
+% We use \pkg{l3intarray} to manipulate arrays of integers (stored into
+% some dimension registers in scaled points). We also abuse \TeX{}'s
+% \tn{toks} registers, by accessing them directly by number rather than
+% tying them to a control sequence using the \tn{newtoks} allocation
+% functions. Specifically, these arrays and \tn{toks} are used as
+% follows. When compiling, \tn{toks} registers are used under the hood
+% by functions from the \pkg{l3tl-build} module. When building,
+% \tn{toks}\meta{state} holds the tests and actions to perform in the
+% \meta{state} of the \textsc{nfa}. When matching,
+% \begin{itemize}
+% \item \cs{g_@@_state_active_intarray} holds the last \meta{step} in
+% which each \meta{state} was active.
+% \item \cs{g_@@_thread_state_intarray} maps each \meta{thread} (with
+% $\texttt{min_active} \leq \meta{thread} < \texttt{max_active}$) to
+% the \meta{state} in which the \meta{thread} currently is. The
+% \meta{threads} are ordered starting from the best to the least
+% preferred.
+% \item \tn{toks}\meta{thread} holds the submatch information for the
+% \meta{thread}, as the contents of a property list.
+% \item \cs{g_@@_charcode_intarray} and \cs{g_@@_catcode_intarray} hold the
+% character codes and category codes of tokens at each
+% \meta{position} in the query.
+% \item \cs{g_@@_balance_intarray} holds the balance of begin-group and
+% end-group character tokens which appear before that point in the
+% token list.
+% \item \tn{toks}\meta{position} holds \meta{tokens} which \texttt{o}-
+% and \texttt{x}-expand to the \meta{position}-th token in the query.
+% \item \cs{g_@@_submatch_prev_intarray}, \cs{g_@@_submatch_begin_intarray}
+% and \cs{g_@@_submatch_end_intarray} hold, for each submatch (as would
+% be extracted by \cs{regex_extract_all:nnN}), the place where the
+% submatch started to be looked for and its two end-points. For
+% historical reasons, the minimum index is twice \texttt{max_state},
+% and the used registers go up to \cs{l_@@_submatch_int}. They are
+% organized in blocks of \cs{l_@@_capturing_group_int} entries, each
+% block corresponding to one match with all its submatches stored in
+% consecutive entries.
+% \end{itemize}
+% \tn{count} registers are not abused, which means that we can safely
+% use named integers in this module. Note that \tn{box} registers are
+% not abused either; maybe we could leverage those for some purpose.
+%
+% The code is structured as follows. Variables are introduced in the
+% relevant section. First we present some generic helper functions. Then
+% comes the code for compiling a regular expression, and for showing the
+% result of the compilation. The building phase converts a compiled
+% regex to \textsc{nfa} states, and the automaton is run by the code in
+% the following section. The only remaining brick is parsing the
+% replacement text and performing the replacement. We are then ready for
+% all the user functions. Finally, messages, and a little bit of tracing
+% code.
+%
+% \subsection{Helpers}
+%
+% \begin{macro}[int]{\@@_standard_escapechar:}
+% Make the \tn{escapechar} into the standard backslash.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_standard_escapechar: % takes no argument
+ { \int_set:Nn \tex_escapechar:D { `\\ } } % local assignment: \escapechar becomes the character code of the backslash
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_toks_use:w}
+% Unpack a \tn{toks} given its number.
+% \begin{macrocode}
+\cs_new:Npn \@@_toks_use:w { \tex_the:D \tex_toks:D } % the register number is not an argument: it must follow in the input stream
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_toks_clear:N, \@@_toks_set:Nn, \@@_toks_set:No}
+% Empty a \tn{toks} or set it to a value, given its number.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_toks_clear:N #1 % #1: number of the toks register to empty
+ { \tex_toks:D #1 { } }
+\cs_new_eq:NN \@@_toks_set:Nn \tex_toks:D % plain alias of the primitive, which itself reads the number and the braced value
+\cs_new_protected:Npn \@@_toks_set:No #1 % #1: register number; the braced value follows in the input
+ { \@@_toks_set:Nn #1 \exp_after:wN } % the trailing \exp_after:wN skips the opening brace of the value and expands its first token once
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_toks_memcpy:NNn}
+% Copy |#3| \tn{toks} registers from |#2| onwards to |#1| onwards,
+% like |C|'s |memcpy|.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_toks_memcpy:NNn #1#2#3 % #1: destination index, #2: source index (both integer variables), #3: number of registers to copy
+ {
+ \prg_replicate:nn {#3}
+ {
+ \tex_toks:D #1 = \tex_toks:D #2 % copy one register
+ \int_incr:N #1 % both index variables are modified:
+ \int_incr:N #2 % after the loop each has been incremented #3 times
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_toks_put_left:Nx}
+% \begin{macro}[int]{\@@_toks_put_right:Nx, \@@_toks_put_right:Nn}
+% During the building phase we wish to add \texttt{x}-expanded
+% material to \tn{toks}, either to the left or to the right. The
+% expansion is done \enquote{by hand} for optimization (these
+% operations are used quite a lot). The \texttt{Nn} version of
+% \cs{@@_toks_put_right:Nx} is provided because it is more
+% efficient than \texttt{x}-expanding with \cs{exp_not:n}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_toks_put_left:Nx #1#2 % #1: toks register number, #2: material to x-expand and prepend
+ {
+ \cs_set:Npx \@@_tmp:w { #2 } % the x-expansion of #2 is stored in a scratch macro
+ \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN % first pass unpacks the old contents, second pass expands \@@_tmp:w once
+ { \exp_after:wN \@@_tmp:w \tex_the:D \tex_toks:D #1 } % new value: expanded #2 followed by the old contents
+ }
+\cs_new_protected:Npn \@@_toks_put_right:Nx #1#2 % #1: toks register number, #2: material to x-expand and append
+ {
+ \cs_set:Npx \@@_tmp:w {#2} % the x-expansion of #2 is stored in a scratch macro
+ \tex_toks:D #1 \exp_after:wN % skip the opening brace to trigger \tex_the:D
+ { \tex_the:D \tex_toks:D \exp_after:wN #1 \@@_tmp:w } % \@@_tmp:w is expanded once while the register number is being read; new value: old contents followed by expanded #2
+ }
+\cs_new_protected:Npn \@@_toks_put_right:Nn #1#2 % #1: toks register number, #2: material appended without expansion
+ { \tex_toks:D #1 \exp_after:wN { \tex_the:D \tex_toks:D #1 #2 } } % only the old contents are unpacked; #2 is stored as is
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int, rEXP]{\@@_current_cs_to_str:}
+% Expands to the string representation of the token (known to be a
+% control sequence) at the current position \cs{l_@@_current_pos_int}.
+% It should only be used in \texttt{x}-expansion to avoid losing a
+% leading space.
+% \begin{macrocode}
+\cs_new:Npn \@@_current_cs_to_str: % takes no argument; reads the toks register numbered \l_@@_current_pos_int
+ {
+ \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N % two expansion steps are applied to what follows before \cs_to_str:N acts
+ \tex_the:D \tex_toks:D \l_@@_current_pos_int % first step: unpack the register contents
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Constants and variables}
+%
+% \begin{macro}[aux]{\@@_tmp:w}
+% Temporary function used for various short-term purposes.
+% \begin{macrocode}
+\cs_new:Npn \@@_tmp:w { } % initially empty; overwritten with \cs_set:Npx by the functions that use it as scratch space
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{variable}
+% {
+% \l_@@_internal_a_tl, \l_@@_internal_b_tl,
+% \l_@@_internal_a_int, \l_@@_internal_b_int,
+% \l_@@_internal_c_int, \l_@@_internal_bool,
+% \l_@@_internal_seq, \g_@@_internal_tl,
+% }
+% Temporary variables used for various purposes.
+% \begin{macrocode}
+\tl_new:N \l_@@_internal_a_tl % local scratch token list
+\tl_new:N \l_@@_internal_b_tl % local scratch token list
+\int_new:N \l_@@_internal_a_int % local scratch integer
+\int_new:N \l_@@_internal_b_int % local scratch integer
+\int_new:N \l_@@_internal_c_int % local scratch integer
+\bool_new:N \l_@@_internal_bool % local scratch boolean
+\seq_new:N \l_@@_internal_seq % local scratch sequence
+\tl_new:N \g_@@_internal_tl % global scratch token list
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\c_@@_no_match_regex}
+% This regular expression matches nothing, but is still a valid
+% regular expression. We could use a failing assertion, but I went for
+% an empty class. It is used as the initial value for regular
+% expressions declared using \cs{regex_new:N}.
+% \begin{macrocode}
+\tl_const:Nn \c_@@_no_match_regex % constant compiled regex: a single branch holding a single class
+ {
+ \@@_branch:n
+ { \@@_class:NnnnN \c_true_bool { } { 1 } { 0 } \c_true_bool } % the second argument (the class contents) is empty; NOTE(review): the meaning of the other arguments is set by \@@_class:NnnnN, defined elsewhere
+ }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\g_@@_charcode_intarray, \g_@@_catcode_intarray, \g_@@_balance_intarray}
+% The first thing we do when matching is to go once through the query
+% token list and store the information for each token into
+% \cs{g_@@_charcode_intarray}, \cs{g_@@_catcode_intarray} and \tn{toks}
+% registers. We also store the balance of begin-group/end-group
+% characters into \cs{g_@@_balance_intarray}.
+% \begin{macrocode}
+\__intarray_new:Nn \g_@@_charcode_intarray { 65536 } % character code of each query token
+\__intarray_new:Nn \g_@@_catcode_intarray { 65536 } % category code of each query token
+\__intarray_new:Nn \g_@@_balance_intarray { 65536 } % begin-group/end-group balance before each position
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_balance_int}
+% During this phase, \cs{l_@@_balance_int} counts the balance of
+% begin-group and end-group character tokens which appear before a
+% given point in the token list. This variable is also used to keep
+% track of the balance in the replacement text.
+% \begin{macrocode}
+\int_new:N \l_@@_balance_int % running begin-group/end-group balance
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_cs_name_tl}
+% This variable is used in \cs{@@_item_cs:n} to store the csname of
+% the currently-tested token when the regex contains a sub-regex for
+% testing csnames.
+% \begin{macrocode}
+\tl_new:N \l_@@_cs_name_tl % set in \@@_item_cs:n to the name of the control sequence being tested
+% \end{macrocode}
+% \end{variable}
+%
+% \subsubsection{Testing characters}
+%
+% \begin{macro}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
+% \begin{macrocode}
+\int_const:Nn \c_@@_ascii_min_int { 0 } % lowest ASCII code
+\int_const:Nn \c_@@_ascii_max_control_int { 31 } % last code of the initial control-character range
+\int_const:Nn \c_@@_ascii_max_int { 127 } % highest ASCII code (DEL)
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{variable}{\c_@@_ascii_lower_int}
+% \begin{macrocode}
+\int_const:Nn \c_@@_ascii_lower_int { `a - `A } % offset (32) between a lower-case ASCII letter and its upper-case form
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[int]{\@@_break_point:TF}
+% \begin{macro}[int]{\@@_break_true:w}
+% When testing whether a character of the query token list matches
+% a given character class in the regular expression, we often
+% have to test it against several ranges of characters, checking
+% if any one of those matches. This is done with a structure like
+% \begin{quote}
+% \meta{test1} \ldots{} \meta{test$\sb{n}$} \\
+% \cs{@@_break_point:TF} \Arg{true code} \Arg{false code}
+% \end{quote}
+% If any of the tests succeeds, it calls \cs{@@_break_true:w},
+% which cleans up and leaves \meta{true code} in the input stream.
+% Otherwise, \cs{@@_break_point:TF} leaves the \meta{false code}
+% in the input stream.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_break_true:w % called by a successful test
+ #1 \@@_break_point:TF #2 #3 {#2} % drop everything up to the break point (#1) and the false code (#3), keep the true code (#2)
+\cs_new_protected:Npn \@@_break_point:TF #1 #2 { #2 } % reached only if no test broke out: keep the false code
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_item_reverse:n}
+% This function makes showing regular expressions easier, and lets us
+% define |\D| in terms of |\d| for instance. There is a subtlety: the
+% end of the query is marked by $-2$, and will thus match |\D| and
+% other negated properties; this case is caught by another part of
+% the code.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_reverse:n #1 % #1: tests which may call \@@_break_true:w
+ {
+ #1
+ \@@_break_point:TF { } \@@_break_true:w % a success in #1 selects the empty true code (no match); otherwise the false code \@@_break_true:w runs and breaks to the enclosing break point
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]
+% {\@@_item_caseful_equal:n, \@@_item_caseful_range:nn}
+% Simple comparisons triggering \cs{@@_break_true:w} when true.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_caseful_equal:n #1 % #1: character code to compare with the current character
+ {
+ \if_int_compare:w #1 = \l_@@_current_char_int
+ \exp_after:wN \@@_break_true:w % close the conditional before breaking
+ \fi:
+ }
+\cs_new_protected:Npn \@@_item_caseful_range:nn #1 #2 % #1, #2: inclusive lower and upper bounds of the range
+ {
+ \reverse_if:N \if_int_compare:w #1 > \l_@@_current_char_int % i.e. #1 <= current character
+ \reverse_if:N \if_int_compare:w #2 < \l_@@_current_char_int % i.e. current character <= #2
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w % close both conditionals before breaking
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]
+% {\@@_item_caseless_equal:n, \@@_item_caseless_range:nn}
+% For caseless matching, we perform the test both on the
+% \texttt{current_char} and on the \texttt{case_changed_char}. Before
+% doing the second set of tests, we make sure that
+% \texttt{case_changed_char} has been computed.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_caseless_equal:n #1 % #1: character code to compare, ignoring ASCII letter case
+ {
+ \if_int_compare:w #1 = \l_@@_current_char_int % first try the character as it is
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ \if_int_compare:w \l_@@_case_changed_char_int = \c_max_int % \c_max_int marks "not computed yet"
+ \@@_compute_case_changed_char:
+ \fi:
+ \if_int_compare:w #1 = \l_@@_case_changed_char_int % then try the case-changed character
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \@@_item_caseless_range:nn #1 #2 % #1, #2: inclusive bounds of the range, tested ignoring ASCII letter case
+ {
+ \reverse_if:N \if_int_compare:w #1 > \l_@@_current_char_int % #1 <= current character
+ \reverse_if:N \if_int_compare:w #2 < \l_@@_current_char_int % current character <= #2
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w % close both conditionals before breaking
+ \fi:
+ \fi:
+ \if_int_compare:w \l_@@_case_changed_char_int = \c_max_int % \c_max_int marks "not computed yet"
+ \@@_compute_case_changed_char:
+ \fi:
+ \reverse_if:N \if_int_compare:w #1 > \l_@@_case_changed_char_int % same range test on the case-changed character
+ \reverse_if:N \if_int_compare:w #2 < \l_@@_case_changed_char_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_compute_case_changed_char:}
+% This function is called when \cs{l_@@_case_changed_char_int} has
+% not yet been computed (or rather, when it is set to the marker value
+% \cs{c_max_int}). If the current character code is in the range
+% $[65,90]$ (upper-case), then add $32$, making it lowercase. If it is
+% in the lower-case letter range $[97,122]$, subtract $32$.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compute_case_changed_char: % sets \l_@@_case_changed_char_int from \l_@@_current_char_int
+ {
+ \int_set_eq:NN \l_@@_case_changed_char_int \l_@@_current_char_int % default: non-letters are left unchanged
+ \if_int_compare:w \l_@@_current_char_int > `Z \exp_stop_f:
+ \if_int_compare:w \l_@@_current_char_int > `z \exp_stop_f: \else: % above z: nothing to do
+ \if_int_compare:w \l_@@_current_char_int < `a \exp_stop_f: \else: % between Z and a: nothing to do
+ \int_sub:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int } % a-z: map to A-Z
+ \fi:
+ \fi:
+ \else:
+ \if_int_compare:w \l_@@_current_char_int < `A \exp_stop_f: \else: % below A: nothing to do
+ \int_add:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int } % A-Z: map to a-z
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_item_equal:n, \@@_item_range:nn}
+% Those must always be defined to expand to a \texttt{caseful}
+% (default) or \texttt{caseless} version, and not be protected: they
+% must expand when compiling, to hard-code which tests are caseless or
+% caseful.
+% \begin{macrocode}
+\cs_new_eq:NN \@@_item_equal:n ? % placeholder meaning (the character ?), only reserves the name
+\cs_new_eq:NN \@@_item_range:nn ? % placeholder meaning (the character ?), only reserves the name
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_item_catcode:nT, \@@_item_catcode_reverse:nT}
+% \begin{macro}[aux]{\@@_item_catcode:}
+% The argument is a sum of powers of $4$ with exponents given by the
+% allowed category codes (between $0$ and $13$). Dividing by a given
+% power of $4$ gives an odd result if and only if that category code
+% is allowed. If the catcode does not match, then skip the character
+% code tests which follow.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_catcode: % expands to the hexadecimal constant 4^(current catcode)
+ {
+ " % hexadecimal prefix for the digits selected below
+ \if_case:w \l_@@_current_catcode_int
+ 1 \or: 4 \or: 10 \or: 40 % catcodes 0-3: 4^0 .. 4^3 in hexadecimal
+ \or: 100 \or: \or: 1000 \or: 4000 % catcodes 4-7; the slot for 5 is empty
+ \or: 10000 \or: \or: 100000 \or: 400000 % catcodes 8-11; the slot for 9 is empty
+ \or: 1000000 \or: 4000000 \else: 1*0 % catcodes 12-13; any other value yields "1*0, making the caller's quotient zero
+ \fi: % NOTE(review): slots 5 and 9 presumably stay empty because no token carries those catcodes - confirm
+ }
+\cs_new_protected:Npn \@@_item_catcode:nT #1 % #1: sum of powers of 4 for the allowed catcodes; the tests to run follow as a braced argument
+ {
+ \if_int_odd:w \__int_eval:w #1 / \@@_item_catcode: \__int_eval_end: % odd quotient: the current catcode is allowed
+ \exp_after:wN \use:n % run the tests that follow
+ \else:
+ \exp_after:wN \use_none:n % skip the tests that follow
+ \fi:
+ }
+\cs_new_protected:Npn \@@_item_catcode_reverse:nT #1#2 % #1: allowed catcodes as above, #2: tests whose outcome is negated
+ { \@@_item_catcode:nT {#1} { \@@_item_reverse:n {#2} } }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_item_exact:nn, \@@_item_exact_cs:n}
+% This matches an exact \meta{category}-\meta{character code} pair, or
+% an exact control sequence, more precisely one of several possible control sequences.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_exact:nn #1#2 % #1: category code, #2: character code
+ {
+ \if_int_compare:w #1 = \l_@@_current_catcode_int
+ \if_int_compare:w #2 = \l_@@_current_char_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_break_true:w % close both conditionals before breaking
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \@@_item_exact_cs:n #1 % #1: candidate names, separated by \scan_stop:
+ {
+ \int_compare:nNnTF \l_@@_current_catcode_int = 0 % only control sequences (stored with catcode 0) can match
+ {
+ \tl_set:Nx \l_@@_internal_a_tl
+ { \scan_stop: \@@_current_cs_to_str: \scan_stop: } % current name wrapped in delimiters, so that only whole names match
+ \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l_@@_internal_a_tl % search the delimited name in the delimited list
+ { \@@_break_true:w } { }
+ }
+ { }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_item_cs:n}
+% Match a control sequence (the argument is a compiled regex).
+% First test the catcode of the current token to be zero.
+% Then perform the matching test, and break if the csname
+% indeed matches. The three \cs{exp_after:wN} expand the contents
+% of the \tn{toks}\meta{current position} (of the form \cs{exp_not:n}
+% \Arg{control sequence}) to \meta{control sequence}.
+% We store the cs name before building states for the cs, as those
+% states may overlap with toks registers storing the user's input.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_item_cs:n #1 % #1: compiled regex to match against the control sequence name
+ {
+ \int_compare:nNnT \l_@@_current_catcode_int = 0 % only control sequences are tested
+ {
+ \group_begin: % keep the nested match's local settings from leaking out
+ \tl_set:Nx \l_@@_cs_name_tl { \@@_current_cs_to_str: } % save the name before the states are built
+ \@@_single_match:
+ \@@_disable_submatches:
+ \@@_build_for_cs:n {#1}
+ \bool_set_eq:NN \l_@@_saved_success_bool \g_@@_success_bool % save the outer global success flag
+ \exp_args:NV \@@_match:n \l_@@_cs_name_tl % run the nested match on the name
+ \if_meaning:w \c_true_bool \g_@@_success_bool
+ \group_insert_after:N \@@_break_true:w % on success, break once the group has been closed
+ \fi:
+ \bool_gset_eq:NN \g_@@_success_bool \l_@@_saved_success_bool % restore the outer flag
+ \group_end:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Character property tests}
+%
+% \begin{macro}[aux]
+% {
+% \@@_prop_d:, \@@_prop_h:, \@@_prop_s:,
+% \@@_prop_v:, \@@_prop_w:, \@@_prop_N:
+% }
+% Character property tests for |\d|, |\W|, \emph{etc.} These character
+% properties are not affected by the |(?i)| option. The characters
+% recognized by each one are as follows: |\d=[0-9]|,
+% |\w=[0-9A-Z_a-z]|, \verb*+\s=[\ \^^I\^^J\^^L\^^M]+,
+% \verb*+\h=[\ \^^I]+, |\v=[\^^J-\^^M]|, and the upper case
+% counterparts match anything that the lower case does not match. The
+% order in which the various tests appear is optimized for usual
+% mostly lower case letter text.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_prop_d: % \d: decimal digit
+ { \@@_item_caseful_range:nn { `0 } { `9 } }
+\cs_new_protected:Npn \@@_prop_h: % \h: horizontal space
+ {
+ \@@_item_caseful_equal:n { `\ } % space
+ \@@_item_caseful_equal:n { `\^^I } % tab
+ }
+\cs_new_protected:Npn \@@_prop_s: % \s: any space
+ {
+ \@@_item_caseful_equal:n { `\ } % space
+ \@@_item_caseful_equal:n { `\^^I } % tab
+ \@@_item_caseful_equal:n { `\^^J } % line feed
+ \@@_item_caseful_equal:n { `\^^L } % form feed
+ \@@_item_caseful_equal:n { `\^^M } % carriage return
+ }
+\cs_new_protected:Npn \@@_prop_v: % \v: vertical space
+ { \@@_item_caseful_range:nn { `\^^J } { `\^^M } } % lf, vtab, ff, cr
+\cs_new_protected:Npn \@@_prop_w: % \w: word character
+ {
+ \@@_item_caseful_range:nn { `a } { `z } % lower-case letters are tested first
+ \@@_item_caseful_range:nn { `A } { `Z }
+ \@@_item_caseful_range:nn { `0 } { `9 }
+ \@@_item_caseful_equal:n { `_ }
+ }
+\cs_new_protected:Npn \@@_prop_N: % \N: anything but a line feed
+ {
+ \@@_item_reverse:n
+ { \@@_item_caseful_equal:n { `\^^J } }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_posix_alnum:, \@@_posix_alpha:, \@@_posix_ascii:,
+% \@@_posix_blank:, \@@_posix_cntrl:, \@@_posix_digit:,
+% \@@_posix_graph:, \@@_posix_lower:, \@@_posix_print:,
+% \@@_posix_punct:, \@@_posix_space:, \@@_posix_upper:,
+% \@@_posix_word: , \@@_posix_xdigit:
+% }
+% \textsc{posix} properties. No surprise.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_posix_alnum: % letters and digits
+ { \@@_posix_alpha: \@@_posix_digit: }
+\cs_new_protected:Npn \@@_posix_alpha: % letters of either case
+ { \@@_posix_lower: \@@_posix_upper: }
+\cs_new_protected:Npn \@@_posix_ascii: % codes 0 to 127
+ {
+ \@@_item_caseful_range:nn
+ \c_@@_ascii_min_int
+ \c_@@_ascii_max_int
+ }
+\cs_new_eq:NN \@@_posix_blank: \@@_prop_h: % same test as \h: space or tab
+\cs_new_protected:Npn \@@_posix_cntrl: % control characters
+ {
+ \@@_item_caseful_range:nn % codes 0 to 31
+ \c_@@_ascii_min_int
+ \c_@@_ascii_max_control_int
+ \@@_item_caseful_equal:n \c_@@_ascii_max_int % and code 127
+ }
+\cs_new_eq:NN \@@_posix_digit: \@@_prop_d: % same test as \d
+\cs_new_protected:Npn \@@_posix_graph: % exclamation mark to tilde: printable, without the space
+ { \@@_item_caseful_range:nn { `! } { `\~ } }
+\cs_new_protected:Npn \@@_posix_lower: % lower-case letters
+ { \@@_item_caseful_range:nn { `a } { `z } }
+\cs_new_protected:Npn \@@_posix_print: % space to tilde: printable, including the space
+ { \@@_item_caseful_range:nn { `\ } { `\~ } }
+\cs_new_protected:Npn \@@_posix_punct: % the four runs of non-alphanumeric characters between ! and tilde
+ {
+ \@@_item_caseful_range:nn { `! } { `/ } % before the digits
+ \@@_item_caseful_range:nn { `: } { `@ } % between digits and upper-case letters
+ \@@_item_caseful_range:nn { `[ } { `` } % between upper- and lower-case letters
+ \@@_item_caseful_range:nn { `\{ } { `\~ } % after the lower-case letters
+ }
+\cs_new_protected:Npn \@@_posix_space: % space, and tab to carriage return
+ {
+ \@@_item_caseful_equal:n { `\ }
+ \@@_item_caseful_range:nn { `\^^I } { `\^^M }
+ }
+\cs_new_protected:Npn \@@_posix_upper: % upper-case letters
+ { \@@_item_caseful_range:nn { `A } { `Z } }
+\cs_new_eq:NN \@@_posix_word: \@@_prop_w: % same test as \w
+\cs_new_protected:Npn \@@_posix_xdigit: % hexadecimal digits of either case
+ {
+ \@@_posix_digit:
+ \@@_item_caseful_range:nn { `A } { `F }
+ \@@_item_caseful_range:nn { `a } { `f }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Simple character escape}
+%
+% Before actually parsing the regular expression or the replacement
+% text, we go through them once, converting |\n| to the character $10$,
+% \emph{etc.} In this pass, we also convert any special character
+% (\texttt{*}, \texttt{?}, \texttt{\{}, etc.) or escaped alphanumeric
+% character into a marker indicating that this was a special sequence,
+% and replace escaped special characters and non-escaped alphanumeric
+% characters by markers indicating that those were \enquote{raw}
+% characters. The rest of the code can then avoid caring about escaping
+% issues (those can become quite complex to handle in combination with
+% ranges in character classes).
+%
+% Usage: \cs{@@_escape_use:nnnn} \meta{inline~1} \meta{inline~2}
+% \meta{inline~3} \Arg{token list} The \meta{token list} is converted to
+% a string, then read from left to right, interpreting backslashes as
+% escaping the next character. Unescaped characters are fed to the
+% function \meta{inline~1}, and escaped characters are fed to the function
+% \meta{inline~2} within an \texttt{x}-expansion context (typically those
+% functions perform some tests on their argument to decide how to output
+% them). The escape sequences |\a|, |\e|, |\f|, |\n|, |\r|, |\t| and
+% |\x| are recognized, and those are replaced by the corresponding
+% character, then fed to \meta{inline~3}. The result is then left in the
+% input stream. Spaces are ignored unless escaped.
+%
+% The conversion is mostly done within an \texttt{x}-expanding
+% assignment, except for the |\x| escape sequence, which is not amenable
+% to that in general. For this, we use the general framework of
+% \cs{__tl_build:Nw}.
+%
+% \begin{macro}[int]{\@@_escape_use:nnnn}
+% The result is built in \cs{l_@@_internal_a_tl}, which is then
+% left in the input stream. Go through |#4| once, applying |#1|,
+% |#2|, or |#3| as relevant to each character (after de-escaping
+% it). Note that we cannot replace \cs{tl_set:Nx} and
+% \cs{__tl_build_one:o} by a single call to \cs{__tl_build_one:x}, because
+% the \texttt{x}-expanding assignment may be interrupted by |\x|.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_escape_use:nnnn #1#2#3#4 % #1, #2, #3: inline code for unescaped, escaped and raw characters; #4: token list to process
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_escape_use:nnnn }
+ \__tl_build:Nw \l_@@_internal_a_tl % the result is accumulated in \l_@@_internal_a_tl
+ \cs_set:Npn \@@_escape_unescaped:N ##1 { #1 } % turn the three inline codes into one-argument functions
+ \cs_set:Npn \@@_escape_escaped:N ##1 { #2 }
+ \cs_set:Npn \@@_escape_raw:N ##1 { #3 }
+ \@@_standard_escapechar: % backslash as escape character for the string conversion below
+ \tl_gset:Nx \g_@@_internal_tl { \__str_to_other_fast:n {#4} } % #4 as a string
+ \tl_set:Nx \l_@@_internal_b_tl % the loop runs inside this x-expanding assignment
+ {
+ \exp_after:wN \@@_escape_loop:N \g_@@_internal_tl
+ { break } \__prg_break_point: % end-marker, then the point at which the loop stops
+ }
+ \__tl_build_one:o \l_@@_internal_b_tl % add the converted material to the result
+ \__tl_build_end:
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_escape_use:nnnn }
+ \l_@@_internal_a_tl % leave the result in the input stream
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_escape_loop:N}
+% \begin{macro}[aux]+\@@_escape_\:w+
+% \cs{@@_escape_loop:N} reads one character: if it is special
+% (space, backslash, or end-marker), perform the associated action,
+% otherwise it is simply an unescaped character. After a backslash,
+% the same is done, but unknown characters are \enquote{escaped}.
+% \begin{macrocode}
+\cs_new:Npn \@@_escape_loop:N #1 % #1: next character of the string, or the end-marker
+ {
+ \cs_if_exist_use:cF { @@_escape_\token_to_str:N #1:w } % a handler named after #1 exists: use it
+ { \@@_escape_unescaped:N #1 } % otherwise #1 is an ordinary unescaped character
+ \@@_escape_loop:N % continue with the next character
+ }
+\cs_new:cpn { @@_escape_ \c_backslash_str :w } % handler for a backslash:
+ \@@_escape_loop:N #1 % removes the pending loop call and grabs the character after the backslash
+ {
+ \cs_if_exist_use:cF { @@_escape_/\token_to_str:N #1:w } % handlers for escaped characters have a / in their name
+ { \@@_escape_escaped:N #1 } % otherwise #1 is an ordinary escaped character
+ \@@_escape_loop:N % continue with the next character
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {\@@_escape_unescaped:N, \@@_escape_escaped:N, \@@_escape_raw:N}
+% Those functions are never called before being given a new meaning,
+% so their definitions here don't matter.
+% \begin{macrocode}
+\cs_new_eq:NN \@@_escape_unescaped:N ? % placeholder meanings (the character ?);
+\cs_new_eq:NN \@@_escape_escaped:N ? % \@@_escape_use:nnnn redefines all three
+\cs_new_eq:NN \@@_escape_raw:N ? % with \cs_set:Npn before running the loop
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_escape_break:w, \@@_escape_/break:w,
+% \@@_escape_/a:w, \@@_escape_/e:w, \@@_escape_/f:w,
+% \@@_escape_/n:w, \@@_escape_/r:w, \@@_escape_/t:w
+% }
+% \begin{macro}[aux]+\@@_escape_ :w+
+% The loop is ended upon seeing the end-marker
+% \enquote{\texttt{break}}, with an error if the string ended in a
+% backslash. Spaces are ignored, and |\a|, |\e|, |\f|, |\n|, |\r|,
+% |\t| take their meaning here.
+% \begin{macrocode}
+\cs_new_eq:NN \@@_escape_break:w \__prg_break: % end-marker reached normally: stop the loop
+\cs_new:cpn { @@_escape_/break:w } % end-marker reached right after a backslash
+ {
+ \if_false: { \fi: } % provide the closing brace that ends the enclosing x-expanding assignment early
+ \__msg_kernel_error:nn { regex } { trailing-backslash } % the error is raised outside the assignment
+ \exp_after:wN \use_none:n \exp_after:wN { \if_false: } \fi: % open a brace group that \use_none:n discards together with what remains up to the original closing brace
+ }
+\cs_new:cpn { @@_escape_~:w } { } % an unescaped space produces nothing
+\cs_new:cpx { @@_escape_/a:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^G } % \a: character 7
+\cs_new:cpx { @@_escape_/t:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^I } % \t: character 9
+\cs_new:cpx { @@_escape_/n:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^J } % \n: character 10
+\cs_new:cpx { @@_escape_/f:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^L } % \f: character 12
+\cs_new:cpx { @@_escape_/r:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^M } % \r: character 13
+\cs_new:cpx { @@_escape_/e:w }
+ { \exp_not:N \@@_escape_raw:N \iow_char:N \^^[ } % \e: character 27
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_escape_/x:w}
+% \begin{macro}[aux]{\@@_escape_x_end:w, \@@_escape_x_large:n}
+% When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible for
+% grabbing some hexadecimal digits, and feeding the result to
+% \cs{@@_escape_x_end:w}. If the number is too big interrupt the
+% assignment and produce an error, otherwise call \cs{@@_escape_raw:N}
+% on the corresponding character token.
+% \begin{macrocode}
+\cs_new:cpn { @@_escape_/x:w } \@@_escape_loop:N % handler for \x; removes the pending loop call
+ {
+ \exp_after:wN \@@_escape_x_end:w % receives the number once it has been fully read
+ \__int_value:w "0 \@@_escape_x_test:N % read a hexadecimal number; the leading 0 keeps it valid when no digit follows
+ }
+\cs_new:Npn \@@_escape_x_end:w #1 ; % #1: the character code, in decimal
+ {
+ \int_compare:nNnTF {#1} > \c_max_char_int
+ {
+ \if_false: { \fi: } % end the enclosing x-expanding assignment early
+ \__tl_build_one:o \l_@@_internal_b_tl % store what has been converted so far
+ \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
+ \tl_set:Nx \l_@@_internal_b_tl % start a fresh assignment for the rest of the string
+ { \if_false: } \fi: % balances the opening brace just above
+ }
+ {
+ \exp_last_unbraced:Nf \@@_escape_raw:N % pass the generated character to the raw handler
+ { \char_generate:nn {#1} { 12 } } % character with code #1 and category code 12
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_escape_x_test:N, \@@_escape_x_testii:N}
+% Find out whether the first character is a left brace (allowing any
+% number of hexadecimal digits), or not (allowing up to two
+% hexadecimal digits). We need to check for the end-of-string marker.
+% Eventually, call either \cs{@@_escape_x_loop:N} or
+% \cs{@@_escape_x:N}.
+% \begin{macrocode}
+\cs_new:Npn \@@_escape_x_test:N #1 % #1: first character after \x, or the end-marker
+ {
+ \str_if_eq_x:nnTF {#1} { break } { ; } % end of string: terminate the number
+ {
+ \if_charcode:w \c_space_token #1
+ \exp_after:wN \@@_escape_x_test:N % skip spaces
+ \else:
+ \exp_after:wN \@@_escape_x_testii:N
+ \exp_after:wN #1
+ \fi:
+ }
+ }
+\cs_new:Npn \@@_escape_x_testii:N #1 % #1: first non-space character after \x
+ {
+ \if_charcode:w \c_left_brace_str #1
+ \exp_after:wN \@@_escape_x_loop:N % braced form: any number of digits
+ \else:
+ \@@_hexadecimal_use:NTF #1
+ { \exp_after:wN \@@_escape_x:N } % first digit found: look for a second one
+ { ; \exp_after:wN \@@_escape_loop:N \exp_after:wN #1 } % no digit: terminate the number and give #1 back to the main loop
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_escape_x:N}
+% This looks for the second digit in the unbraced case.
+% \begin{macrocode}
+\cs_new:Npn \@@_escape_x:N #1 % #1: character following the first hexadecimal digit, or the end-marker
+ {
+ \str_if_eq_x:nnTF {#1} { break } { ; } % end of string: terminate the number
+ {
+ \@@_hexadecimal_use:NTF #1
+ { ; \@@_escape_loop:N } % second digit consumed: terminate the number and resume the main loop
+ { ; \@@_escape_loop:N #1 } % not a digit: terminate the number and give #1 back to the main loop
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_escape_x_loop:N, \@@_escape_x_loop_error:n}
+% Grab hexadecimal digits, skip spaces, and at the end, check that
+% there is a right brace, otherwise raise an error outside the
+% assignment.
+% \begin{macrocode}
+\cs_new:Npn \@@_escape_x_loop:N #1 % #1: next character inside the braces of \x, or the end-marker
+ {
+ \str_if_eq_x:nnTF {#1} { break }
+ { ; \@@_escape_x_loop_error:n { } {#1} } % end of string: terminate the number, report the missing brace with an empty culprit, and leave the end-marker for the main loop
+ {
+ \@@_hexadecimal_use:NTF #1
+ { \@@_escape_x_loop:N } % digit consumed: keep reading
+ {
+ \token_if_eq_charcode:NNTF \c_space_token #1
+ { \@@_escape_x_loop:N } % spaces are skipped
+ {
+ ; % any other character terminates the number
+ \exp_after:wN
+ \token_if_eq_charcode:NNTF \c_right_brace_str #1
+ { \@@_escape_loop:N } % closing brace found: resume the main loop
+ { \@@_escape_x_loop_error:n {#1} } % anything else: error
+ }
+ }
+ }
+ }
+\cs_new:Npn \@@_escape_x_loop_error:n #1 % #1: the offending character (possibly empty)
+ {
+ \if_false: { \fi: } % end the enclosing x-expanding assignment early
+ \__tl_build_one:o \l_@@_internal_b_tl % store what has been converted so far
+ \__msg_kernel_error:nnx { regex } { x-missing-rbrace } {#1}
+ \tl_set:Nx \l_@@_internal_b_tl % start a fresh assignment for the rest of the string
+ { \if_false: } \fi: \@@_escape_loop:N #1 % balance the brace, then let the main loop process #1
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_hexadecimal_use:NTF}
+% \TeX{} detects uppercase hexadecimal digits for us but not the
+% lowercase letters, which we need to detect and replace by their
+% uppercase counterpart.
+% \begin{macrocode}
+\prg_new_conditional:Npnn \@@_hexadecimal_use:N #1 { TF } % #1: character to test; a digit is left in the input (in upper case) before the true branch
+ {
+ \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f: % true when TeX accepts #1 as a hexadecimal digit after "1
+ #1 \prg_return_true: % leave the digit itself
+ \else:
+ \if_case:w \__int_eval:w
+ \exp_after:wN ` \token_to_str:N #1 - `a % offset of #1 from lower-case a
+ \__int_eval_end:
+ A % a
+ \or: B % b
+ \or: C % c
+ \or: D % d
+ \or: E % e
+ \or: F % f
+ \else:
+ \prg_return_false: % not a hexadecimal digit
+ \exp_after:wN \use_none:n % removes the \prg_return_true: placed after the inner \fi:
+ \fi:
+ \prg_return_true: % reached for a-f, after the upper-case digit has been left
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[EXP, aux]
+% {\@@_char_if_alphanumeric:NTF, \@@_char_if_special:NTF}
+% These two tests are used in the first pass when parsing a regular
+% expression. That pass is responsible for finding escaped and
+% non-escaped characters, and recognizing which ones have special
+% meanings and which should be interpreted as \enquote{raw}
+% characters. Namely,
+% \begin{itemize}
+% \item alphanumerics are \enquote{raw} if they are not escaped, and
+% may have a special meaning when escaped;
+% \item non-alphanumeric printable ascii characters are
+% \enquote{raw} if they are escaped, and may have a special
+% meaning when not escaped;
+% \item characters other than printable ascii are always
+% \enquote{raw}.
+% \end{itemize}
+% The code is ugly, and highly based on magic numbers and the ascii
+% codes of characters. This is mostly unavoidable for performance
+% reasons. Maybe the tests can be optimized a little bit more.
+% Here, \enquote{alphanumeric} means \texttt{0}--\texttt{9},
+% \texttt{A}--\texttt{Z}, \texttt{a}--\texttt{z};
+% \enquote{special} character means non-alphanumeric
+% but printable ascii, from space (hex \texttt{20}) to
+% tilde (hex \texttt{7E}).
+% \begin{macrocode}
+\prg_new_conditional:Npnn \@@_char_if_special:N #1 { TF } % decision tree on the character code of #1
+ {
+ \if_int_compare:w `#1 > `Z \exp_stop_f:
+ \if_int_compare:w `#1 > `z \exp_stop_f:
+ \if_int_compare:w `#1 < \c_@@_ascii_max_int % above z: special only if below \c_@@_ascii_max_int
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \else:
+ \if_int_compare:w `#1 < `a \exp_stop_f: % strictly between Z and a: special; a-z: not special
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \fi:
+ \else:
+ \if_int_compare:w `#1 > `9 \exp_stop_f:
+ \if_int_compare:w `#1 < `A \exp_stop_f: % strictly between 9 and A: special; A-Z: not special
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \else:
+ \if_int_compare:w `#1 < `0 \exp_stop_f:
+ \if_int_compare:w `#1 < `\ \exp_stop_f: % below the space character: not special
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \else: \prg_return_false: \fi: % digits 0-9: not special
+ \fi:
+ \fi:
+ }
+\prg_new_conditional:Npnn \@@_char_if_alphanumeric:N #1 { TF } % true for 0-9, A-Z and a-z only
+ {
+ \if_int_compare:w `#1 > `Z \exp_stop_f:
+ \if_int_compare:w `#1 > `z \exp_stop_f:
+ \prg_return_false: % above z
+ \else:
+ \if_int_compare:w `#1 < `a \exp_stop_f: % strictly between Z and a: no; a-z: yes
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \fi:
+ \else:
+ \if_int_compare:w `#1 > `9 \exp_stop_f:
+ \if_int_compare:w `#1 < `A \exp_stop_f: % strictly between 9 and A: no; A-Z: yes
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \else:
+ \if_int_compare:w `#1 < `0 \exp_stop_f: % below 0: no; 0-9: yes
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Compiling}
+%
+% A regular expression starts its life as a string of characters. In
+% this section, we convert it to internal instructions, resulting in a
+% \enquote{compiled} regular expression. This compiled expression is
+% then turned into states of an automaton in the building
+% phase. Compiled regular expressions consist of the following:
+% \begin{itemize}
+% \item \cs{@@_class:NnnnN} \meta{boolean} \Arg{tests} \Arg{min}
+% \Arg{more} \meta{lazyness}
+% \item \cs{@@_group:nnnN} \Arg{branches} \Arg{min} \Arg{more}
+% \meta{lazyness}, also \cs{@@_group_no_capture:nnnN} and
+% \cs{@@_group_resetting:nnnN} with the same syntax.
+% \item \cs{@@_branch:n} \Arg{contents}
+% \item \cs{@@_command_K:}
+% \item \cs{@@_assertion:Nn} \meta{boolean} \Arg{assertion test},
+% where the \meta{assertion test} is \cs{@@_b_test:} or
+% |{|\cs{@@_anchor:N} \meta{integer}|}|
+% \end{itemize}
+% Tests can be the following:
+% \begin{itemize}
+% \item \cs{@@_item_caseful_equal:n} \Arg{char code}
+% \item \cs{@@_item_caseless_equal:n} \Arg{char code}
+% \item \cs{@@_item_caseful_range:nn} \Arg{min} \Arg{max}
+% \item \cs{@@_item_caseless_range:nn} \Arg{min} \Arg{max}
+% \item \cs{@@_item_catcode:nT} \Arg{catcode bitmap} \Arg{tests}
+% \item \cs{@@_item_catcode_reverse:nT} \Arg{catcode bitmap} \Arg{tests}
+% \item \cs{@@_item_reverse:n} \Arg{tests}
+% \item \cs{@@_item_exact:nn} \Arg{catcode} \Arg{char code}
+% \item \cs{@@_item_exact_cs:n} \Arg{csnames}, more precisely given as
+% \meta{csname} \cs{scan_stop:} \meta{csname} \cs{scan_stop:}
+% \meta{csname} and so on in a brace group.
+% \item \cs{@@_item_cs:n} \Arg{compiled regex}
+% \end{itemize}
+%
+% \subsubsection{Variables used when compiling}
+%
+% \begin{variable}{\l_@@_group_level_int}
+% We make sure to open the same number of groups as we close.
+% \begin{macrocode}
+\int_new:N \l_@@_group_level_int % zeroed by \@@_compile:w; \@@_compile_end: reports an error if it is still positive
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_mode_int}
+% \begin{variable}
+% {
+% \c_@@_cs_in_class_mode_int,
+% \c_@@_cs_mode_int,
+% \c_@@_outer_mode_int,
+% \c_@@_catcode_mode_int,
+% \c_@@_class_mode_int,
+% \c_@@_catcode_in_class_mode_int
+% }
+% While compiling, ten modes are recognized, labelled $-63$, $-23$,
+% $-6$, $-2$, $0$, $2$, $3$, $6$, $23$, $63$. See
+% section~\ref{sec:regex-modes}. We only define some of these as
+% constants.
+% \begin{macrocode}
+\int_new:N \l_@@_mode_int % current mode; \@@_compile:n starts in the outer mode
+\int_const:Nn \c_@@_cs_in_class_mode_int { -6 } % control sequence inside a class
+\int_const:Nn \c_@@_cs_mode_int { -2 } % control sequence
+\int_const:Nn \c_@@_outer_mode_int { 0 } % outer level
+\int_const:Nn \c_@@_catcode_mode_int { 2 } % right after a catcode test
+\int_const:Nn \c_@@_class_mode_int { 3 } % class at the outer level
+\int_const:Nn \c_@@_catcode_in_class_mode_int { 6 } % catcode test inside a class
+% \end{macrocode}
+% \end{variable}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_catcodes_int, \l_@@_default_catcodes_int}
+% \begin{variable}{\l_@@_catcodes_bool}
+% We wish to allow constructions such as |\c[^BE](..\cL[a-z]..)|,
+% where the outer catcode test applies to the whole group, but is
+% superseded by the inner catcode test. For this to work, we need to
+% keep track of lists of allowed category codes:
+% \cs{l_@@_catcodes_int} and \cs{l_@@_default_catcodes_int} are
+% bitmaps, sums of $4^c$, for all allowed catcodes $c$. The latter is
+% local to each capturing group, and we reset
+% \cs{l_@@_catcodes_int} to that value after each character or
+% class, changing it only when encountering a |\c| escape. The boolean
+% records whether the list of categories of a catcode test has to be
+% inverted: compare |\c[^BE]| and |\c[BE]|.
+% \begin{macrocode}
+\int_new:N \l_@@_catcodes_int % bitmap of categories allowed for the item being compiled
+\int_new:N \l_@@_default_catcodes_int % value \l_@@_catcodes_int is reset to after each item
+\bool_new:N \l_@@_catcodes_bool % whether the category list of a catcode test is inverted
+% \end{macrocode}
+% \end{variable}
+% \end{variable}
+%
+% \begin{variable}
+% {
+% \c_@@_catcode_C_int, \c_@@_catcode_B_int, \c_@@_catcode_E_int,
+% \c_@@_catcode_M_int, \c_@@_catcode_T_int, \c_@@_catcode_P_int,
+% \c_@@_catcode_U_int, \c_@@_catcode_D_int, \c_@@_catcode_S_int,
+% \c_@@_catcode_L_int, \c_@@_catcode_O_int, \c_@@_catcode_A_int
+% }
+% \begin{variable}{\c_@@_all_catcodes_int}
+% Constants: $4^c$ for each category, and the sum of all powers of $4$.
+% \begin{macrocode}
+\int_const:Nn \c_@@_catcode_C_int { "1 } % 4^0
+\int_const:Nn \c_@@_catcode_B_int { "4 } % 4^1
+\int_const:Nn \c_@@_catcode_E_int { "10 } % 4^2
+\int_const:Nn \c_@@_catcode_M_int { "40 } % 4^3
+\int_const:Nn \c_@@_catcode_T_int { "100 } % 4^4
+\int_const:Nn \c_@@_catcode_P_int { "1000 } % 4^6
+\int_const:Nn \c_@@_catcode_U_int { "4000 } % 4^7
+\int_const:Nn \c_@@_catcode_D_int { "10000 } % 4^8
+\int_const:Nn \c_@@_catcode_S_int { "100000 } % 4^10
+\int_const:Nn \c_@@_catcode_L_int { "400000 } % 4^11
+\int_const:Nn \c_@@_catcode_O_int { "1000000 } % 4^12
+\int_const:Nn \c_@@_catcode_A_int { "4000000 } % 4^13
+\int_const:Nn \c_@@_all_catcodes_int { "5515155 } % sum of the twelve constants above
+% \end{macrocode}
+% \end{variable}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_internal_regex}
+% The compilation step stores its result in this variable.
+% \begin{macrocode}
+\cs_new_eq:NN \l_@@_internal_regex \c_@@_no_match_regex % created as a copy of \c_@@_no_match_regex
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_show_prefix_seq}
+% This sequence holds the prefix that makes up the line displayed to
+% the user. The various items must be removed from the right, which is
+% tricky with a token list, hence we use a sequence.
+% \begin{macrocode}
+\seq_new:N \l_@@_show_prefix_seq % a sequence rather than a token list, so that items can be removed from the right
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_show_lines_int}
+% A hack. To know whether a given class has a single item in it or
+% not, we count the number of lines when showing the class.
+% \begin{macrocode}
+\int_new:N \l_@@_show_lines_int % number of lines produced when showing a class
+% \end{macrocode}
+% \end{variable}
+%
+% \subsubsection{Generic helpers used when compiling}
+%
+% \begin{macro}[int]{\@@_get_digits:NTFw}
+% \begin{macro}[aux, rEXP]{\@@_get_digits_loop:nw}
+% If followed by some raw digits, collect them one by one in the
+% integer variable |#1|, and take the \texttt{true} branch. Otherwise,
+% take the \texttt{false} branch.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_get_digits:NTFw #1#2#3#4#5 % #1: integer variable; #2, #3: true and false code; #4#5: next pair of tokens
+ {
+ \@@_if_raw_digit:NNTF #4 #5
+ { #1 = #5 \@@_get_digits_loop:nw {#2} } % start the assignment with the first digit; the loop supplies the rest
+ { #3 #4 #5 } % no digit: false code, with the pair put back
+ }
+\cs_new:Npn \@@_get_digits_loop:nw #1#2#3 % #1: true code; #2#3: next pair of tokens
+ {
+ \@@_if_raw_digit:NNTF #2 #3
+ { #3 \@@_get_digits_loop:nw {#1} } % one more digit for the number being read
+ { \scan_stop: #1 #2 #3 } % \scan_stop: ends the number; then the true code, with the pair put back
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux, EXP]{\@@_if_raw_digit:NNTF}
+% Test used when grabbing digits for the |{m,n}| quantifier.
+% It only accepts non-escaped digits.
+% \begin{macrocode}
+\prg_new_conditional:Npnn \@@_if_raw_digit:NN #1#2 { TF } % #1: compile function; #2: its character
+ {
+ \if_meaning:w \@@_compile_raw:N #1 % only characters marked as raw qualify
+ \if_int_compare:w 1 < 1 #2 \exp_stop_f: % the number 1 followed by #2 exceeds 1 only if #2 is a digit
+ \prg_return_true:
+ \else:
+ \prg_return_false:
+ \fi:
+ \else:
+ \prg_return_false:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Mode}
+% \label{sec:regex-modes}
+%
+% When compiling the \textsc{nfa} corresponding to a given regex string,
+% we can be in ten distinct modes, which we label by some magic numbers:
+% \begin{itemize}
+% \item[-6] |[\c{...}]| control sequence in a class,
+% \item[-2] |\c{...}| control sequence,
+% \item[0] |...| outer,
+% \item[2] |\c...| catcode test,
+% \item[6] |[\c...]| catcode test in a class,
+% \item[-63] |[\c{[...]}]| class inside mode $-6$,
+% \item[-23] |\c{[...]}| class inside mode $-2$,
+% \item[3] |[...]| class inside mode $0$,
+% \item[23] |\c[...]| class inside mode $2$,
+% \item[63] |[\c[...]]| class inside mode $6$.
+% \end{itemize}
+% This list is exhaustive, because |\c| escape sequences cannot be
+% nested, and character classes cannot be nested directly. The choice of
+% numbers is such as to optimize the most useful tests, and make
+% transitions from one mode to another as simple as possible.
+% \begin{itemize}
+% \item Even modes mean that we are not directly in a character class.
+% In this case, a left bracket appends $3$ to the mode. In a
+% character class, a right bracket changes the mode as $m\to
+% (m-15)/13$, truncated.
+% \item Grouping, assertion, and anchors are allowed in non-positive
+% even modes ($0$, $-2$, $-6$), and do not change the
+% mode. Otherwise, they trigger an error.
+% \item A left bracket is special in even modes, appending $3$ to the
+% mode; in those modes, quantifiers and the dot are recognized, and
+% the right bracket is normal. In odd modes (within classes), the
+% left bracket is normal, but the right bracket ends the class,
+% changing the mode from $m$ to $(m-15)/13$, truncated; also, ranges
+% are recognized.
+% \item In non-negative modes, left and right braces are normal. In
+% negative modes, however, left braces trigger a warning; right
+% braces end the control sequence, going from $-2$ to $0$ or $-6$ to
+% $3$, with error recovery for odd modes.
+% \item Properties (such as the |\d| character class) can appear in
+% any mode.
+% \end{itemize}
+%
+% \begin{macro}[int, EXP]{\@@_if_in_class:TF}
+% Test whether we are directly in a character class (at the innermost
+% level of nesting). There, many escape sequences are not recognized,
+% and special characters are normal. Also, for every raw character, we
+% must look ahead for a possible raw dash.
+% \begin{macrocode}
+\cs_new:Npn \@@_if_in_class:TF % keeps its first argument in odd modes, its second otherwise
+ {
+ \if_int_odd:w \l_@@_mode_int
+ \exp_after:wN \use_i:nn % \exp_after:wN closes the conditional before the selector grabs its arguments
+ \else:
+ \exp_after:wN \use_ii:nn
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_if_in_cs:TF}
+% Right braces are special only directly inside control sequences (at
+% the inner-most level of nesting, not counting groups).
+% \begin{macrocode}
+\cs_new:Npn \@@_if_in_cs:TF % keeps its first argument in even negative modes, its second otherwise
+ {
+ \if_int_odd:w \l_@@_mode_int
+ \exp_after:wN \use_ii:nn % odd mode: directly in a class
+ \else:
+ \if_int_compare:w \l_@@_mode_int < \c_@@_outer_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % three \exp_after:wN close both conditionals first
+ \else:
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_if_in_class_or_catcode:TF}
+% Assertions are only allowed in modes $0$, $-2$, and $-6$,
+% \emph{i.e.}, even, non-positive modes.
+% \begin{macrocode}
+\cs_new:Npn \@@_if_in_class_or_catcode:TF % first argument in odd modes and in positive modes, second otherwise
+ {
+ \if_int_odd:w \l_@@_mode_int
+ \exp_after:wN \use_i:nn % odd mode: directly in a class
+ \else:
+ \if_int_compare:w \l_@@_mode_int > \c_@@_outer_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % positive even mode
+ \else:
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn % even, non-positive mode
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_if_within_catcode:TF}
+% This test takes the true branch if we are in a catcode test, either
+% immediately following it (modes $2$ and $6$) or in a class on which
+% it applies (modes $23$ and $63$). This is used to tweak how left
+% brackets behave in modes $2$ and $6$.
+% \begin{macrocode}
+\cs_new:Npn \@@_if_within_catcode:TF % keeps its first argument when the mode is positive, its second otherwise
+ {
+ \if_int_compare:w \l_@@_mode_int > \c_@@_outer_mode_int
+ \exp_after:wN \use_i:nn
+ \else:
+ \exp_after:wN \use_ii:nn
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_chk_c_allowed:T}
+% The |\c| escape sequence is only allowed in modes $0$ and $3$,
+% \emph{i.e.}, not within any other |\c| escape sequence.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_chk_c_allowed:T % runs its argument only in the outer mode and the class mode
+ {
+ \if_int_compare:w \l_@@_mode_int = \c_@@_outer_mode_int
+ \exp_after:wN \use:n
+ \else:
+ \if_int_compare:w \l_@@_mode_int = \c_@@_class_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use:n
+ \else:
+ \__msg_kernel_error:nn { regex } { c-bad-mode } % any other mode: error, and the argument is discarded
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_none:n
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_mode_quit_c:}
+% This function changes the mode as it is needed just after a catcode
+% test.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_mode_quit_c: % mode 2 becomes 0, mode 6 becomes 3, other modes are unchanged
+ {
+ \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_mode_int
+ \int_set_eq:NN \l_@@_mode_int \c_@@_outer_mode_int
+ \else:
+ \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_in_class_mode_int
+ \int_set_eq:NN \l_@@_mode_int \c_@@_class_mode_int
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Framework}
+%
+% \begin{macro}[int]{\@@_compile:w, \@@_compile_end:}
+% Used when compiling a user regex or a regex for the |\c{...}| escape
+% sequence within another regex. Start building a token list within a
+% group (with \texttt{x}-expansion at the outset), and set a few
+% variables (group level, catcodes), then start the first branch. At
+% the end, make sure there are no dangling classes nor groups, close
+% the last branch: we are done building \cs{l_@@_internal_regex}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile:w % opens the build of \l_@@_internal_regex; \@@_compile_end: finishes it
+ {
+ \__tl_build_x:Nw \l_@@_internal_regex
+ \int_zero:N \l_@@_group_level_int % no group open yet
+ \int_set_eq:NN \l_@@_default_catcodes_int \c_@@_all_catcodes_int % all categories allowed by default
+ \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int
+ \cs_set:Npn \@@_item_equal:n { \@@_item_caseful_equal:n } % caseful items by default
+ \cs_set:Npn \@@_item_range:nn { \@@_item_caseful_range:nn }
+ \__tl_build_one:n { \@@_branch:n { \if_false: } \fi: } % first branch, with a lone left brace closed in \@@_compile_end:
+ }
+\cs_new_protected:Npn \@@_compile_end: % recovers from unclosed classes and groups, then ends the build
+ {
+ \@@_if_in_class:TF
+ {
+ \__msg_kernel_error:nn { regex } { missing-rbrack }
+ \use:c { @@_compile_]: } % behave as if the missing right bracket had been found
+ \prg_do_nothing: \prg_do_nothing: % two dummy tokens for the quantifier look-ahead
+ }
+ { }
+ \if_int_compare:w \l_@@_group_level_int > 0 \exp_stop_f:
+ \__msg_kernel_error:nnx { regex } { missing-rparen }
+ { \int_use:N \l_@@_group_level_int }
+ \prg_replicate:nn % once per group still open
+ { \l_@@_group_level_int }
+ {
+ \__tl_build_one:n
+ {
+ \if_false: { \fi: }
+ \if_false: { \fi: } { 1 } { 0 } \c_true_bool % two lone right braces, then min 1, more 0 and a boolean
+ }
+ \__tl_build_end:
+ \__tl_build_one:o \l_@@_internal_regex
+ }
+ \fi:
+ \__tl_build_one:n { \if_false: { \fi: } } % lone right brace ending the last branch
+ \__tl_build_end:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_compile:n}
+% The compilation is done between \cs{@@_compile:w} and
+% \cs{@@_compile_end:}, starting in mode~$0$. Then
+% \cs{@@_escape_use:nnnn} distinguishes special characters, escaped
+% alphanumerics, and raw characters, interpreting |\a|, |\x| and other
+% sequences. The $4$ trailing \cs{prg_do_nothing:} are needed because
+% some functions defined later look up to $4$ tokens ahead. Before
+% ending, make sure that any |\c{...}| is properly closed. No need to
+% check that brackets are closed properly since \cs{@@_compile_end:}
+% does that. However, catch the case of a trailing |\cL|
+% construction.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile:n #1 % #1: the regular expression to compile
+ {
+ \@@_compile:w
+ \@@_standard_escapechar:
+ \int_set_eq:NN \l_@@_mode_int \c_@@_outer_mode_int % start in mode 0
+ \@@_escape_use:nnnn
+ {
+ \@@_char_if_special:NTF ##1 % special characters go to \@@_compile_special:N, the others are raw
+ \@@_compile_special:N \@@_compile_raw:N ##1
+ }
+ {
+ \@@_char_if_alphanumeric:NTF ##1 % alphanumerics go to \@@_compile_escaped:N, the others are raw
+ \@@_compile_escaped:N \@@_compile_raw:N ##1
+ }
+ { \@@_compile_raw:N ##1 }
+ { #1 }
+ \prg_do_nothing: \prg_do_nothing: % four dummy tokens for functions that look ahead
+ \prg_do_nothing: \prg_do_nothing:
+ \int_compare:nNnT \l_@@_mode_int = \c_@@_catcode_mode_int % the regex ended right after a catcode test
+ { \__msg_kernel_error:nn { regex } { c-trailing } }
+ \int_compare:nNnT \l_@@_mode_int < \c_@@_outer_mode_int % still inside a control sequence
+ {
+ \__msg_kernel_error:nn { regex } { c-missing-rbrace }
+ \@@_compile_end_cs:
+ \prg_do_nothing: \prg_do_nothing:
+ \prg_do_nothing: \prg_do_nothing:
+ }
+ \@@_compile_end:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_compile_escaped:N, \@@_compile_special:N}
+% If the special character or escaped alphanumeric has a particular
+% meaning in regexes, the corresponding function is used. Otherwise,
+% it is interpreted as a raw character. We distinguish special
+% characters from escaped alphanumeric characters because they behave
+% differently when appearing as an end-point of a range.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_special:N #1 % #1: a special character
+ {
+ \cs_if_exist_use:cF { @@_compile_#1: } % use the dedicated handler if one is defined
+ { \@@_compile_raw:N #1 } % otherwise treat #1 as a raw character
+ }
+\cs_new_protected:Npn \@@_compile_escaped:N #1 % #1: an escaped alphanumeric character
+ {
+ \cs_if_exist_use:cF { @@_compile_/#1: } % handlers for escaped characters have a slash in their name
+ { \@@_compile_raw:N #1 } % otherwise treat #1 as a raw character
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_compile_one:x}
+% This is used after finding one \enquote{test}, such as |\d|, or a
+% raw character. If that followed a catcode test (\emph{e.g.}, |\cL|),
+% then restore the mode. If we are not in a class, then the test is
+% \enquote{standalone}, and we need to add \cs{@@_class:NnnnN} and
+% search for quantifiers. In any case, insert the test, possibly
+% together with a catcode test if appropriate.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_one:x #1 % #1: one test to add to the compiled regex
+ {
+ \@@_mode_quit_c: % leave mode 2 or 6 if this test followed a catcode escape
+ \@@_if_in_class:TF { }
+ {
+ \__tl_build_one:n
+ { \@@_class:NnnnN \c_true_bool { \if_false: } \fi: } % standalone test: open a class around it
+ }
+ \__tl_build_one:x
+ {
+ \if_int_compare:w \l_@@_catcodes_int < \c_@@_all_catcodes_int % not all categories allowed: wrap in a catcode test
+ \@@_item_catcode:nT { \int_use:N \l_@@_catcodes_int }
+ { \exp_not:N \exp_not:n {#1} }
+ \else:
+ \exp_not:N \exp_not:n {#1}
+ \fi:
+ }
+ \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int % back to the default categories
+ \@@_if_in_class:TF { } { \@@_compile_quantifier:w } % outside classes, look for a quantifier
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]
+% {\@@_compile_abort_tokens:n, \@@_compile_abort_tokens:x}
+% This function places the collected tokens back in the input stream,
+% each as a raw character. Spaces are not preserved.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_abort_tokens:n #1 % #1: tokens to put back in the input as raw characters
+ {
+ \use:x
+ {
+ \exp_args:No \tl_map_function:nN { \tl_to_str:n {#1} } % map over the string form of #1
+ \@@_compile_raw:N % each character becomes an argument of \@@_compile_raw:N
+ }
+ }
+\cs_generate_variant:Nn \@@_compile_abort_tokens:n { x }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Quantifiers}
+%
+% \begin{macro}[int]{\@@_compile_quantifier:w}
+% This looks ahead and finds any quantifier (special character equal
+% to either of \texttt{?+*\{}).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_quantifier:w #1#2 % #1#2: next compile function and its character
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_special:N % only special characters can be quantifiers
+ {
+ \cs_if_exist_use:cF { @@_compile_quantifier_#2:w }
+ { \@@_compile_quantifier_none: #1 #2 } % no handler for #2: no quantifier, pair put back
+ }
+ { \@@_compile_quantifier_none: #1 #2 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_quantifier_none:}
+% \begin{macro}[aux]{\@@_compile_quantifier_abort:xNN}
+% Those functions are called whenever there is no quantifier, or a
+% braced construction is invalid (equivalent to no quantifier, and
+% whatever characters were grabbed are left raw).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_quantifier_none: % adds a lone right brace, then min 1, more 0 and the false boolean
+ { \__tl_build_one:n { \if_false: { \fi: } { 1 } { 0 } \c_false_bool } }
+\cs_new_protected:Npn \@@_compile_quantifier_abort:xNN #1#2#3 % #1: characters grabbed so far; #2#3: the pair that broke the syntax
+ {
+ \@@_compile_quantifier_none: % the item is compiled as if it had no quantifier
+ \__msg_kernel_warning:nnxx { regex } { invalid-quantifier } {#1} {#3}
+ \@@_compile_abort_tokens:x {#1} % the grabbed characters come back as raw characters
+ #2 #3
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_quantifier_lazyness:nnNN}
+% Once the \enquote{main} quantifier (\texttt{?}, \texttt{*},
+% \texttt{+} or a braced construction) is found, we check whether it
+% is lazy (followed by a question mark). We then add to the compiled
+% regex a closing brace (ending \cs{@@_class:NnnnN} and friends),
+% the start-point of the range, its end-point, and a boolean,
+% \texttt{true} for lazy and \texttt{false} for greedy operators.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_quantifier_lazyness:nnNN #1#2#3#4 % #1, #2: the two range arguments; #3#4: next pair
+ {
+ \str_if_eq:nnTF { #3 #4 } { \@@_compile_special:N ? } % a special question mark follows: lazy, and the pair is consumed
+ { \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_true_bool } }
+ {
+ \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_false_bool } % greedy
+ #3 #4 % not part of the quantifier: put back
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_compile_quantifier_?:w,
+% \@@_compile_quantifier_*:w,
+% \@@_compile_quantifier_+:w
+% }
+% For each \enquote{basic} quantifier, |?|, |*|, |+|, feed the correct
+% arguments to \cs{@@_compile_quantifier_lazyness:nnNN}; here $-1$ means
+% that there is no upper bound on the number of repetitions.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_quantifier_?:w } % zero or one repetition
+ { \@@_compile_quantifier_lazyness:nnNN { 0 } { 1 } }
+\cs_new_protected:cpn { @@_compile_quantifier_*:w } % zero or more repetitions
+ { \@@_compile_quantifier_lazyness:nnNN { 0 } { -1 } }
+\cs_new_protected:cpn { @@_compile_quantifier_+:w } % one or more repetitions
+ { \@@_compile_quantifier_lazyness:nnNN { 1 } { -1 } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]+\@@_compile_quantifier_{:w+ ^^A}
+% \begin{macro}[aux]
+% {
+% \@@_compile_quantifier_braced_auxi:w,
+% \@@_compile_quantifier_braced_auxii:w,
+% \@@_compile_quantifier_braced_auxiii:w,
+% }
+% Three possible syntaxes: \texttt{\{\meta{int}\}},
+% \texttt{\{\meta{int},\}}, or \texttt{\{\meta{int},\meta{int}\}}. Any
+% other syntax causes us to abort and put whatever we collected back
+% in the input stream, as \texttt{raw} characters, including the
+% opening brace. Grab a number into \cs{l_@@_internal_a_int}. If
+% the number is followed by a right brace, the range is $[a,a]$. If
+% followed by a comma, grab one more number, and call the \texttt{_ii}
+% or \texttt{_iii} auxiliary. Those auxiliaries check for a closing
+% brace, leading to the range $[a,\infty]$ or $[a,b]$, encoded as
+% $\{a\}\{-1\}$ and $\{a\}\{b-a\}$.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_quantifier_ \c_left_brace_str :w } % handler for a left brace following an item
+ {
+ \@@_get_digits:NTFw \l_@@_internal_a_int % first number of the braced quantifier
+ { \@@_compile_quantifier_braced_auxi:w }
+ { \@@_compile_quantifier_abort:xNN { \c_left_brace_str } } % no digit after the brace: not a quantifier
+ }
+\cs_new_protected:Npn \@@_compile_quantifier_braced_auxi:w #1#2 % #1#2: the pair following the first number
+ {
+ \str_case_x:nnF { #1 #2 }
+ {
+ { \@@_compile_special:N \c_right_brace_str } % right brace: first number with 0 more
+ {
+ \exp_args:No \@@_compile_quantifier_lazyness:nnNN
+ { \int_use:N \l_@@_internal_a_int } { 0 }
+ }
+ { \@@_compile_special:N , } % comma: look for a second number
+ {
+ \@@_get_digits:NTFw \l_@@_internal_b_int
+ { \@@_compile_quantifier_braced_auxiii:w }
+ { \@@_compile_quantifier_braced_auxii:w }
+ }
+ }
+ {
+ \@@_compile_quantifier_abort:xNN % anything else: not a quantifier
+ { \c_left_brace_str \int_use:N \l_@@_internal_a_int }
+ #1 #2
+ }
+ }
+\cs_new_protected:Npn \@@_compile_quantifier_braced_auxii:w #1#2 % reached when no number follows the comma; #1#2: next pair
+ {
+ \str_if_eq_x:nnTF
+ { #1 #2 } { \@@_compile_special:N \c_right_brace_str }
+ {
+ \exp_args:No \@@_compile_quantifier_lazyness:nnNN
+ { \int_use:N \l_@@_internal_a_int } { -1 } % -1: no upper bound
+ }
+ {
+ \@@_compile_quantifier_abort:xNN % missing right brace: not a quantifier
+ { \c_left_brace_str \int_use:N \l_@@_internal_a_int , }
+ #1 #2
+ }
+ }
+\cs_new_protected:Npn \@@_compile_quantifier_braced_auxiii:w #1#2 % reached after a second number; #1#2: next pair
+ {
+ \str_if_eq_x:nnTF
+ { #1 #2 } { \@@_compile_special:N \c_right_brace_str }
+ {
+ \if_int_compare:w \l_@@_internal_a_int > \l_@@_internal_b_int
+ \__msg_kernel_error:nnxx { regex } { backwards-quantifier }
+ { \int_use:N \l_@@_internal_a_int }
+ { \int_use:N \l_@@_internal_b_int }
+ \int_zero:N \l_@@_internal_b_int % recovery: 0 more repetitions beyond the first number
+ \else:
+ \int_sub:Nn \l_@@_internal_b_int \l_@@_internal_a_int % store the difference of the two numbers
+ \fi:
+ \exp_args:Noo \@@_compile_quantifier_lazyness:nnNN
+ { \int_use:N \l_@@_internal_a_int }
+ { \int_use:N \l_@@_internal_b_int }
+ }
+ {
+ \@@_compile_quantifier_abort:xNN % missing right brace: not a quantifier
+ {
+ \c_left_brace_str
+ \int_use:N \l_@@_internal_a_int ,
+ \int_use:N \l_@@_internal_b_int
+ }
+ #1 #2
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsubsection{Raw characters}
+%
+% \begin{macro}[int]{\@@_compile_raw_error:N}
+% Within character classes, and following catcode tests, some escaped
+% alphanumeric sequences such as |\b| do not have any meaning. They
+% are replaced by a raw character, after spitting out an error.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_raw_error:N #1 % #1: escaped character that has no meaning in the current mode
+ {
+ \__msg_kernel_error:nnx { regex } { bad-escape } {#1}
+ \@@_compile_raw:N #1 % after the error, compile #1 as a raw character
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_compile_raw:N}
+% If we are in a character class and the next character is an
+% unescaped dash, this denotes a range. Otherwise, the current
+% character |#1| matches itself.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_raw:N #1#2#3 % #1: the character; #2#3: the pair that follows it
+ {
+ \@@_if_in_class:TF
+ {
+ \str_if_eq:nnTF {#2#3} { \@@_compile_special:N - } % a special dash follows: #1 starts a range
+ { \@@_compile_range:Nw #1 }
+ {
+ \@@_compile_one:x
+ { \@@_item_equal:n { \__int_value:w `#1 ~ } } % test on the character code of #1
+ #2 #3
+ }
+ }
+ {
+ \@@_compile_one:x % outside classes: same test, no range look-ahead
+ { \@@_item_equal:n { \__int_value:w `#1 ~ } }
+ #2 #3
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_range:Nw, \@@_if_end_range:NNTF}
+% We have just read a raw character followed by a dash; this should be
+% followed by an end-point for the range. Valid end-points are: any
+% raw character; any special character, except a right bracket. In
+% particular, escaped characters are forbidden.
+% \begin{macrocode}
+\prg_new_protected_conditional:Npnn \@@_if_end_range:NN #1#2 { TF } % #1#2: candidate end-point (compile function, character)
+ {
+ \if_meaning:w \@@_compile_raw:N #1
+ \prg_return_true: % raw characters are always accepted
+ \else:
+ \if_meaning:w \@@_compile_special:N #1
+ \if_charcode:w ] #2
+ \prg_return_false: % a special right bracket is rejected
+ \else:
+ \prg_return_true:
+ \fi:
+ \else:
+ \prg_return_false: % neither raw nor special
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \@@_compile_range:Nw #1#2#3 % #1: start of the range; #2#3: the pair after the dash
+ {
+ \@@_if_end_range:NNTF #2 #3
+ {
+ \if_int_compare:w `#1 > `#3 \exp_stop_f:
+ \__msg_kernel_error:nnxx { regex } { range-backwards } {#1} {#3} % nothing is added for a backwards range
+ \else:
+ \__tl_build_one:x
+ {
+ \if_int_compare:w `#1 = `#3 \exp_stop_f:
+ \@@_item_equal:n % equal end-points: a single-character test
+ \else:
+ \@@_item_range:nn { \__int_value:w `#1 ~ }
+ \fi:
+ { \__int_value:w `#3 ~ }
+ }
+ \fi:
+ }
+ {
+ \__msg_kernel_warning:nnxx { regex } { range-missing-end }
+ {#1} { \c_backslash_str #3 }
+ \__tl_build_one:x
+ {
+ \@@_item_equal:n { \__int_value:w `#1 ~ } % #1 and the dash become two separate tests
+ \@@_item_equal:n { \__int_value:w `- ~ }
+ }
+ #2#3
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Character properties}
+%
+% \begin{macro}[aux]{\@@_compile_.:, \@@_prop_.:}
+% In a class, the dot has no special meaning. Outside, insert
+% \cs{@@_prop_.:}, which matches any character or control
+% sequence, and refuses $-2$ (end-marker).
+% \begin{macrocode}
+\cs_new_protected:cpx { @@_compile_.: } % x-type definition: \exp_not:c builds the property name at definition time
+ {
+ \exp_not:N \@@_if_in_class:TF
+ { \@@_compile_raw:N . } % in a class the dot is a raw character
+ { \@@_compile_one:x \exp_not:c { @@_prop_.: } }
+ }
+\cs_new_protected:cpn { @@_prop_.: } % succeeds whenever the current character code is greater than -2
+ {
+ \if_int_compare:w \l_@@_current_char_int > - 2 \exp_stop_f:
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_compile_/d:, \@@_compile_/D:,
+% \@@_compile_/h:, \@@_compile_/H:,
+% \@@_compile_/s:, \@@_compile_/S:,
+% \@@_compile_/v:, \@@_compile_/V:,
+% \@@_compile_/w:, \@@_compile_/W:,
+% \@@_compile_/N:,
+% }
+% The constants \cs{@@_prop_d:}, \emph{etc.} hold
+% a list of tests which match the corresponding character
+% class, and jump to the \cs{@@_break_point:TF} marker.
+% As for a normal character, we check for quantifiers.
+% \begin{macrocode}
+\cs_set_protected:Npn \@@_tmp:w #1#2 % #1: letter of the property; #2: letter of its reversed form
+ {
+ \cs_new_protected:cpx { @@_compile_/#1: }
+ { \@@_compile_one:x \exp_not:c { @@_prop_#1: } }
+ \cs_new_protected:cpx { @@_compile_/#2: }
+ {
+ \@@_compile_one:x
+ { \@@_item_reverse:n \exp_not:c { @@_prop_#1: } } % same tests as for #1, reversed
+ }
+ }
+\@@_tmp:w d D
+\@@_tmp:w h H
+\@@_tmp:w s S
+\@@_tmp:w v V
+\@@_tmp:w w W
+\cs_new_protected:cpn { @@_compile_/N: } % defined on its own, without a reversed counterpart here
+ { \@@_compile_one:x \@@_prop_N: }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Anchoring and simple assertions}
+%
+% \begin{macro}[aux]{\@@_compile_anchor:NF}
+% \begin{macro}[aux]+\@@_compile_^:+
+% \begin{macro}[aux]{\@@_compile_/A:, \@@_compile_/G:}
+% \begin{macro}[aux]+\@@_compile_$:+
+% \begin{macro}[aux]{\@@_compile_/Z:, \@@_compile_/z:}
+% In modes where assertions are allowed, anchor to the start of the
+% query, the start of the match, or the end of the query, depending on
+% the integer |#1|. In other modes, |#2| treats the character as raw,
+% with an error for escaped letters (|$| is valid in a class, but |\A|
+% is definitely a mistake on the user's part).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_anchor:NF #1#2 % #1: position variable; #2: fallback code
+ {
+ \@@_if_in_class_or_catcode:TF {#2} % in a class or after a catcode test: run the fallback
+ {
+ \__tl_build_one:n
+ { \@@_assertion:Nn \c_true_bool { \@@_anchor:N #1 } }
+ }
+ }
+\cs_set_protected:Npn \@@_tmp:w #1#2 % #1: escaped letter; #2: position variable
+ {
+ \cs_new_protected:cpn { @@_compile_/#1: }
+ { \@@_compile_anchor:NF #2 { \@@_compile_raw_error:N #1 } } % fallback raises an error
+ }
+\@@_tmp:w A \l_@@_min_pos_int
+\@@_tmp:w G \l_@@_start_pos_int
+\@@_tmp:w Z \l_@@_max_pos_int
+\@@_tmp:w z \l_@@_max_pos_int % same variable as for Z
+\cs_set_protected:Npn \@@_tmp:w #1#2 % redefined for the unescaped anchors: the fallback is a plain raw character
+ {
+ \cs_new_protected:cpn { @@_compile_#1: }
+ { \@@_compile_anchor:NF #2 { \@@_compile_raw:N #1 } }
+ }
+\exp_args:Nx \@@_tmp:w { \iow_char:N \^ } \l_@@_min_pos_int
+\exp_args:Nx \@@_tmp:w { \iow_char:N \$ } \l_@@_max_pos_int
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_/b:, \@@_compile_/B:}
+% Contrary to |^| and |$|, which could be implemented without really
+% knowing what precedes in the token list, this requires more
+% information, namely, the knowledge of the last character code.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_/b: } % escaped b: assertion with the true boolean
+ {
+ \@@_if_in_class_or_catcode:TF
+ { \@@_compile_raw_error:N b } % not allowed here: error, then a raw b
+ {
+ \__tl_build_one:n
+ { \@@_assertion:Nn \c_true_bool { \@@_b_test: } }
+ }
+ }
+\cs_new_protected:cpn { @@_compile_/B: } % escaped B: same test as for b, with the false boolean
+ {
+ \@@_if_in_class_or_catcode:TF
+ { \@@_compile_raw_error:N B } % not allowed here: error, then a raw B
+ {
+ \__tl_build_one:n
+ { \@@_assertion:Nn \c_false_bool { \@@_b_test: } }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Character classes}
+%
+% \begin{macro}[aux]{\@@_compile_]:}
+% Outside a class, right brackets have no meaning. In a class, change
+% the mode ($m\to (m-15)/13$, truncated) to reflect the fact that we
+% are leaving the class. Look for quantifiers, unless we are still in
+% a class after leaving one (the case of |[...\cL[...]...]|).
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_]: } % handler for a special right bracket
+ {
+ \@@_if_in_class:TF
+ {
+ \if_int_compare:w \l_@@_mode_int > \c_@@_catcode_in_class_mode_int % odd modes above 6, namely 23 and 63
+ \__tl_build_one:n { \if_false: { \fi: } } % extra lone right brace; presumably ends the catcode test argument
+ \fi:
+ \tex_advance:D \l_@@_mode_int - 15 \exp_stop_f: % together with the division: mode becomes (mode - 15) / 13, truncated
+ \tex_divide:D \l_@@_mode_int 13 \exp_stop_f:
+ \if_int_odd:w \l_@@_mode_int \else: % even mode: no longer in any class, so look for a quantifier
+ \exp_after:wN \@@_compile_quantifier:w
+ \fi:
+ }
+ { \@@_compile_raw:N ] } % outside classes the bracket is a raw character
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_[:}
+% In a class, left brackets might introduce a \textsc{posix} character
+% class, or mean nothing. Immediately following |\c|\meta{category},
+% we must insert the appropriate catcode test, then parse the class; we
+% pre-expand the catcode as an optimization. Otherwise (modes $0$,
+% $-2$ and $-6$) just parse the class. The mode is updated later.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_[: }
+ {
+ \@@_if_in_class:TF
+ % Already inside a class: test for a POSIX-style construct.
+ { \@@_compile_class_posix_test:w }
+ {
+ \@@_if_within_catcode:TF
+ {
+ % Expand the catcode bitmap to digits first; the semicolon
+ % delimits the argument of \@@_compile_class_catcode:w.
+ \exp_after:wN \@@_compile_class_catcode:w
+ \int_use:N \l_@@_catcodes_int ;
+ }
+ { \@@_compile_class_normal:w }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_class_normal:w}
+% In the \enquote{normal} case, we will insert \cs{@@_class:NnnnN}
+% \meta{boolean} in the compiled code. The \meta{boolean} is true for
+% positive classes, and false for negative classes, characterized by a
+% leading |^|. The auxiliary \cs{@@_compile_class:TFNN} also
+% checks for a leading |]| which has a special meaning.
+% \begin{macrocode}
+ % Supplies the two alternatives to \@@_compile_class:TFNN: the first
+ % (true boolean) is used for a positive class, the second (false
+ % boolean) for a class that starts with ^.
+\cs_new_protected:Npn \@@_compile_class_normal:w
+ {
+ \@@_compile_class:TFNN
+ { \@@_class:NnnnN \c_true_bool }
+ { \@@_class:NnnnN \c_false_bool }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_class_catcode:w}
+% This function is called for a left bracket in modes $2$ or $6$
+% (catcode test, and catcode test within a class). In mode $2$ the
+% whole construction needs to be put in a class (like a single
+% character). Then determine if the class is positive or negative,
+% inserting \cs{@@_item_catcode:nT} or the \texttt{reverse} variant
+% as appropriate, each with the current catcodes bitmap |#1| as an
+% argument, and reset the catcodes.
+% \begin{macrocode}
+ % #1 (delimited by a semicolon) is the catcode bitmap, already expanded
+ % to digits by the caller.
+\cs_new_protected:Npn \@@_compile_class_catcode:w #1;
+ {
+ % In catcode mode (not yet within a class), first open an enclosing
+ % positive class; its brace is left open by the \if_false: trick.
+ \if_int_compare:w \l_@@_mode_int = \c_@@_catcode_mode_int
+ \__tl_build_one:n
+ { \@@_class:NnnnN \c_true_bool { \if_false: } \fi: }
+ \fi:
+ % Reset the catcodes to the default before parsing the class body.
+ \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int
+ \@@_compile_class:TFNN
+ { \@@_item_catcode:nT {#1} }
+ { \@@_item_catcode_reverse:nT {#1} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {\@@_compile_class:TFNN, \@@_compile_class:NN}
+% If the first character is |^|, then the class is negative (use
+% |#2|), otherwise it is positive (use |#1|). If the next character
+% is a right bracket, then it should be changed to a raw one.
+% \begin{macrocode}
+ % #1: code for a positive class, #2: code for a negative class,
+ % #3#4: the next (marker, character) pair from the regex being compiled.
+\cs_new_protected:Npn \@@_compile_class:TFNN #1#2#3#4
+ {
+ % Append the digit 3 to the decimal representation of the mode,
+ % i.e. m -> 10 m + 3.
+ \l_@@_mode_int = \__int_value:w \l_@@_mode_int 3 \exp_stop_f:
+ \str_if_eq:nnTF { #3 #4 } { \@@_compile_special:N ^ }
+ {
+ % Leading special ^: negative class, and the pair is consumed.
+ \__tl_build_one:n { #2 { \if_false: } \fi: }
+ \@@_compile_class:NN
+ }
+ {
+ % Otherwise positive class; the pair #3#4 is put back for the check.
+ \__tl_build_one:n { #1 { \if_false: } \fi: }
+ \@@_compile_class:NN #3 #4
+ }
+ }
+ % A right bracket in first position is compiled as a raw character;
+ % anything else is left in the input unchanged.
+\cs_new_protected:Npn \@@_compile_class:NN #1#2
+ {
+ \token_if_eq_charcode:NNTF #2 ]
+ { \@@_compile_raw:N #2 }
+ { #1 #2 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_compile_class_posix_test:w,
+% \@@_compile_class_posix:NNNNw,
+% \@@_compile_class_posix_loop:w,
+% \@@_compile_class_posix_end:w
+% }
+% Here we check for a syntax such as |[:alpha:]|. We also detect |[=|
+% and |[.| which have a meaning in \textsc{posix} regular expressions,
+% but are not implemented in \pkg{l3regex}. In case we see |[:|, grab
+% raw characters until hopefully reaching |:]|. If that's missing, or
+% the \textsc{posix} class is unknown, abort. If all is right, add the
+% test to the current class, with an extra \cs{@@_item_reverse:n}
+% for negative classes.
+% \begin{macrocode}
+ % #1#2 is the (marker, character) pair following a left bracket seen
+ % inside a class.
+\cs_new_protected:Npn \@@_compile_class_posix_test:w #1#2
+ {
+ \token_if_eq_meaning:NNT \@@_compile_special:N #1
+ {
+ \str_case:nn { #2 }
+ {
+ % For a colon, \@@_compile_class_posix:NNNNw is left in the input
+ % and grabs the four tokens "\@@_compile_raw:N [ #1 #2" below as
+ % its first four arguments.
+ : { \@@_compile_class_posix:NNNNw }
+ = { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { = } }
+ . { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { . } }
+ }
+ }
+ \@@_compile_raw:N [ #1 #2
+ }
+ % #1-#4 are discarded; #5#6 is the pair after "[:".  The boolean
+ % records whether the class is positive, and an x-type assignment to
+ % \l_@@_internal_a_tl is started whose opening brace is hidden from
+ % this definition by the \if_false: trick.
+\cs_new_protected:Npn \@@_compile_class_posix:NNNNw #1#2#3#4#5#6
+ {
+ \str_if_eq:nnTF { #5 #6 } { \@@_compile_special:N ^ }
+ {
+ \bool_set_false:N \l_@@_internal_bool
+ \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi:
+ \@@_compile_class_posix_loop:w
+ }
+ {
+ \bool_set_true:N \l_@@_internal_bool
+ \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi:
+ \@@_compile_class_posix_loop:w #5 #6
+ }
+ }
+ % Expandable loop running inside the x-type assignment: raw characters
+ % are left in the result; the first non-raw pair closes the assignment
+ % and is handed to \@@_compile_class_posix_end:w.
+\cs_new:Npn \@@_compile_class_posix_loop:w #1#2
+ {
+ \token_if_eq_meaning:NNTF \@@_compile_raw:N #1
+ { #2 \@@_compile_class_posix_loop:w }
+ { \if_false: { \fi: } \@@_compile_class_posix_end:w #1 #2 }
+ }
+ % #1#2#3#4 are the two pairs following the collected name; they must
+ % be a special colon followed by a special right bracket.
+\cs_new_protected:Npn \@@_compile_class_posix_end:w #1#2#3#4
+ {
+ \str_if_eq:nnTF { #1 #2 #3 #4 }
+ { \@@_compile_special:N : \@@_compile_special:N ] }
+ {
+ \cs_if_exist:cTF { @@_posix_ \l_@@_internal_a_tl : }
+ {
+ % Known class: add its test, wrapped in \@@_item_reverse:n when
+ % the class was negated.
+ \@@_compile_one:x
+ {
+ \bool_if:NF \l_@@_internal_bool \@@_item_reverse:n
+ \exp_not:c { @@_posix_ \l_@@_internal_a_tl : }
+ }
+ }
+ {
+ % Unknown class name: warn, then pass the original characters to
+ % \@@_compile_abort_tokens:x.
+ \__msg_kernel_warning:nnx { regex } { posix-unknown }
+ { \l_@@_internal_a_tl }
+ \@@_compile_abort_tokens:x
+ {
+ [: \bool_if:NF \l_@@_internal_bool { ^ }
+ \l_@@_internal_a_tl :]
+ }
+ }
+ }
+ {
+ % Closing ":]" not found: raise an error and put the four grabbed
+ % tokens back into the input.
+ \__msg_kernel_error:nnxx { regex } { posix-missing-close }
+ { [: \l_@@_internal_a_tl } { #2 #4 }
+ \@@_compile_abort_tokens:x { [: \l_@@_internal_a_tl }
+ #1 #2 #3 #4
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Groups and alternations}
+%
+% \begin{macro}[aux]{\@@_compile_group_begin:N, \@@_compile_group_end:}
+% The contents of a regex group are turned into compiled code in
+% \cs{l_@@_internal_regex}, which ends up with items of the form
+% \cs{@@_branch:n} \Arg{concatenation}. This construction is done
+% using \pkg{l3tl-build} within a \TeX{} group, which automatically
+% makes sure that options (case-sensitivity and default catcode) are
+% reset at the end of the group. The argument |#1| is
+% \cs{@@_group:nnnN} or a variant thereof. A small subtlety to
+% support |\cL(abc)| as a shorthand for |(\cLa\cLb\cLc)|: exit any
+% pending catcode test, save the category code at the start of the
+% group as the default catcode for that group, and make sure that the
+% catcode is restored to the default outside the group.
+% \begin{macrocode}
+ % #1 is the group function to emit; its first argument is opened here
+ % (brace hidden by \if_false:) and closed in \@@_compile_group_end:.
+\cs_new_protected:Npn \@@_compile_group_begin:N #1
+ {
+ \__tl_build_one:n { #1 { \if_false: } \fi: }
+ \@@_mode_quit_c:
+ % Start a nested build into \l_@@_internal_regex.
+ \__tl_build:Nw \l_@@_internal_regex
+ % The catcodes in force at the start of the group become its default.
+ \int_set_eq:NN \l_@@_default_catcodes_int \l_@@_catcodes_int
+ \int_incr:N \l_@@_group_level_int
+ % Open the first branch of the group.
+ \__tl_build_one:n { \@@_branch:n { \if_false: } \fi: }
+ }
+\cs_new_protected:Npn \@@_compile_group_end:
+ {
+ \if_int_compare:w \l_@@_group_level_int > 0 \exp_stop_f:
+ % Close the last branch, end the nested build, restore the default
+ % catcodes, append the group contents to the enclosing build, then
+ % look for a quantifier.
+ \__tl_build_one:n { \if_false: { \fi: } }
+ \__tl_build_end:
+ \int_set_eq:NN \l_@@_catcodes_int \l_@@_default_catcodes_int
+ \__tl_build_one:o \l_@@_internal_regex
+ \exp_after:wN \@@_compile_quantifier:w
+ \else:
+ % No group is open: warn and compile the parenthesis as raw.
+ \__msg_kernel_warning:nn { regex } { extra-rparen }
+ \exp_after:wN \@@_compile_raw:N \exp_after:wN )
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_(:}
+% In a class, parentheses are not special. Outside, check for a |?|,
+% denoting special groups, and run the code for the corresponding
+% special group.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_(: }
+ {
+ \@@_if_in_class:TF { \@@_compile_raw:N ( }
+ { \@@_compile_lparen:w }
+ }
+ % #1#2 and #3#4 are the two (marker, character) pairs following the
+ % left parenthesis.
+\cs_new_protected:Npn \@@_compile_lparen:w #1#2#3#4
+ {
+ \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ? }
+ {
+ % "(?x": dispatch on the character #4 to the special-group handler,
+ % if one exists.
+ \cs_if_exist_use:cF
+ { @@_compile_special_group_\token_to_str:N #4 :w }
+ {
+ % Unknown special group: warn, open an ordinary group and compile
+ % the question mark as a raw character.
+ \__msg_kernel_warning:nnx { regex } { special-group-unknown }
+ { (? #4 }
+ \@@_compile_group_begin:N \@@_group:nnnN
+ \@@_compile_raw:N ? #3 #4
+ }
+ }
+ {
+ % Ordinary group: the four grabbed tokens are put back.
+ \@@_compile_group_begin:N \@@_group:nnnN
+ #1 #2 #3 #4
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]+\@@_compile_|:+
+% In a class, the pipe is not special. Otherwise, end the current
+% branch and open another one.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_|: }
+ {
+ \@@_if_in_class:TF { \@@_compile_raw:N | }
+ {
+ % Close the brace of the current branch and open a new
+ % \@@_branch:n argument (both braces hidden by \if_false:).
+ \__tl_build_one:n
+ { \if_false: { \fi: } \@@_branch:n { \if_false: } \fi: }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_):}
+% Within a class, parentheses are not special. Outside, close a group.
+% \begin{macrocode}
+ % Raw character inside a class, group end otherwise.
+\cs_new_protected:cpn { @@_compile_): }
+ {
+ \@@_if_in_class:TF { \@@_compile_raw:N ) }
+ { \@@_compile_group_end: }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_special_group_::w}
+% \begin{macro}[aux]+\@@_compile_special_group_|:w+
+% Non-capturing, and resetting groups are easy to take care of during
+% compilation; for those groups, the harder parts will come when
+% building.
+% \begin{macrocode}
+ % "(?:" opens a group emitted as \@@_group_no_capture:nnnN.
+\cs_new_protected:cpn { @@_compile_special_group_::w }
+ { \@@_compile_group_begin:N \@@_group_no_capture:nnnN }
+ % "(?|" opens a group emitted as \@@_group_resetting:nnnN.
+\cs_new_protected:cpn { @@_compile_special_group_|:w }
+ { \@@_compile_group_begin:N \@@_group_resetting:nnnN }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {\@@_compile_special_group_i:w, \@@_compile_special_group_-:w}
+% The match can be made case-insensitive by setting the option with
+% \texttt{(?i)}; the original behaviour is restored by \texttt{(?-i)}.
+% This is the only supported option.
+% \begin{macrocode}
+ % Called after "(?i"; #1#2 is the next pair, which must be a special
+ % right parenthesis.
+\cs_new_protected:Npn \@@_compile_special_group_i:w #1#2
+ {
+ \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ) }
+ {
+ % Redirect the item functions to their caseless versions.
+ \cs_set:Npn \@@_item_equal:n { \@@_item_caseless_equal:n }
+ \cs_set:Npn \@@_item_range:nn { \@@_item_caseless_range:nn }
+ }
+ {
+ % Anything else: warn and compile "(?i" as raw characters, followed
+ % by the pair that was grabbed.
+ \__msg_kernel_warning:nnx { regex } { unknown-option } { (?i #2 }
+ \@@_compile_raw:N (
+ \@@_compile_raw:N ?
+ \@@_compile_raw:N i
+ #1 #2
+ }
+ }
+ % Called after "(?-"; #1#2#3#4 must be a raw "i" followed by a special
+ % right parenthesis.
+\cs_new_protected:cpn { @@_compile_special_group_-:w } #1#2#3#4
+ {
+ \str_if_eq:nnTF { #1 #2 #3 #4 }
+ { \@@_compile_raw:N i \@@_compile_special:N ) }
+ {
+ % Redirect the item functions to their caseful versions.
+ \cs_set:Npn \@@_item_equal:n { \@@_item_caseful_equal:n }
+ \cs_set:Npn \@@_item_range:nn { \@@_item_caseful_range:nn }
+ }
+ {
+ \__msg_kernel_warning:nnx { regex } { unknown-option } { (?-#2#4 }
+ \@@_compile_raw:N (
+ \@@_compile_raw:N ?
+ \@@_compile_raw:N -
+ #1 #2 #3 #4
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Catcodes and csnames}
+%
+% \begin{macro}[aux]{\@@_compile_/c:, \@@_compile_c_test:NN}
+% The |\c| escape sequence can be followed by a capital letter
+% representing a character category, by a left bracket which starts a
+% list of categories, or by a brace group holding a regular expression
+% for a control sequence name. Otherwise, raise an error.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_/c: }
+ { \@@_chk_c_allowed:T { \@@_compile_c_test:NN } }
+ % #1#2 is the (marker, character) pair following \c.
+ % Note the layout: both \int_if_exist:cTF and \cs_if_exist_use:cF
+ % receive their false branch from the final brace group, which comes
+ % after the outer conditional has finished.
+\cs_new_protected:Npn \@@_compile_c_test:NN #1#2
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
+ {
+ \int_if_exist:cTF { c_@@_catcode_#2_int }
+ {
+ % Known category letter: store its bitmap and switch the mode
+ % (mode 0 goes to catcode mode, any other to catcode-in-class).
+ \int_set_eq:Nc \l_@@_catcodes_int { c_@@_catcode_#2_int }
+ \l_@@_mode_int
+ = \if_case:w \l_@@_mode_int
+ \c_@@_catcode_mode_int
+ \else:
+ \c_@@_catcode_in_class_mode_int
+ \fi:
+ }
+ }
+ % Special character: use the dedicated handler if there is one.
+ { \cs_if_exist_use:cF { @@_compile_c_#2:w } }
+ {
+ % Shared false branch: error, then put the pair back.
+ \__msg_kernel_error:nnx { regex } { c-missing-category } {#2}
+ #1 #2
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {
+% \@@_compile_c_[:w,
+% \@@_compile_c_lbrack_loop:NN,
+% \@@_compile_c_lbrack_add:N,
+% \@@_compile_c_lbrack_end:,
+% }
+% When encountering |\c[|, the task is to collect uppercase letters
+% representing character categories. First check for |^| which negates
+% the list of category codes.
+% \begin{macrocode}
+ % #1#2 is the pair following "\c[".
+\cs_new_protected:cpn { @@_compile_c_[:w } #1#2
+ {
+ % Mode 0 goes to catcode mode, any other to catcode-in-class mode.
+ \l_@@_mode_int
+ = \if_case:w \l_@@_mode_int
+ \c_@@_catcode_mode_int
+ \else:
+ \c_@@_catcode_in_class_mode_int
+ \fi:
+ % Start from an empty bitmap; the boolean records whether the list
+ % is positive (true) or negated by a leading ^ (false).
+ \int_zero:N \l_@@_catcodes_int
+ \str_if_eq:nnTF { #1 #2 } { \@@_compile_special:N ^ }
+ {
+ \bool_set_false:N \l_@@_catcodes_bool
+ \@@_compile_c_lbrack_loop:NN
+ }
+ {
+ \bool_set_true:N \l_@@_catcodes_bool
+ \@@_compile_c_lbrack_loop:NN
+ #1 #2
+ }
+ }
+ % Loop over pairs.  As in \@@_compile_c_test:NN, the inner
+ % conditionals take their false branch from the final brace group.
+\cs_new_protected:Npn \@@_compile_c_lbrack_loop:NN #1#2
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
+ {
+ \int_if_exist:cTF { c_@@_catcode_#2_int }
+ {
+ % Raw category letter: add it to the bitmap and continue.
+ \exp_args:Nc \@@_compile_c_lbrack_add:N
+ { c_@@_catcode_#2_int }
+ \@@_compile_c_lbrack_loop:NN
+ }
+ }
+ {
+ % A right bracket ends the list.
+ \token_if_eq_charcode:NNTF #2 ]
+ { \@@_compile_c_lbrack_end: }
+ }
+ {
+ % Anything else: error, finish the list, put the pair back.
+ \__msg_kernel_error:nnx { regex } { c-missing-rbrack } {#2}
+ \@@_compile_c_lbrack_end:
+ #1 #2
+ }
+ }
+ % Add the category constant #1 to the bitmap unless the oddness test
+ % on bitmap / #1 shows that it is already present.
+\cs_new_protected:Npn \@@_compile_c_lbrack_add:N #1
+ {
+ \if_int_odd:w \__int_eval:w \l_@@_catcodes_int / #1 \__int_eval_end:
+ \else:
+ \int_add:Nn \l_@@_catcodes_int {#1}
+ \fi:
+ }
+ % For a negated list, replace the bitmap by its complement with
+ % respect to \c_@@_all_catcodes_int.
+\cs_new_protected:Npn \@@_compile_c_lbrack_end:
+ {
+ \if_meaning:w \c_false_bool \l_@@_catcodes_bool
+ \int_set:Nn \l_@@_catcodes_int
+ { \c_@@_all_catcodes_int - \l_@@_catcodes_int }
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}+\@@_compile_c_{:w+
+% The case of a left brace is easy, based on what we have done so far:
+% in a group, compile the regular expression, after changing the mode
+% to forbid nesting |\c|. Additionally, disable submatch tracking
+% since groups don't escape the scope of |\c{...}|.
+% \begin{macrocode}
+ % The csname is assembled with \c_left_brace_str so that no unbalanced
+ % brace appears in the source.
+\cs_new_protected:cpn { @@_compile_c_ \c_left_brace_str :w }
+ {
+ % Start a nested compilation, without submatch tracking.
+ \@@_compile:w
+ \@@_disable_submatches:
+ % Mode 0 goes to cs mode, any other to cs-in-class mode.
+ \l_@@_mode_int
+ = \if_case:w \l_@@_mode_int
+ \c_@@_cs_mode_int
+ \else:
+ \c_@@_cs_in_class_mode_int
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}+\@@_compile_}:+
+% \begin{macro}{\@@_compile_end_cs:}
+% \begin{macro}[EXP,aux]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
+% Non-escaped right braces are only special if they appear when
+% compiling the regular expression for a csname, but not within a
+% class: |\c{[{}]}| matches the control sequences |\{| and |\}|. So,
+% end compiling the inner regex (this closes any dangling class or
+% group). Then insert the corresponding test in the outer regex. As
+% an optimization, if the control sequence test simply consists of
+% several explicit possibilities (branches) then use
+% \cs{@@_item_exact_cs:n} with an argument consisting of all
+% possibilities separated by \cs{scan_stop:}.
+% \begin{macrocode}
+ % The flag is raised when the inner regex cannot be reduced to a list
+ % of explicit names.
+\flag_new:n { @@_cs }
+\cs_new_protected:cpn { @@_compile_ \c_right_brace_str : }
+ {
+ \@@_if_in_cs:TF
+ { \@@_compile_end_cs: }
+ { \exp_after:wN \@@_compile_raw:N \c_right_brace_str }
+ }
+\cs_new_protected:Npn \@@_compile_end_cs:
+ {
+ \@@_compile_end:
+ \flag_clear:n { @@_cs }
+ % Expandably scan the compiled inner regex; the result is the list of
+ % explicit names, each preceded by \scan_stop:.
+ \tl_set:Nx \l_@@_internal_a_tl
+ {
+ \exp_after:wN \@@_compile_cs_aux:Nn \l_@@_internal_regex
+ \q_nil \q_nil \q_recursion_stop
+ }
+ % Flag raised: general test on the compiled regex.  Otherwise: exact
+ % test on the list of names, with the leading \scan_stop: removed by
+ % \tl_tail:N.
+ \exp_args:Nx \@@_compile_one:x
+ {
+ \flag_if_raised:nTF { @@_cs }
+ { \@@_item_cs:n { \exp_not:o \l_@@_internal_regex } }
+ { \@@_item_exact_cs:n { \tl_tail:N \l_@@_internal_a_tl } }
+ }
+ }
+ % Outer loop over (function, argument) pairs of the compiled regex.
+\cs_new:Npn \@@_compile_cs_aux:Nn #1#2
+ {
+ \cs_if_eq:NNTF #1 \@@_branch:n
+ {
+ % One branch: emit the separator, scan the branch contents #2,
+ % then continue with the next pair.
+ \scan_stop:
+ \@@_compile_cs_aux:NNnnnN #2
+ \q_nil \q_nil \q_nil \q_nil \q_nil \q_nil \q_recursion_stop
+ \@@_compile_cs_aux:Nn
+ }
+ {
+ % Anything other than the end marker \q_nil raises the flag.
+ \quark_if_nil:NF #1 { \flag_raise:n { @@_cs } }
+ \use_none_delimit_by_q_recursion_stop:w
+ }
+ }
+ % Inner loop over the six-token items of one branch.
+\cs_new:Npn \@@_compile_cs_aux:NNnnnN #1#2#3#4#5#6
+ {
+ % Accept only: a class (#1) with boolean #2 true, whose contents #3
+ % are exactly one \@@_item_caseful_equal:n item (two tokens), and
+ % with #5 equal to zero.
+ \bool_lazy_all:nTF
+ {
+ { \cs_if_eq_p:NN #1 \@@_class:NnnnN }
+ {#2}
+ { \tl_if_head_eq_meaning_p:nN {#3} \@@_item_caseful_equal:n }
+ { \int_compare_p:nNn { \tl_count:n {#3} } = { 2 } }
+ { \int_compare_p:nNn {#5} = { 0 } }
+ }
+ {
+ % Emit #4 copies of the character (catcode 12) and continue.
+ \prg_replicate:nn {#4}
+ { \char_generate:nn { \use_ii:nn #3 } {12} }
+ \@@_compile_cs_aux:NNnnnN
+ }
+ {
+ % Not the end marker: raise the flag and stop the outer loop too.
+ \quark_if_nil:NF #1
+ {
+ \flag_raise:n { @@_cs }
+ \use_i_delimit_by_q_recursion_stop:nw
+ }
+ \use_none_delimit_by_q_recursion_stop:w
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \subsubsection{Raw token lists with \cs{u}}
+%
+% \begin{macro}[aux]{\@@_compile_/u:}
+% \begin{macro}[aux, EXP]{\@@_compile_u_loop:NN}
+% The |\u| escape is invalid in classes and directly following a
+% catcode test. Otherwise, it must be followed by a left brace. We
+% then collect the characters for the argument of |\u| within an
+% \texttt{x}-expanding assignment. In principle we could just wait to
+% encounter a right brace, but this is unsafe: if the right brace is
+% missing, then we will reach the end-markers of the regex, and
+% continue, leading to obscure fatal errors. Instead, we only allow
+% raw and special characters, and stop when encountering a special
+% right brace, any escaped character, or the end-marker.
+% \begin{macrocode}
+ % #1#2 is the pair following \u; it must be a special left brace.
+\cs_new_protected:cpn { @@_compile_/u: } #1#2
+ {
+ \@@_if_in_class_or_catcode:TF
+ { \@@_compile_raw_error:N u #1 #2 }
+ {
+ \str_if_eq_x:nnTF {#1#2} { \@@_compile_special:N \c_left_brace_str }
+ {
+ % Start an x-type assignment whose closing brace is supplied by
+ % the loop below.
+ \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi:
+ \@@_compile_u_loop:NN
+ }
+ {
+ \__msg_kernel_error:nn { regex } { u-missing-lbrace }
+ \@@_compile_raw:N u #1 #2
+ }
+ }
+ }
+ % Expandable loop running inside the assignment.
+\cs_new:Npn \@@_compile_u_loop:NN #1#2
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
+ % Raw character: keep it and continue.
+ { #2 \@@_compile_u_loop:NN }
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_special:N
+ {
+ % Special character: a right brace closes the assignment; any
+ % other character is kept.
+ \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
+ { \if_false: { \fi: } \@@_compile_u_end: }
+ { #2 \@@_compile_u_loop:NN }
+ }
+ {
+ % Neither raw nor special: close the assignment, report the
+ % missing brace, finish, and put the pair back.
+ \if_false: { \fi: }
+ \__msg_kernel_error:nnx { regex } { u-missing-rbrace } {#2}
+ \@@_compile_u_end:
+ #1 #2
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_u_end:}
+% Once we have extracted the variable's name, we store the contents of
+% that variable in \cs{l_@@_internal_a_tl}. The behaviour of |\u|
+% then depends on whether we are within a |\c{...}| escape (in this
+% case, the variable is turned to a string), or not.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_u_end:
+ {
+ % Replace the collected name by the value of the variable it names.
+ \tl_set:Nv \l_@@_internal_a_tl { \l_@@_internal_a_tl }
+ % Outer mode uses the token-by-token version; every other mode uses
+ % the string version.
+ \if_int_compare:w \l_@@_mode_int = \c_@@_outer_mode_int
+ \@@_compile_u_not_cs:
+ \else:
+ \@@_compile_u_in_cs:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_u_in_cs:}
+% When |\u| appears within a control sequence, we convert the variable
+% to a string with escaped spaces. Then for each character insert a
+% class matching exactly that character, once.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_u_in_cs:
+ {
+ % Convert the contents with \__str_to_other_fast:n, then append one
+ % class per character of the result.
+ \tl_gset:Nx \g_@@_internal_tl
+ { \exp_args:No \__str_to_other_fast:n { \l_@@_internal_a_tl } }
+ \__tl_build_one:x
+ {
+ \tl_map_function:NN \g_@@_internal_tl
+ \@@_compile_u_in_cs_aux:n
+ }
+ }
+ % Positive class holding a single caseful test on the character code
+ % of #1, followed by the arguments { 1 } { 0 } \c_false_bool.
+\cs_new:Npn \@@_compile_u_in_cs_aux:n #1
+ {
+ \@@_class:NnnnN \c_true_bool
+ { \@@_item_caseful_equal:n { \__int_value:w `#1 } }
+ { 1 } { 0 } \c_false_bool
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_compile_u_not_cs:}
+% In mode $0$, the |\u| escape adds one state to the NFA for each
+% token in \cs{l_@@_internal_a_tl}. If a given \meta{token} is a
+% control sequence, then insert a string comparison test, otherwise,
+% \cs{@@_item_exact:nn} which compares catcode and character code.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_u_not_cs:
+ {
+ % Map over the tokens of the variable's contents; inside the inline
+ % code ##1, ##2 and ##3 are the three arguments supplied by
+ % \__tl_analysis_map_inline:nn for each token.
+ \exp_args:No \__tl_analysis_map_inline:nn { \l_@@_internal_a_tl }
+ {
+ \__tl_build_one:n
+ {
+ \@@_class:NnnnN \c_true_bool
+ {
+ % ##2 is read as a hexadecimal digit; the value 0 selects the
+ % control sequence test, any other the exact
+ % (catcode, character) test.
+ \if_int_compare:w "##2 = 0 \exp_stop_f:
+ \@@_item_exact_cs:n { \exp_after:wN \cs_to_str:N ##1 }
+ \else:
+ \@@_item_exact:nn { \__int_value:w "##2 } { ##3 }
+ \fi:
+ }
+ { 1 } { 0 } \c_false_bool
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Other}
+%
+% \begin{macro}[aux]{\@@_compile_/K:}
+% The |\K| control sequence is currently the only \enquote{command},
+% which performs some action, rather than matching something. It is
+% allowed in the same contexts as |\b|. At the compilation stage, we
+% leave it as a single control sequence, defined later.
+% \begin{macrocode}
+ % \K is only valid in outer mode, where it is recorded as the single
+ % command \@@_command_K:; in any other mode the letter is handed to
+ % \@@_compile_raw_error:N.
+\cs_new_protected:cpn { @@_compile_/K: }
+ {
+ \int_compare:nNnTF { \l_@@_mode_int } = { \c_@@_outer_mode_int }
+ { \__tl_build_one:n { \@@_command_K: } }
+ { \@@_compile_raw_error:N K }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Showing regexes}
+%
+% \begin{macro}[aux]{\@@_show:Nn}
+% Within a \cs{__tl_build:Nw} \ldots{} \cs{__tl_build_end:} group, we
+% redefine all the functions that can appear in a compiled regex, then
+% run the regex. The result is then shown.
+% \begin{macrocode}
+ % #1 is the compiled regex variable, #2 the text inserted after
+ % "Compiled regex" in the message.  All functions that may occur in a
+ % compiled regex are locally redefined to produce a description, the
+ % regex is run, and the collected lines are shown.
+\cs_new_protected:Npn \@@_show:Nn #1#2
+ {
+ \__tl_build:Nw \l_@@_internal_a_tl
+ % A branch replaces the last prefix item temporarily so that the
+ % "+-branch" line is printed one level out, then runs its contents.
+ \cs_set_protected:Npn \@@_branch:n
+ {
+ \seq_pop_right:NN \l_@@_show_prefix_seq \l_@@_internal_a_tl
+ \@@_show_one:n { +-branch }
+ \seq_put_right:No \l_@@_show_prefix_seq \l_@@_internal_a_tl
+ \use:n
+ }
+ \cs_set_protected:Npn \@@_group:nnnN
+ { \@@_show_group_aux:nnnnN { } }
+ \cs_set_protected:Npn \@@_group_no_capture:nnnN
+ { \@@_show_group_aux:nnnnN { ~(no~capture) } }
+ \cs_set_protected:Npn \@@_group_resetting:nnnN
+ { \@@_show_group_aux:nnnnN { ~(resetting) } }
+ \cs_set_eq:NN \@@_class:NnnnN \@@_show_class:NnnnN
+ \cs_set_protected:Npn \@@_command_K:
+ { \@@_show_one:n { reset~match~start~(\iow_char:N\\K) } }
+ \cs_set_protected:Npn \@@_assertion:Nn ##1##2
+ { \@@_show_one:n { \bool_if:NF ##1 { negative~ } assertion:~##2 } }
+ \cs_set:Npn \@@_b_test: { word~boundary }
+ \cs_set_eq:NN \@@_anchor:N \@@_show_anchor_to_str:N
+ \cs_set_protected:Npn \@@_item_caseful_equal:n ##1
+ { \@@_show_one:n { char~code~\int_eval:n{##1} } }
+ \cs_set_protected:Npn \@@_item_caseful_range:nn ##1##2
+ { \@@_show_one:n { range~[\int_eval:n{##1}, \int_eval:n{##2}] } }
+ \cs_set_protected:Npn \@@_item_caseless_equal:n ##1
+ { \@@_show_one:n { char~code~\int_eval:n{##1}~(caseless) } }
+ % Lower-case "range", matching the caseful range and the two
+ % "char code" items above.
+ \cs_set_protected:Npn \@@_item_caseless_range:nn ##1##2
+ {
+ \@@_show_one:n
+ { range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) }
+ }
+ \cs_set_protected:Npn \@@_item_catcode:nT
+ { \@@_show_item_catcode:NnT \c_true_bool }
+ \cs_set_protected:Npn \@@_item_catcode_reverse:nT
+ { \@@_show_item_catcode:NnT \c_false_bool }
+ \cs_set_protected:Npn \@@_item_reverse:n
+ { \@@_show_scope:nn { Reversed~match } }
+ \cs_set_protected:Npn \@@_item_exact:nn ##1##2
+ { \@@_show_one:n { char~##2,~catcode~##1 } }
+ \cs_set_eq:NN \@@_item_exact_cs:n \@@_show_item_exact_cs:n
+ \cs_set_protected:Npn \@@_item_cs:n
+ { \@@_show_scope:nn { control~sequence } }
+ \cs_set:cpn { @@_prop_.: } { \@@_show_one:n { any~token } }
+ % Start with an empty prefix, push the initial indentation, and run
+ % the regex (if the variable exists) to collect the lines.
+ \seq_clear:N \l_@@_show_prefix_seq
+ \@@_show_push:n { ~ }
+ \cs_if_exist_use:N #1
+ \__tl_build_end:
+ \__msg_show_variable:NNNnn #1 \cs_if_exist:NTF ? { }
+ { >~Compiled~regex~#2: \l_@@_internal_a_tl }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_show_one:n}
+% Every part of the final message goes through this function, which adds
+% one line to the output, with the appropriate prefix.
+% \begin{macrocode}
+ % Appends one output line: a \\ (left unexpanded), the concatenated
+ % items of the prefix sequence, then the x-expansion of #1.  The line
+ % counter is incremented for use by \@@_show_class:NnnnN.
+\cs_new_protected:Npn \@@_show_one:n #1
+ {
+ \int_incr:N \l_@@_show_lines_int
+ \__tl_build_one:x
+ {
+ \exp_not:N \\
+ \seq_map_function:NN \l_@@_show_prefix_seq \use:n
+ #1
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {\@@_show_push:n, \@@_show_pop:, \@@_show_scope:nn}
+% Enter and exit levels of nesting. The \texttt{scope} function prints
+% its first argument as an \enquote{introduction}, then performs its
+% second argument in a deeper level of nesting.
+% \begin{macrocode}
+ % Push a prefix item made of #1 followed by a space.
+\cs_new_protected:Npn \@@_show_push:n #1
+ { \seq_put_right:Nx \l_@@_show_prefix_seq { #1 ~ } }
+ % Drop the last prefix item (it is stored in \l_@@_internal_a_tl).
+\cs_new_protected:Npn \@@_show_pop:
+ { \seq_pop_right:NN \l_@@_show_prefix_seq \l_@@_internal_a_tl }
+ % Print #1 as a heading line, then run #2 one level deeper.
+\cs_new_protected:Npn \@@_show_scope:nn #1#2
+ {
+ \@@_show_one:n {#1}
+ \@@_show_push:n { ~ }
+ #2
+ \@@_show_pop:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_show_group_aux:nnnnN}
+% We display all groups in the same way, simply adding a message,
+% \texttt{(no capture)} or \texttt{(resetting)}, to special groups.
+% The odd \cs{use_ii:nn} avoids printing a spurious \texttt{+-branch}
+% for the first branch.
+% \begin{macrocode}
+ % #1: extra text for the "group begin" line; #2: group contents;
+ % #3, #4, #5: passed on to \@@_msg_repeated:nnN.
+\cs_new_protected:Npn \@@_show_group_aux:nnnnN #1#2#3#4#5
+ {
+ \@@_show_one:n { ,-group~begin #1 }
+ \@@_show_push:n { | }
+ % \use_ii:nn drops the first token of #2 and unbraces the group
+ % that follows it.
+ \use_ii:nn #2
+ \@@_show_pop:
+ \@@_show_one:n
+ { `-group~end \@@_msg_repeated:nnN {#3} {#4} #5 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_show_class:NnnnN}
+% I'm entirely unhappy about this function: I couldn't find a way to
+% test if a class is a single test. Instead, collect the
+% representation of the tests in the class. If that had more than one
+% line, write \texttt{Match} or \texttt{Don't match} on its own line,
+% with the repeating information if any. Then the various tests on
+% lines of their own, and finally a line. Otherwise, we need to
+% evaluate the representation of the tests again (since the prefix is
+% incorrect). That's clunky, but not too expensive, since it's only
+% one test.
+% \begin{macrocode}
+ % #1: boolean, true for a positive class; #2: the tests of the class;
+ % #3, #4, #5: passed on to \@@_msg_repeated:nnN.
+ % Declared as a new protected function, like its siblings, because it
+ % performs assignments and builds token lists.
+\cs_new_protected:Npn \@@_show_class:NnnnN #1#2#3#4#5
+ {
+ % Run the tests once inside a nested build, counting the lines they
+ % produce.
+ \__tl_build:Nw \l_@@_internal_a_tl
+ \int_zero:N \l_@@_show_lines_int
+ \@@_show_push:n {~}
+ #2
+ \exp_last_unbraced:Nf
+ \int_case:nnF { \l_@@_show_lines_int }
+ {
+ % No test at all: a single "Fail" (positive) or "Pass" (negative).
+ {0}
+ {
+ \__tl_build_end:
+ \@@_show_one:n { \bool_if:NTF #1 { Fail } { Pass } }
+ }
+ % One test: for a positive class, run it again with the outer
+ % prefix and append the repetition text; for a negative class,
+ % print a "Don't match" heading followed by the collected line.
+ {1}
+ {
+ \__tl_build_end:
+ \bool_if:NTF #1
+ {
+ #2
+ \__tl_build_one:n { \@@_msg_repeated:nnN {#3} {#4} #5 }
+ }
+ {
+ \@@_show_one:n
+ { Don't~match~\@@_msg_repeated:nnN {#3} {#4} #5 }
+ \__tl_build_one:o \l_@@_internal_a_tl
+ }
+ }
+ }
+ {
+ % Several tests: heading line, then the collected lines.
+ \__tl_build_end:
+ \@@_show_one:n
+ {
+ \bool_if:NTF #1 { M } { Don't~m } atch
+ \@@_msg_repeated:nnN {#3} {#4} #5
+ }
+ \__tl_build_one:o \l_@@_internal_a_tl
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_show_anchor_to_str:N}
+% The argument is an integer telling us where the anchor is. We
+% convert that to the relevant info.
+% \begin{macrocode}
+ % Expandable: #1 is compared, as a string, with the names of the three
+ % position variables; anything else yields an error text.
+\cs_new:Npn \@@_show_anchor_to_str:N #1
+ {
+ anchor~at~
+ \str_case:nnF { #1 }
+ {
+ { \l_@@_min_pos_int } { start~(\iow_char:N\\A) }
+ { \l_@@_start_pos_int } { start~of~match~(\iow_char:N\\G) }
+ { \l_@@_max_pos_int } { end~(\iow_char:N\\Z) }
+ }
+ { <error:~'#1'~not~recognized> }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_show_item_catcode:NnT}
+% Produce a sequence of categories which the catcode bitmap |#2|
+% contains, and show it, indenting the tests on which this catcode
+% constraint applies.
+% \begin{macrocode}
+ % #1: boolean, false for a negative class; #2: catcode bitmap.  The
+ % third (T) argument is not grabbed here: it is picked up from the
+ % input as the second argument of \@@_show_scope:nn.
+\cs_new_protected:Npn \@@_show_item_catcode:NnT #1#2
+ {
+ % Split the category letters into single items and keep those for
+ % which the oddness test on #2 / constant succeeds.
+ \seq_set_split:Nnn \l_@@_internal_seq { } { CBEMTPUDSLOA }
+ \seq_set_filter:NNn \l_@@_internal_seq \l_@@_internal_seq
+ { \int_if_odd_p:n { #2 / \int_use:c { c_@@_catcode_##1_int } } }
+ \@@_show_scope:nn
+ {
+ categories~
+ \seq_map_function:NN \l_@@_internal_seq \use:n
+ , ~
+ \bool_if:NF #1 { negative~ } class
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_show_item_exact_cs:n}
+% \begin{macrocode}
+ % #1 is a list of names separated by \scan_stop:.  Each name gets a
+ % backslash in front and the result is shown on one line, with the
+ % names joined by " or ".
+\cs_new_protected:Npn \@@_show_item_exact_cs:n #1
+ {
+ \seq_set_split:Nnn \l_@@_internal_seq { \scan_stop: } {#1}
+ \seq_set_map:NNn \l_@@_internal_seq
+ \l_@@_internal_seq { \iow_char:N\\##1 }
+ \@@_show_one:n
+ { control~sequence~ \seq_use:Nn \l_@@_internal_seq { ~or~ } }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Building}
+%
+% \subsubsection{Variables used while building}
+%
+% \begin{variable}{\l_@@_min_state_int, \l_@@_max_state_int}
+% The last state that was allocated is $\cs{l_@@_max_state_int}-1$,
+% so that \cs{l_@@_max_state_int} always points to a free state.
+% The \texttt{min_state} variable is $1$, but is included to
+% avoid hard-coding this value everywhere.
+% \begin{macrocode}
+ % Number of the first state; set once here.
+\int_new:N \l_@@_min_state_int
+\int_set:Nn \l_@@_min_state_int { 1 }
+ % Next free state.
+\int_new:N \l_@@_max_state_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_left_state_int, \l_@@_right_state_int}
+% \begin{variable}{\l_@@_left_state_seq, \l_@@_right_state_seq}
+% Alternatives are implemented by branching from a \texttt{left} state
+% into the various choices, then merging those into a \texttt{right}
+% state. We store information about those states in two sequences.
+% Those states are also used to implement group quantifiers. Most
+% often, the left and right pointers only differ by~$1$.
+% \begin{macrocode}
+ % Current left and right states, and the stacks used to save them
+ % (see \@@_push_lr_states: and \@@_pop_lr_states:).
+\int_new:N \l_@@_left_state_int
+\int_new:N \l_@@_right_state_int
+\seq_new:N \l_@@_left_state_seq
+\seq_new:N \l_@@_right_state_seq
+% \end{macrocode}
+% \end{variable}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_capturing_group_int}
+% \cs{l_@@_capturing_group_int} is the \textsc{id} number that will
+% be assigned to a capturing group if one was opened now. This starts
+% at $0$ for the group enclosing the full regular expression, and
+% groups are counted in the order of their left parenthesis, except
+% when encountering \texttt{resetting} groups.
+% \begin{macrocode}
+\int_new:N \l_@@_capturing_group_int % id of the next capturing group
+% \end{macrocode}
+% \end{variable}
+%
+% \subsubsection{Framework}
+%
+% This phase is about going from a compiled regex to an \textsc{nfa}.
+% Each state of the \textsc{nfa} is stored in a \tn{toks}. The
+% operations which can appear in the \tn{toks} are
+% \begin{itemize}
+% \item \cs{@@_action_start_wildcard:} inserted at the start
+% of the regular expression to make it unanchored.
+% \item \cs{@@_action_success:} marks the exit state of the
+% \textsc{nfa}.
+% \item \cs{@@_action_cost:n} \Arg{shift} is a transition from the
+% current \meta{state} to $\meta{state}+\meta{shift}$, which
+% consumes the current character: the target state is saved and will
+% be considered again when matching at the next position.
+% \item \cs{@@_action_free:n} \Arg{shift}, and
+% \cs{@@_action_free_group:n} \Arg{shift} are free transitions,
+% which immediately perform the actions for the state
+% $\meta{state}+\meta{shift}$ of the \textsc{nfa}. They differ in
+% how they detect and avoid infinite loops. For now, we just need to
+% know that the \texttt{group} variant must be used for transitions
+% back to the start of a group.
+% \item \cs{@@_action_submatch:n} \Arg{key} where the \meta{key} is
+% a group number followed by |<| or |>| for the beginning or end of
+% group. This causes the current position in the query to be stored
+% as the \meta{key} submatch boundary.
+% \end{itemize}
+%
+% We strive to preserve the following properties while building.
+% \begin{itemize}
+% \item The current capturing group is
+% $\text{\texttt{capturing_group}}-1$, and if a group is opened now,
+% it will be labelled \texttt{capturing_group}.
+% \item The last allocated state is $\text{\texttt{max_state}}-1$, so
+% \texttt{max_state} is a free state.
+% \item The \texttt{left_state} points to a state to the left of the
+% current group or of the last class.
+% \item The \texttt{right_state} points to a newly created,
+% empty state, with some transitions leading to it.
+% \item The \texttt{left/right} sequences hold a list of the
+% corresponding end-points of nested groups.
+% \end{itemize}
+%
+% \begin{macro}[int]{\@@_build:n, \@@_build:N}
+% The \texttt{n}-type function first compiles its argument. Reset some
+% variables. Allocate two states, and put a wildcard in state $0$
+% (transitions to state $1$ and to state $0$). Then build the regex
+% within a (capturing) group, which will be numbered $0$ (current
+% value of \texttt{capturing_group}). Finally, if the match reaches the
+% last state, it is successful.
+% \begin{macrocode}
+ % Compile #1, then build from the compiled result.
+\cs_new_protected:Npn \@@_build:n #1
+ {
+ \@@_compile:n {#1}
+ \@@_build:N \l_@@_internal_regex
+ }
+ % #1 is a compiled regex variable.
+\cs_new_protected:Npn \@@_build:N #1
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_build }
+ \@@_standard_escapechar:
+ % Reset the group counter and the state allocator, then allocate two
+ % states.
+ \int_zero:N \l_@@_capturing_group_int
+ \int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
+ \@@_build_new_state:
+ \@@_build_new_state:
+ % The left state receives the wildcard action.
+ \@@_toks_put_right:Nn \l_@@_left_state_int
+ { \@@_action_start_wildcard: }
+ % Build the whole regex as one group with arguments { 1 } { 0 }
+ % \c_false_bool.
+ \@@_group:nnnN {#1} { 1 } { 0 } \c_false_bool
+ % Reaching the right state means success.
+ \@@_toks_put_right:Nn \l_@@_right_state_int
+ { \@@_action_success: }
+%<trace> \@@_trace_states:n { 2 }
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_build }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_build_for_cs:n}
+% When using a regex to match a cs, we don't insert a wildcard, we
+% anchor at the end, and since we ignore submatches, there is no need
+% to surround the expression with a group. However, for branches to
+% work properly at the outer level, we need to put the appropriate
+% \texttt{left} and \texttt{right} states in their sequence.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_build_for_cs:n #1
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_build_for_cs }
+ % Restart state allocation and create the first two states; no
+ % wildcard is inserted here (unlike \@@_build:N).
+ \int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
+ \@@_build_new_state:
+ \@@_build_new_state:
+ % No surrounding group, but \@@_branch:n reads the group end-points
+ % from the left/right sequences, so push them around the contents.
+ \@@_push_lr_states:
+ #1
+ \@@_pop_lr_states:
+ % Final state: succeed only if the whole query has been read
+ % (current_pos = max_pos), i.e. the match is anchored at the end.
+ \@@_toks_put_right:Nn \l_@@_right_state_int
+ {
+ \if_int_compare:w \l_@@_current_pos_int = \l_@@_max_pos_int
+ \exp_after:wN \@@_action_success:
+ \fi:
+ }
+%<trace> \@@_trace_states:n { 2 }
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_build_for_cs }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Helpers for building an \textsc{nfa}}
+%
+% \begin{macro}[int]{\@@_push_lr_states:, \@@_pop_lr_states:}
+% When building the regular expression, we keep track of pointers to
+% the left-end and right-end of each group without help from \TeX{}'s
+% grouping.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_push_lr_states:
+ {
+ % Save the values (as explicit digits) of the current left and right
+ % end-points on their respective stacks.
+ \seq_push:NV \l_@@_left_state_seq \l_@@_left_state_int
+ \seq_push:NV \l_@@_right_state_seq \l_@@_right_state_int
+ }
+\cs_new_protected:Npn \@@_pop_lr_states:
+ {
+ % Restore left_state, then right_state, from the top of their
+ % stacks; \l_@@_internal_a_tl is used as scratch and is overwritten.
+ \seq_pop:NN \l_@@_left_state_seq \l_@@_internal_a_tl
+ \int_set:Nn \l_@@_left_state_int \l_@@_internal_a_tl
+ \seq_pop:NN \l_@@_right_state_seq \l_@@_internal_a_tl
+ \int_set:Nn \l_@@_right_state_int \l_@@_internal_a_tl
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]
+% {
+% \@@_build_transition_left:NNN,
+% \@@_build_transition_right:nNn
+% }
+% Add a transition from |#2| to |#3| using the function |#1|. The
+% \texttt{left} function is used for higher priority transitions, and
+% the \texttt{right} function for lower priority transitions (which
+% should be performed later). The signatures differ to reflect the
+% differing usage later on. Both functions could be optimized.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_build_transition_left:NNN #1#2#3
+ % Put on the left of the toks of state #2 the action #1 applied to
+ % the shift #3 - #2, which the x-type variant evaluates right away.
+ { \@@_toks_put_left:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
+\cs_new_protected:Npn \@@_build_transition_right:nNn #1#2#3
+ % Put on the right of the toks of state #2 the action #1 applied to
+ % the shift #3 - #2, which the x-type variant evaluates right away.
+ { \@@_toks_put_right:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_build_new_state:}
+% Add a new empty state to the \textsc{nfa}. Then update the
+% \texttt{left}, \texttt{right}, and \texttt{max} states, so that the
+% \texttt{right} state is the new empty state, and the \texttt{left}
+% state points to the previously \enquote{current} state.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_build_new_state:
+ {
+%<*trace>
+ % Trace only: report the L, R, M values before the update and the
+ % new value of M.
+ \trace:nnx { regex } { 2 }
+ {
+ regex~new~state~
+ L=\int_use:N \l_@@_left_state_int ~ -> ~
+ R=\int_use:N \l_@@_right_state_int ~ -> ~
+ M=\int_use:N \l_@@_max_state_int ~ -> ~
+ \int_eval:n { \l_@@_max_state_int + 1 }
+ }
+%</trace>
+ % The new state is number max_state: empty its toks.
+ \@@_toks_clear:N \l_@@_max_state_int
+ % Shift the pointers: left := old right, right := new state, and
+ % max_state moves on to the next free state number.
+ \int_set_eq:NN \l_@@_left_state_int \l_@@_right_state_int
+ \int_set_eq:NN \l_@@_right_state_int \l_@@_max_state_int
+ \int_incr:N \l_@@_max_state_int
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_build_transitions_lazyness:NNNNN}
+% This function creates a new state, and puts two transitions starting
+% from the old current state. The order of the transitions is
+% controlled by |#1|, true for lazy quantifiers, and false for greedy
+% quantifiers.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_build_transitions_lazyness:NNNNN #1#2#3#4#5
+ {
+ % Allocate the new state first: from here on left_state is the old
+ % current state, and integer variables passed as #3 and #5 are read
+ % after this update.
+ \@@_build_new_state:
+ % Append two transitions to the old current state, as shifts relative
+ % to it. If #1 is \c_true_bool the transition #2 -> #3 comes first,
+ % otherwise #4 -> #5 comes first. The conditional sits inside the
+ % x-type argument, so only the selected branch is stored.
+ \@@_toks_put_right:Nx \l_@@_left_state_int
+ {
+ \if_meaning:w \c_true_bool #1
+ #2 { \int_eval:n { #3 - \l_@@_left_state_int } }
+ #4 { \int_eval:n { #5 - \l_@@_left_state_int } }
+ \else:
+ #4 { \int_eval:n { #5 - \l_@@_left_state_int } }
+ #2 { \int_eval:n { #3 - \l_@@_left_state_int } }
+ \fi:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Building classes}
+%
+% \begin{macro}[int]{\@@_class:NnnnN}
+% \begin{macro}[int, rEXP]{\@@_tests_action_cost:n}
+% The arguments are: \meta{boolean} \Arg{tests} \Arg{min} \Arg{more}
+% \meta{lazyness}. First store the tests with a trailing
+% \cs{@@_action_cost:n}, in the true branch of
+% \cs{@@_break_point:TF} for positive classes, or the false branch
+% for negative classes. The integer \meta{more} is $0$ for fixed
+% repetitions, $-1$ for unbounded repetitions, and
+% $\meta{max}-\meta{min}$ for a range of repetitions.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_class:NnnnN #1#2#3#4#5
+ {
+ % Redefine the auxiliary which performs the tests #2 and then the
+ % cost action: in the true branch of the break point for a positive
+ % class (#1 true), in the false branch for a negative class. The
+ % doubled \exp_not:n keeps #2 unexpanded through this x-type
+ % definition and through one further x-type expansion (presumably
+ % when the auxiliary is inserted into a state by an Nx function).
+ \cs_set:Npx \@@_tests_action_cost:n ##1
+ {
+ \exp_not:n { \exp_not:n {#2} }
+ \bool_if:NTF #1
+ { \@@_break_point:TF { \@@_action_cost:n {##1} } { } }
+ { \@@_break_point:TF { } { \@@_action_cost:n {##1} } }
+ }
+ % Dispatch on -#4: case 0 (#4 = 0) is a fixed number of repetitions,
+ % case 1 (#4 = -1) is unbounded, anything else is a range.
+ \if_case:w - #4 \exp_stop_f:
+ \@@_class_repeat:n {#3}
+ \or: \@@_class_repeat:nN {#3} #5
+ \else: \@@_class_repeat:nnN {#3} {#4} #5
+ \fi:
+ }
+\cs_new:Npn \@@_tests_action_cost:n { \@@_action_cost:n }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_class_repeat:n}
+% This is used for a fixed number of repetitions. Build one state for
+% each repetition, with a transition controlled by the tests that we
+% have collected. That works just fine for |#1|${}=0$ repetitions:
+% nothing is built.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_class_repeat:n #1
+ {
+ % For each of the #1 repetitions: allocate a state and add a tested
+ % transition from the previous state (left) to the new one (right).
+ \prg_replicate:nn {#1}
+ {
+ \@@_build_new_state:
+ \@@_build_transition_right:nNn \@@_tests_action_cost:n
+ \l_@@_left_state_int \l_@@_right_state_int
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_class_repeat:nN}
+% This implements unbounded repetitions of a single class (\emph{e.g.}
+% the |*| and |+| quantifiers). If the minimum number |#1| of
+% repetitions is $0$, then build a transition from the current state
+% to itself governed by the tests, and a free transition to a new
+% state (hence skipping the tests). Otherwise, call
+% \cs{@@_class_repeat:n} for the code to match |#1| repetitions,
+% and add free transitions from the last state to the previous one,
+% and to a new one. In both cases, the order of transitions is
+% controlled by the lazyness boolean |#2|.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_class_repeat:nN #1#2
+ {
+ \int_compare:nNnTF {#1} = { 0 }
+ {
+ % No mandatory repetition: the old current state gets a free
+ % transition to the new state and a tested transition to itself,
+ % ordered according to the lazyness #2.
+ \@@_build_transitions_lazyness:NNNNN #2
+ \@@_action_free:n \l_@@_right_state_int
+ \@@_tests_action_cost:n \l_@@_left_state_int
+ }
+ {
+ % Match #1 repetitions, remember the state before the last one in
+ % internal_a, then add free transitions from the last state to a
+ % new state and back to internal_a.
+ \@@_class_repeat:n {#1}
+ \int_set_eq:NN \l_@@_internal_a_int \l_@@_left_state_int
+ \@@_build_transitions_lazyness:NNNNN #2
+ \@@_action_free:n \l_@@_right_state_int
+ \@@_action_free:n \l_@@_internal_a_int
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_class_repeat:nnN}
+% We want to build the code to match from |#1| to $|#1|+|#2|$
+% repetitions. Match |#1| repetitions (can be $0$). Compute the final
+% state of the next construction as \texttt{a}. Build $|#2|>0$ states,
+% each with a transition to the next state governed by the tests, and
+% a transition to the final state \texttt{a}. The computation of
+% \texttt{a} is safe because states are allocated in order, starting
+% from \texttt{max_state}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_class_repeat:nnN #1#2#3
+ {
+ % The mandatory #1 repetitions.
+ \@@_class_repeat:n {#1}
+ % Each iteration below allocates exactly one state, numbered from
+ % max_state upwards, so the last one will be max_state + #2 - 1:
+ % this is the final state, stored in internal_a.
+ \int_set:Nn \l_@@_internal_a_int
+ { \l_@@_max_state_int + #2 - 1 }
+ % #2 optional repetitions: from each state, a free transition to the
+ % final state and a tested transition to the next (new) state,
+ % ordered according to the lazyness #3.
+ \prg_replicate:nn { #2 }
+ {
+ \@@_build_transitions_lazyness:NNNNN #3
+ \@@_action_free:n \l_@@_internal_a_int
+ \@@_tests_action_cost:n \l_@@_right_state_int
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Building groups}
+%
+% \begin{macro}[aux]{\@@_group_aux:nnnnN}
+% Arguments: \Arg{label} \Arg{contents} \Arg{min} \Arg{more}
+% \meta{lazyness}. If \meta{min} is $0$, we need to add a state before
+% building the group, so that the thread which skips the group does
+% not also set the start-point of the submatch. After adding one more
+% state, the \texttt{left_state} is the left end of the group, from
+% which all branches will stem, and the \texttt{right_state} is the
+% right end of the group, and all branches end their course in that
+% state. We store those two integers to be queried for each branch, we
+% build the \textsc{nfa} states for the contents |#2| of the group,
+% and we forget about the two integers. Once this is done, perform the
+% repetition: either exactly |#3| times, or |#3| or more times, or
+% between |#3| and $|#3|+|#4|$ times, with lazyness |#5|. The
+% \meta{label} |#1| is used for submatch tracking. Each of the three
+% auxiliaries expects \texttt{left_state} and \texttt{right_state} to
+% be set properly.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_aux:nnnnN #1#2#3#4#5
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_group }
+ % When min (#3) is 0, insert an extra state in front of the group,
+ % linked to the next state by a free "group" transition.
+ \if_int_compare:w #3 = 0 \exp_stop_f:
+ \@@_build_new_state:
+%<assert>\assert_int:n { \l_@@_max_state_int = \l_@@_right_state_int + 1 }
+ \@@_build_transition_right:nNn \@@_action_free_group:n
+ \l_@@_left_state_int \l_@@_right_state_int
+ \fi:
+ % Now left_state/right_state are the two ends of the group. Save
+ % them for the branches, build the contents #2, and restore them.
+ \@@_build_new_state:
+ \@@_push_lr_states:
+ #2
+ \@@_pop_lr_states:
+ % Repetition, selected by -#4: case 0 is exactly #3 times, case 1
+ % (#4 = -1) is #3 or more times, otherwise between #3 and #3 + #4
+ % times; #5 is the lazyness.
+ \if_case:w - #4 \exp_stop_f:
+ \@@_group_repeat:nn {#1} {#3}
+ \or: \@@_group_repeat:nnN {#1} {#3} #5
+ \else: \@@_group_repeat:nnnN {#1} {#3} {#4} #5
+ \fi:
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_group }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_group:nnnN, \@@_group_no_capture:nnnN}
+% Hand to \cs{@@_group_aux:nnnnN} the label of that group
+% (expanded), and the group itself, with some extra commands to
+% perform.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group:nnnN #1
+ {
+ % Only the contents #1 is grabbed here; the three remaining arguments
+ % (min, more, lazyness) are picked up by the auxiliary. The label is
+ % expanded once, giving the current value of capturing_group as
+ % digits; the increment only runs later, as part of the contents.
+ \exp_args:No \@@_group_aux:nnnnN
+ { \int_use:N \l_@@_capturing_group_int }
+ {
+ \int_incr:N \l_@@_capturing_group_int
+ #1
+ }
+ }
+\cs_new_protected:Npn \@@_group_no_capture:nnnN
+ % Same auxiliary with the label -1; the four arguments are passed on
+ % untouched.
+ { \@@_group_aux:nnnnN { -1 } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_group_resetting:nnnN}
+% \begin{macro}[aux]{\@@_group_resetting_loop:nnNn}
+% Again, hand the label $-1$ to \cs{@@_group_aux:nnnnN}, but this
+% time we work a little bit harder to keep track of the maximum group
+% label at the end of any branch, and to reset the group number at
+% each branch. This relies on the fact that a compiled regex always is
+% a sequence of items of the form \cs{@@_branch:n} \Arg{branch}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_resetting:nnnN #1
+ {
+ % Label -1: the resetting group itself is not captured.
+ \@@_group_aux:nnnnN { -1 }
+ {
+ % Start the loop with both "maximum label so far" and "label to
+ % reset to" equal to the current capturing_group, and run it over
+ % the items of #1. The trailing { ?? \__prg_break:n } { } is the
+ % end marker recognised by the loop.
+ \exp_args:Noo \@@_group_resetting_loop:nnNn
+ { \int_use:N \l_@@_capturing_group_int }
+ { \int_use:N \l_@@_capturing_group_int }
+ #1
+ { ?? \__prg_break:n } { }
+ \__prg_break_point:
+ }
+ }
+\cs_new_protected:Npn \@@_group_resetting_loop:nnNn #1#2#3#4
+ {
+ % #1: largest label reached so far; #2: label to start each branch
+ % from; #3 #4: the next item and its argument.
+ % For a normal single-token #3, \use_none:nn discards #3 and the
+ % braced assignment. For the end marker, it discards ?? instead, so
+ % \__prg_break:n receives the assignment (presumably it skips to
+ % \__prg_break_point: and then runs it), leaving capturing_group
+ % at the maximum #1.
+ \use_none:nn #3 { \int_set:Nn \l_@@_capturing_group_int {#1} }
+ % Reset the label, then build this branch.
+ \int_set:Nn \l_@@_capturing_group_int {#2}
+ #3 {#4}
+ % Loop with the updated maximum; the next item and its argument are
+ % read from the input stream.
+ \exp_args:Nf \@@_group_resetting_loop:nnNn
+ { \int_max:nn {#1} { \l_@@_capturing_group_int } }
+ {#2}
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_branch:n}
+% Add a free transition from the left state of the current group to a
+% brand new state, starting point of this branch. Once the branch is
+% built, add a transition from its last state to the right state of
+% the group. The left and right states of the group are extracted from
+% the relevant sequences.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_branch:n #1
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_branch }
+ % New state: the starting point of this branch (right_state).
+ \@@_build_new_state:
+ % Point left_state at the left end of the enclosing group (top of
+ % the left sequence, not removed) and link it to the new state.
+ \seq_get:NN \l_@@_left_state_seq \l_@@_internal_a_tl
+ \int_set:Nn \l_@@_left_state_int \l_@@_internal_a_tl
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_left_state_int \l_@@_right_state_int
+ % Build the contents of the branch.
+ #1
+ % Link the last state of the branch to the right end of the group
+ % (top of the right sequence).
+ \seq_get:NN \l_@@_right_state_seq \l_@@_internal_a_tl
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_right_state_int \l_@@_internal_a_tl
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_branch }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_group_repeat:nn}
+% This function is called to repeat a group a fixed number of times
+% |#2|; if this is $0$ we remove the group altogether (but don't reset
+% the \texttt{capturing_group} label). Otherwise, the auxiliary
+% \cs{@@_group_repeat_aux:n} copies |#2| times the \tn{toks} for
+% the group, and leaves \texttt{internal_a} pointing to the left end
+% of the last repetition. We only record the submatch information at
+% the last repetition. Finally, add a state at the end (the transition
+% to it has been taken care of by the replicating auxiliary).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_repeat:nn #1#2
+ {
+ \int_compare:nNnTF {#2} = { 0 }
+ {
+ % Zero repetitions: drop the states of the group by rewinding
+ % max_state to just before the left end, then allocate a state.
+ \int_set:Nn \l_@@_max_state_int
+ { \l_@@_left_state_int - 1 }
+ \@@_build_new_state:
+ }
+ {
+ % Copy the group #2 times, track submatches for label #1 on the
+ % last copy only (internal_a is its left end), and add the state
+ % the copies already point to.
+ \@@_group_repeat_aux:n {#2}
+ \@@_group_submatches:nNN {#1}
+ \l_@@_internal_a_int \l_@@_right_state_int
+ \@@_build_new_state:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_submatches:nNN}
+% This inserts in states |#2| and |#3| the code for tracking
+% submatches of the group |#1|, unless inhibited by a label of $-1$.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_submatches:nNN #1#2#3
+ {
+ % Labels below 0 (in practice -1) mean "no capture": do nothing.
+ \int_compare:nNnT {#1} > { -1 }
+ {
+ % Start of submatch #1 in state #2, end of submatch in state #3.
+ \@@_toks_put_left:Nx #2 { \@@_action_submatch:n { #1 < } }
+ \@@_toks_put_left:Nx #3 { \@@_action_submatch:n { #1 > } }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_repeat_aux:n}
+% Here we repeat \tn{toks} ranging from \texttt{left_state} to
+% \texttt{max_state}, $|#1|>0$ times. First add a transition so that
+% the copies will \enquote{chain} properly. Compute the shift
+% \texttt{c} between the original copy and the last copy we
+% want. Shift the \texttt{right_state} and \texttt{max_state} to their
+% final values. We then want to perform \texttt{c} copy operations. At
+% the end, \texttt{b} is equal to the \texttt{max_state}, and
+% \texttt{a} points to the left of the last copy of the group.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_repeat_aux:n #1
+ {
+ % Chain: free transition from the right end of the group to the
+ % state which follows it (max_state), so that copies link up.
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_right_state_int \l_@@_max_state_int
+ % a = left end of the group, b = first state after the group, so
+ % b - a is the number of states in one copy.
+ \int_set_eq:NN \l_@@_internal_a_int \l_@@_left_state_int
+ \int_set_eq:NN \l_@@_internal_b_int \l_@@_max_state_int
+ % Nothing to copy for a single repetition. The \__int_eval:w lets
+ % #1 be an expression (one caller passes a sum); it ends at the >.
+ \if_int_compare:w \__int_eval:w #1 > 1 \exp_stop_f:
+ % c = total number of states in the #1 - 1 extra copies.
+ \int_set:Nn \l_@@_internal_c_int
+ {
+ ( #1 - 1 )
+ * ( \l_@@_internal_b_int - \l_@@_internal_a_int )
+ }
+ % Move right_state and max_state to their final values, then copy
+ % c registers (see \@@_toks_memcpy:NNn for the exact semantics and
+ % for how a and b are updated).
+ \int_add:Nn \l_@@_right_state_int { \l_@@_internal_c_int }
+ \int_add:Nn \l_@@_max_state_int { \l_@@_internal_c_int }
+ \@@_toks_memcpy:NNn
+ \l_@@_internal_b_int
+ \l_@@_internal_a_int
+ \l_@@_internal_c_int
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_repeat:nnN}
+% This function is called to repeat a group at least $n$ times; the
+% case $n=0$ is very different from $n>0$. Assume first that $n=0$.
+% Insert submatch tracking information at the start and end of the
+% group, add a free transition from the right end to the
+% \enquote{true} left state \texttt{a} (remember: in this case we had
+% added an extra state before the left state). This forms the loop,
+% which we break away from by adding a free transition from \texttt{a}
+% to a new state.
+%
+% Now consider the case $n>0$. Repeat the group $n$ times, chaining
+% various copies with a free transition. Add submatch tracking only to
+% the last copy, then add a free transition from the right end back to
+% the left end of the last copy, either before or after the transition
+% to move on towards the rest of the \textsc{nfa}. This transition can
+% end up before submatch tracking, but that is irrelevant since it
+% only does so when going again through the group, recording new
+% matches. Finally, add a state; we already have a transition pointing
+% to it from \cs{@@_group_repeat_aux:n}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_repeat:nnN #1#2#3
+ {
+ \if_int_compare:w #2 = 0 \exp_stop_f:
+ % Zero or more repetitions. Track submatches at both ends of the
+ % single copy of the group.
+ \@@_group_submatches:nNN {#1}
+ \l_@@_left_state_int \l_@@_right_state_int
+ % a = the extra state which was added just before the left end of
+ % the group (done by the caller when the minimum is 0).
+ \int_set:Nn \l_@@_internal_a_int
+ { \l_@@_left_state_int - 1 }
+ % Loop: from the right end back to a.
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_right_state_int \l_@@_internal_a_int
+ % Exit: from a to a new state, with higher priority (left) when
+ % lazy (#3 true) and lower priority (right) when greedy.
+ \@@_build_new_state:
+ \if_meaning:w \c_true_bool #3
+ \@@_build_transition_left:NNN \@@_action_free:n
+ \l_@@_internal_a_int \l_@@_right_state_int
+ \else:
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_internal_a_int \l_@@_right_state_int
+ \fi:
+ \else:
+ % #2 > 0: make the #2 copies and track submatches on the last copy
+ % (internal_a is its left end).
+ \@@_group_repeat_aux:n {#2}
+ \@@_group_submatches:nNN {#1}
+ \l_@@_internal_a_int \l_@@_right_state_int
+ % Loop back from the right end to the left end of the last copy:
+ % lower priority (right) when lazy, higher priority (left) when
+ % greedy.
+ \if_meaning:w \c_true_bool #3
+ \@@_build_transition_right:nNn \@@_action_free_group:n
+ \l_@@_right_state_int \l_@@_internal_a_int
+ \else:
+ \@@_build_transition_left:NNN \@@_action_free_group:n
+ \l_@@_right_state_int \l_@@_internal_a_int
+ \fi:
+ % The state that the chaining transition already points to.
+ \@@_build_new_state:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_repeat:nnnN}
+% We wish to repeat the group between |#2| and $|#2|+|#3|$ times, with
+% a lazyness controlled by |#4|. We insert submatch tracking up front:
+% in principle, we could avoid recording submatches for the first |#2|
+% copies of the group, but that forces us to treat specially the case
+% $|#2|=0$. Repeat that group with submatch tracking $|#2|+|#3|$ times
+% (the maximum number of repetitions). Then our goal is to add |#3|
+% transitions from the end of the |#2|-th group, and of each subsequent
+% group, to the end. For a lazy quantifier, we add those transitions
+% to the left states, before submatch tracking. For the greedy case,
+% we add the transitions to the right states, after submatch tracking
+% and the transitions which go on with more repetitions. In the greedy
+% case with $|#2|=0$, the transition which skips over all copies of
+% the group must be added separately, because its starting state does
+% not follow the normal pattern: we had to add it \enquote{by hand}
+% earlier.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_repeat:nnnN #1#2#3#4
+ {
+ % Submatch tracking goes in before copying, so every copy has it.
+ \@@_group_submatches:nNN {#1}
+ \l_@@_left_state_int \l_@@_right_state_int
+ % Make the maximum number #2 + #3 of copies.
+ \@@_group_repeat_aux:n { #2 + #3 }
+ \if_meaning:w \c_true_bool #4
+ % Lazy: walk left_state backwards from max_state, one copy
+ % (b - a states) at a time, #3 times, each time adding on the left
+ % a free transition to the final state max_state.
+ \int_set_eq:NN \l_@@_left_state_int \l_@@_max_state_int
+ \prg_replicate:nn { #3 }
+ {
+ \int_sub:Nn \l_@@_left_state_int
+ { \l_@@_internal_b_int - \l_@@_internal_a_int }
+ \@@_build_transition_left:NNN \@@_action_free:n
+ \l_@@_left_state_int \l_@@_max_state_int
+ }
+ \else:
+ % Greedy: walk right_state backwards, #3 - 1 times, each time
+ % adding on the right a free transition to max_state.
+ \prg_replicate:nn { #3 - 1 }
+ {
+ \int_sub:Nn \l_@@_right_state_int
+ { \l_@@_internal_b_int - \l_@@_internal_a_int }
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_right_state_int \l_@@_max_state_int
+ }
+ % The last of the #3 transitions: when #2 = 0 it starts from the
+ % state just before left_state (the extra state added by hand
+ % before the group), otherwise from one more copy back.
+ \if_int_compare:w #2 = 0 \exp_stop_f:
+ \int_set:Nn \l_@@_right_state_int
+ { \l_@@_left_state_int - 1 }
+ \else:
+ \int_sub:Nn \l_@@_right_state_int
+ { \l_@@_internal_b_int - \l_@@_internal_a_int }
+ \fi:
+ \@@_build_transition_right:nNn \@@_action_free:n
+ \l_@@_right_state_int \l_@@_max_state_int
+ \fi:
+ % Allocate the final state that all those transitions point to.
+ \@@_build_new_state:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Others}
+%
+% \begin{macro}[int]{\@@_assertion:Nn, \@@_b_test:, \@@_anchor:N}
+% Usage: \cs{@@_assertion:Nn} \meta{boolean} \Arg{test}, where the
+% \meta{test} is either of the two other functions. Add a free
+% transition to a new state, conditionally to the assertion test. The
+% \cs{@@_b_test:} test is used by the |\b| and |\B| escape: check
+% if the last character was a word character or not, and do the same
+% to the current character. The boundary-markers of the string are
+% non-word characters for this purpose. Anchors at the start or end
+% of match use \cs{@@_anchor:N}, with a position controlled by the
+% integer |#1|.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_assertion:Nn #1#2
+ {
+ \@@_build_new_state:
+ % Store in the old current state: the test #2 (unexpanded), then a
+ % break point whose two branches are assembled here. When #1 is
+ % true the result is <break point> { <free transition> } { };
+ % when #1 is false it is <break point> { } { <free transition> }.
+ % The free transition leads to the new state.
+ \@@_toks_put_right:Nx \l_@@_left_state_int
+ {
+ \exp_not:n {#2}
+ \@@_break_point:TF
+ \bool_if:NF #1 { { } }
+ {
+ \@@_action_free:n
+ {
+ \int_eval:n
+ { \l_@@_right_state_int - \l_@@_left_state_int }
+ }
+ }
+ \bool_if:NT #1 { { } }
+ }
+ }
+\cs_new_protected:Npn \@@_anchor:N #1
+ {
+ % Test for use in an assertion: break to the true branch when the
+ % current position equals #1. The \exp_after:wN removes the \fi:
+ % before \@@_break_true:w acts.
+ \if_int_compare:w #1 = \l_@@_current_pos_int
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \@@_b_test:
+ {
+ % Inside a group, pretend the current character is the previous one
+ % and run the word-character test on it.
+ \group_begin:
+ \int_set_eq:NN \l_@@_current_char_int \l_@@_last_char_int
+ \@@_prop_w:
+ % Both branches close the group (restoring current_char) and then
+ % test the real current character: with the reversed test in the
+ % true branch, with the plain test in the false branch.
+ % NOTE(review): presumably the true branch is taken when the
+ % previous character is a word character; confirm against the
+ % definition of the break point.
+ \@@_break_point:TF
+ { \group_end: \@@_item_reverse:n \@@_prop_w: }
+ { \group_end: \@@_prop_w: }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_command_K:}
+% Change the starting point of the $0$-th submatch (full match), and
+% transition to a new state, pretending that this is a fresh thread.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_command_K:
+ {
+ \@@_build_new_state:
+ % Store in the old current state: reset the start of submatch 0
+ % (the full match) to the current position, then take a free
+ % transition to the new state with the fresh-thread boolean
+ % temporarily true. Only the shift is computed now, by \int_eval:n.
+ \@@_toks_put_right:Nx \l_@@_left_state_int
+ {
+ \@@_action_submatch:n { 0< }
+ \bool_set_true:N \l_@@_fresh_thread_bool
+ \@@_action_free:n
+ { \int_eval:n { \l_@@_right_state_int - \l_@@_left_state_int } }
+ \bool_set_false:N \l_@@_fresh_thread_bool
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Matching}
+%
+% We search for matches by running all the execution threads through the
+% \textsc{nfa} in parallel, reading one token of the query at each step.
+% The \textsc{nfa} contains \enquote{free} transitions to other states,
+% and transitions which \enquote{consume} the current token. For free
+% transitions, the instruction at the new state of the \textsc{nfa} is
+% performed immediately. When a transition consumes a character, the
+% new state is appended to a list of \enquote{active states}, stored in
+% \cs{g_@@_thread_state_intarray}: this thread will be active again when the next
+% token is read from the query. At every step (for each token in the
+% query), we unpack that list of active states and the corresponding
+% submatch props, and empty those.
+%
+% If two paths through the \textsc{nfa} \enquote{collide} in the sense
+% that they reach the same state after reading a given token, then they
+% only differ in how they previously matched, and the future execution
+% will be identical for both. (Note that this would be wrong in the
+% presence of back-references.) Hence, we only need to keep one of the
+% two threads: the thread with the highest priority. Our \textsc{nfa} is
+% built in such a way that higher priority actions always come before
+% lower priority actions, which makes things work.
+%
+% The explanation in the previous paragraph may make us think that we
+% simply need to keep track of which states were visited at a given
+% step: after all, the loop generated when matching |(a?)*| against |a|
+% is broken, isn't it? No. The group first matches |a|, as it should,
+% then repeats; it attempts to match |a| again but fails; it skips |a|,
+% and finds out that this state has already been seen at this position
+% in the query: the match stops. The capturing group is (wrongly) |a|.
+% What went wrong is that a thread collided with itself, and the later
+% version, which has gone through the group one more time with an empty
+% match, should have a higher priority than not going through the group.
+%
+% We solve this by distinguishing \enquote{normal} free transitions
+% \cs{@@_action_free:n} from transitions
+% \cs{@@_action_free_group:n} which go back to the start of the
+% group. The former will keep threads unless they have been visited by a
+% \enquote{completed} thread, while the latter kind of transition also
+% prevents going back to a state visited by the current thread.
+%
+% \subsubsection{Variables used when matching}
+%
+% \begin{variable}
+% {
+% \l_@@_min_pos_int,
+% \l_@@_max_pos_int,
+% \l_@@_current_pos_int,
+% \l_@@_start_pos_int,
+% \l_@@_success_pos_int,
+% }
+% The tokens in the query are indexed from \texttt{min_pos} for the
+% first to $\texttt{max_pos}-1$ for the last, and their information is
+% stored in several arrays and \tn{toks} registers with those numbers. We
+% don't start from $0$ because the \tn{toks} registers with low
+% numbers are used to hold the states of the \textsc{nfa}. We match
+% without backtracking, keeping all threads in lockstep at the
+% \texttt{current_pos} in the query. The starting point of the current
+% match attempt is \texttt{start_pos}, and \texttt{success_pos},
+% updated whenever a thread succeeds, is used as the next starting
+% position.
+% \begin{macrocode}
+\int_new:N \l_@@_min_pos_int % index of the first token of the query
+\int_new:N \l_@@_max_pos_int % one more than the index of the last token
+\int_new:N \l_@@_current_pos_int % position at which all threads currently are
+\int_new:N \l_@@_start_pos_int % start of the current match attempt
+\int_new:N \l_@@_success_pos_int % updated when a thread succeeds; next start
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}
+% {
+% \l_@@_current_char_int,
+% \l_@@_current_catcode_int,
+% \l_@@_last_char_int,
+% \l_@@_case_changed_char_int
+% }
+% The character and category codes of the token at the current
+% position; the character code of the token at the previous position;
+% and the character code of the result of changing the case of the
+% current token (|A-Z|$\leftrightarrow$|a-z|). This last integer is
+% only computed when necessary, and is otherwise \cs{c_max_int}. The
+% \texttt{current_char} variable is also used in various other phases
+% to hold a character code.
+% \begin{macrocode}
+\int_new:N \l_@@_current_char_int % character code at the current position
+\int_new:N \l_@@_current_catcode_int % category code at the current position
+\int_new:N \l_@@_last_char_int % character code at the previous position
+\int_new:N \l_@@_case_changed_char_int % case-changed code, computed on demand
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_current_state_int}
+% For every character in the token list, each of the active states is
+% considered in turn. The variable \cs{l_@@_current_state_int}
+% holds the state of the \textsc{nfa} which is currently considered:
+% transitions are then given as shifts relative to the current state.
+% \begin{macrocode}
+\int_new:N \l_@@_current_state_int % NFA state being considered; shifts are relative to it
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}
+% {\l_@@_current_submatches_prop, \l_@@_success_submatches_prop}
+% The submatches for the thread which is currently active are stored
+% in the \texttt{current_submatches} property list variable. This
+% property list is stored by \cs{@@_action_cost:n} into the
+% \tn{toks} register for the target state of the transition, to be
+% retrieved when matching at the next position. When a thread
+% succeeds, this property list is copied to
+% \cs{l_@@_success_submatches_prop}: only the last successful thread
+% will remain there.
+% \begin{macrocode}
+\prop_new:N \l_@@_current_submatches_prop % submatches of the active thread
+\prop_new:N \l_@@_success_submatches_prop % submatches of the last successful thread
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_step_int}
+% This integer, always even, is increased every time a character in
+% the query is read, and not reset when doing multiple matches. We
+% store in \cs{g_@@_state_active_intarray} the last step in which each
+% \meta{state} in the \textsc{nfa} was encountered. This lets us break
+% infinite loops by not visiting the same state twice in the same
+% step. In fact, the step we store is equal to \texttt{step} when we
+% have started performing the operations of \tn{toks}\meta{state}, but
+% not finished yet. However, once we finish, we store
+% $\text{\texttt{step}}+1$ in \cs{g_@@_state_active_intarray}. This is
+% needed to track submatches
+% properly (see building phase). The \texttt{step} is also used to
+% attach each set of submatch information to a given iteration (and
+% automatically discard it when it corresponds to a past step).
+% \begin{macrocode}
+\int_new:N \l_@@_step_int % step counter, not reset between multiple matches
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_min_active_int, \l_@@_max_active_int}
+% All the currently active threads are kept in order of precedence in
+% \cs{g_@@_thread_state_intarray}, and the corresponding submatches in the
+% \tn{toks}. For our purposes, those serve as an array, indexed from
+% \texttt{min_active} (inclusive) to \texttt{max_active} (excluded).
+% At the start of every step, the whole array is unpacked, so that the
+% space can immediately be reused, and \texttt{max_active} is reset to
+% \texttt{min_active}, effectively clearing the array.
+% \begin{macrocode}
+\int_new:N \l_@@_min_active_int % first index of the active-thread array (inclusive)
+\int_new:N \l_@@_max_active_int % end index of the active-thread array (excluded)
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\g_@@_state_active_intarray, \g_@@_thread_state_intarray}
+% \cs{g_@@_state_active_intarray} stores the last \meta{step} in which
+% each \meta{state} was active. \cs{g_@@_thread_state_intarray} stores
+% threads that will be considered in the next step, more precisely the
+% states in which these threads are.
+% \begin{macrocode}
+\__intarray_new:Nn \g_@@_state_active_intarray { 65536 } % last step in which each state was active
+\__intarray_new:Nn \g_@@_thread_state_intarray { 65536 } % states of the threads for the next step
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_every_match_tl}
+% Every time a match is found, this token list is used. For single
+% matching, the token list is empty. For multiple matching, the token
+% list is set to repeat the matching, after performing some operation
+% which depends on the user function. See \cs{@@_single_match:} and
+% \cs{@@_multi_match:n}.
+% \begin{macrocode}
+\tl_new:N \l_@@_every_match_tl % code used every time a match is found
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_fresh_thread_bool, \l_@@_empty_success_bool}
+% \begin{macro}[aux]{\@@_if_two_empty_matches:F}
+% When doing multiple matches, we need to avoid infinite loops where
+% each iteration matches the same empty token list. When an empty
+% token list is matched, the next successful match of the same empty
+% token list is suppressed. We detect empty matches by setting
+% \cs{l_@@_fresh_thread_bool} to \texttt{true} for threads which
+% directly come from the start of the regex or from the |\K| command,
+% and testing that boolean whenever a thread succeeds. The function
+% \cs{@@_if_two_empty_matches:F} is redefined at every match
+% attempt, depending on whether the previous match was empty or not:
+% if it was, then the function must cancel a purported success if it
+% is empty and at the same spot as the previous match; otherwise, we
+% definitely don't have two identical empty matches, so the function
+% is \cs{use:n}.
+% \begin{macrocode}
+\bool_new:N \l_@@_fresh_thread_bool % true for threads coming from the regex start or \K
+\bool_new:N \l_@@_empty_success_bool % tested to decide how the function below is redefined
+\cs_new_eq:NN \@@_if_two_empty_matches:F \use:n % default; redefined at every match attempt
+% \end{macrocode}
+% \end{macro}
+% \end{variable}
+%
+% \begin{variable}
+% {
+% \g_@@_success_bool,
+% \l_@@_saved_success_bool,
+% \l_@@_match_success_bool
+% }
+% The boolean \cs{l_@@_match_success_bool} is true if the current
+% match attempt was successful, and \cs{g_@@_success_bool} is true
+% if there was at least one successful match. Apart from the two integer
+% arrays, this is the only global
+% variable in this whole module, but we would need it to be local when
+% matching a control sequence with |\c{...}|. This is done by saving
+% the global variable into \cs{l_@@_saved_success_bool}, which is
+% local, hence not affected by the changes due to inner regex
+% functions.
+% \begin{macrocode}
+\bool_new:N \g_@@_success_bool % true if at least one match succeeded
+\bool_new:N \l_@@_saved_success_bool % local copy used to save the global boolean
+\bool_new:N \l_@@_match_success_bool % true if the current match attempt succeeded
+% \end{macrocode}
+% \end{variable}
+%
+% \subsubsection{Matching: framework}
+%
+% \begin{macro}[int]{\@@_match:n}
+% First store the query into \tn{toks} registers and arrays (see
+% \cs{@@_query_set:nnn}). Then initialize the variables that should
+% be set once for each user function (even for multiple
+% matches). Namely, the overall matching is not yet successful; none of
+% the states should be marked as visited (\cs{g_@@_state_active_intarray}), and
+% we start at step $0$; we pretend that there was a previous match
+% ending at the start of the query, which was not empty (to avoid
+% smothering an empty match at the start). Once all this is set up, we
+% are ready for the ride. Find the first match.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_match:n #1
+ {
+%<trace> \trace_push:nnx { regex } { 1 } { @@_match }
+%<trace> \trace:nnx { regex } { 1 } { analyzing~query~token~list }
+ % Store the query. Positions start at 2 * max_state, above the
+ % registers which hold the NFA states. One item with empty contents
+ % and the codes -1, -2 is stored before the first token and one after
+ % the last (presumably \@@_query_set:nnn advances current_pos, since
+ % min_pos and max_pos are read from it afterwards; TODO confirm).
+ \int_zero:N \l_@@_balance_int
+ \int_set:Nn \l_@@_current_pos_int { 2 * \l_@@_max_state_int }
+ \@@_query_set:nnn { } { -1 } { -2 }
+ \int_set_eq:NN \l_@@_min_pos_int \l_@@_current_pos_int
+ \__tl_analysis_map_inline:nn {#1}
+ { \@@_query_set:nnn {##1} {"##2} {##3} }
+ \int_set_eq:NN \l_@@_max_pos_int \l_@@_current_pos_int
+ \@@_query_set:nnn { } { -1 } { -2 }
+%<trace> \trace:nnx { regex } { 1 } { initializing }
+ % Settings done once per user function: no success yet, and the
+ % entry of every state from min_state to max_state - 1 in the
+ % state_active array is set to 1.
+ \bool_gset_false:N \g_@@_success_bool
+ \int_step_inline:nnnn
+ \l_@@_min_state_int { 1 } { \l_@@_max_state_int - 1 }
+ { \__intarray_gset_fast:Nnn \g_@@_state_active_intarray {##1} { 1 } }
+ \int_set_eq:NN \l_@@_min_active_int \l_@@_max_state_int
+ \int_zero:N \l_@@_step_int
+ % Pretend a previous, non-empty match ended at the start of the
+ % query, so that an empty match there is not suppressed.
+ \int_set_eq:NN \l_@@_success_pos_int \l_@@_min_pos_int
+ \int_set:Nn \l_@@_min_submatch_int
+ { 2 * \l_@@_max_state_int }
+ \int_set_eq:NN \l_@@_submatch_int \l_@@_min_submatch_int
+ \bool_set_false:N \l_@@_empty_success_bool
+ % Find the first match.
+ \@@_match_once:
+%<trace> \trace_pop:nnx { regex } { 1 } { @@_match }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_match_once:}
+% This function finds one match, then does some action defined by the
+% \texttt{every_match} token list, which may recursively call
+% \cs{@@_match_once:}. First initialize some variables: set the
+% conditional which detects identical empty matches; this match
+% attempt starts at the previous \texttt{success_pos}, is not yet
+% successful, and has no submatches yet; clear the array of active
+% threads, and put the starting state $0$ in it. We are then almost
+% ready to read our first token in the query, but we actually start
+% one position earlier than the start, and \texttt{get} that token, so
+% that the \texttt{last_char} will be set properly for word
+% boundaries. Then call \cs{@@_match_loop:}, which runs through the
+% query until the end or until a successful match breaks early.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_match_once: % find one match, then run \l_@@_every_match_tl
+ {
+ \if_meaning:w \c_true_bool \l_@@_empty_success_bool % was the previous match empty?
+ \cs_set:Npn \@@_if_two_empty_matches:F
+ { \int_compare:nNnF \l_@@_start_pos_int = \l_@@_current_pos_int } % refuse a success at the start position
+ \else:
+ \cs_set_eq:NN \@@_if_two_empty_matches:F \use:n % no test needed: always accept
+ \fi:
+ \int_set_eq:NN \l_@@_start_pos_int \l_@@_success_pos_int % resume where the previous match ended
+ \bool_set_false:N \l_@@_match_success_bool % this attempt is not yet successful
+ \prop_clear:N \l_@@_current_submatches_prop % no submatches yet
+ \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int % empty the array of active threads
+ \@@_store_state:n { \l_@@_min_state_int } % the starting state is the only active thread
+ \int_set:Nn \l_@@_current_pos_int
+ { \l_@@_start_pos_int - 1 } % start one position early
+ \@@_query_get: % so that last_char is set properly for word boundaries
+ \@@_match_loop:
+ \l_@@_every_match_tl % action set by \@@_single_match: or \@@_multi_match:n
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_single_match:, \@@_multi_match:n}
+% For a single match, the overall success is determined by whether the
+% only match attempt is a success. When doing multiple matches, the
+% overall matching is successful as soon as any match
+% succeeds. Perform the action |#1|, then find the next match.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_single_match: % overall success is the success of the only match attempt
+ {
+ \tl_set:Nn \l_@@_every_match_tl
+ { \bool_gset_eq:NN \g_@@_success_bool \l_@@_match_success_bool } % run at the end of \@@_match_once:
+ }
+\cs_new_protected:Npn \@@_multi_match:n #1 % after each successful match, run #1 and search again
+ {
+ \tl_set:Nn \l_@@_every_match_tl
+ {
+ \if_meaning:w \c_true_bool \l_@@_match_success_bool % stop as soon as an attempt fails
+ \bool_gset_true:N \g_@@_success_bool % any success makes the overall matching successful
+ #1
+ \exp_after:wN \@@_match_once: % the conditional is closed before the next search starts
+ \fi:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_match_loop:}
+% \begin{macro}[aux, rEXP]{\@@_match_one_active:n}
+% At each new position, set some variables and get the new character
+% and category from the query. Then unpack the array of active
+% threads, and clear it by resetting its length
+% (\texttt{max_active}). This results in a sequence of
+% \cs{@@_use_state_and_submatches:nn} \Arg{state} \Arg{prop}, and
+% we consider those states one by one in order. As soon as a thread
+% succeeds, exit the step, and, if there are threads to consider at the
+% next position, and we have not reached the end of the string,
+% repeat the loop. Otherwise, the last thread that succeeded is what
+% \cs{@@_match_once:} matches. We explain the \texttt{fresh_thread}
+% business when describing \cs{@@_action_start_wildcard:}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_match_loop: % process one query position, then loop while threads remain
+ {
+ \int_add:Nn \l_@@_step_int { 2 } % each position uses the marks step and step + 1 (see \@@_use_state:)
+ \int_incr:N \l_@@_current_pos_int
+ \int_set_eq:NN \l_@@_last_char_int \l_@@_current_char_int % remember the previous character
+ \int_set_eq:NN \l_@@_case_changed_char_int \c_max_int
+ \@@_query_get: % character and category codes at the new position
+ \use:x
+ {
+ \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int % clear the thread array by resetting its length
+ \int_step_function:nnnN
+ { \l_@@_min_active_int }
+ { 1 }
+ { \l_@@_max_active_int - 1 }
+ \@@_match_one_active:n % one call per thread that was active
+ }
+ \__prg_break_point:
+ \bool_set_false:N \l_@@_fresh_thread_bool %^^A was arg of break_point:n
+ \if_int_compare:w \l_@@_max_active_int > \l_@@_min_active_int % threads left for the next position?
+ \if_int_compare:w \l_@@_current_pos_int < \l_@@_max_pos_int % end of the query not yet reached?
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_match_loop: % both conditionals are closed before looping
+ \fi:
+ \fi:
+ }
+\cs_new:Npn \@@_match_one_active:n #1 % #1: index of one entry in the array of active threads
+ {
+ \@@_use_state_and_submatches:nn
+ { \__intarray_item_fast:Nn \g_@@_thread_state_intarray {#1} } % state stored for that thread
+ { \@@_toks_use:w #1 } % toks register of the same index (filled by \@@_store_submatches:)
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_query_set:nnn}
+% The arguments are: tokens that \texttt{o} and \texttt{x} expand to
+% one token of the query, the catcode, and the character code. Store
+% those, and the current brace balance (used later to check for
+% overall brace balance) in a \tn{toks} register and some arrays,
+% then update the \texttt{balance}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_query_set:nnn #1#2#3 % #1: tokens, #2: catcode, #3: character code
+ {
+ \__intarray_gset_fast:Nnn \g_@@_charcode_intarray
+ { \l_@@_current_pos_int } {#3}
+ \__intarray_gset_fast:Nnn \g_@@_catcode_intarray
+ { \l_@@_current_pos_int } {#2}
+ \__intarray_gset_fast:Nnn \g_@@_balance_intarray
+ { \l_@@_current_pos_int } { \l_@@_balance_int } % balance as it stands before this token
+ \@@_toks_set:Nn \l_@@_current_pos_int {#1}
+ \int_incr:N \l_@@_current_pos_int % advance to the next position
+ \if_case:w #2 \exp_stop_f: % catcode 0: balance unchanged
+ \or: \int_incr:N \l_@@_balance_int % catcode 1: balance goes up
+ \or: \int_decr:N \l_@@_balance_int % catcode 2: balance goes down
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_query_get:}
+% Extract the current character and category codes at the current
+% position from the appropriate arrays.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_query_get: % read character and category codes at \l_@@_current_pos_int
+ {
+ \l_@@_current_char_int
+ = \__intarray_item_fast:Nn \g_@@_charcode_intarray
+ { \l_@@_current_pos_int } \scan_stop: % \scan_stop: ends this primitive assignment
+ \l_@@_current_catcode_int
+ = \__intarray_item_fast:Nn \g_@@_catcode_intarray
+ { \l_@@_current_pos_int } \scan_stop: % same for the category code
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Using states of the \textsc{nfa}}
+%
+% \begin{macro}[int]{\@@_use_state:}
+% Use the current \textsc{nfa} instruction. The state is initially
+% marked as belonging to the current \texttt{step}: this allows normal
+% free transitions to revisit it, whereas group-repeating transitions
+% will not. Once we are done exploring all the branches it spawned, the
+% state is marked as $\texttt{step}+1$: any thread hitting it at that
+% point will be terminated.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_use_state: % run the instruction of state \l_@@_current_state_int
+ {
+%<*trace>
+ \trace:nnx { regex } { 2 } { state~\int_use:N \l_@@_current_state_int }
+%</trace>
+ \__intarray_gset_fast:Nnn \g_@@_state_active_intarray
+ { \l_@@_current_state_int } { \l_@@_step_int } % mark: being explored at the current step
+ \@@_toks_use:w \l_@@_current_state_int % insert the instruction stored for this state
+ \__intarray_gset_fast:Nnn \g_@@_state_active_intarray
+ { \l_@@_current_state_int } { \l_@@_step_int + 1 } % mark: done, a thread hitting it now is terminated
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_use_state_and_submatches:nn}
+% This function is called as one item in the array of active threads
+% after that array has been unpacked for a new step. Update the
+% \texttt{current_state} and \texttt{current_submatches} and use the
+% state if it has not yet been encountered at this step.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_use_state_and_submatches:nn #1 #2 % #1: state, #2: submatches of that thread
+ {
+ \int_set:Nn \l_@@_current_state_int {#1}
+ \if_int_compare:w
+ \__intarray_item_fast:Nn \g_@@_state_active_intarray
+ { \l_@@_current_state_int }
+ < \l_@@_step_int % the state has not yet been encountered at this step
+ \tl_set:Nn \l_@@_current_submatches_prop {#2}
+ \exp_after:wN \@@_use_state: % the conditional is closed before the state is used
+ \fi:
+ \scan_stop:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Actions when matching}
+%
+% \begin{macro}[int]{\@@_action_start_wildcard:}
+% For an unanchored match, state $0$ has a free transition to the next
+% and a costly one to itself, to repeat at the next position. To catch
+% repeated identical empty matches, we need to know if a successful
+% thread corresponds to an empty match. The instruction resetting
+% \cs{l_@@_fresh_thread_bool} may be skipped by a successful
+% thread, hence we had to add it to \cs{@@_match_loop:} too.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_action_start_wildcard: % instruction of state 0 for an unanchored match
+ {
+ \bool_set_true:N \l_@@_fresh_thread_bool % a success reached during this free transition is an empty match
+ \@@_action_free:n {1} % free transition to the next state
+ \bool_set_false:N \l_@@_fresh_thread_bool % may be skipped by a success; \@@_match_loop: resets it too
+ \@@_action_cost:n {0} % costly transition to itself, to repeat at the next position
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_action_free:n, \@@_action_free_group:n}
+% \begin{macro}[aux]{\@@_action_free_aux:nn}
+% These functions copy a thread after checking that the \textsc{nfa}
+% state has not already been used at this position. If not, store
+% submatches in the new state, and insert the instructions for that
+% state in the input stream. Then restore the old value of
+% \cs{l_@@_current_state_int} and of the current submatches. The
+% two types of free transitions differ by how they test that the state
+% has not been encountered yet: the \texttt{group} version is
+% stricter, and will not use a state if it was used earlier in the
+% current thread, hence forcefully breaking the loop, while the
+% \enquote{normal} version will revisit a state when within the thread
+% itself.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_action_free:n % normal version: use the target state unless its mark exceeds the step
+ { \@@_action_free_aux:nn { > \l_@@_step_int \else: } }
+\cs_new_protected:Npn \@@_action_free_group:n % stricter version: use the target state only if its mark is below the step
+ { \@@_action_free_aux:nn { < \l_@@_step_int } }
+\cs_new_protected:Npn \@@_action_free_aux:nn #1#2 % #1: end of the test on the mark, #2: shift to the target state
+ {
+ \use:x
+ {
+ \int_add:Nn \l_@@_current_state_int {#2} % move to the target state
+ \exp_not:n
+ {
+ \if_int_compare:w
+ \__intarray_item_fast:Nn \g_@@_state_active_intarray
+ { \l_@@_current_state_int }
+ #1
+ \exp_after:wN \@@_use_state:
+ \fi:
+ }
+ \int_set:Nn \l_@@_current_state_int
+ { \int_use:N \l_@@_current_state_int } % expanded by \use:x, hence restores the old state
+ \tl_set:Nn \exp_not:N \l_@@_current_submatches_prop
+ { \exp_not:o \l_@@_current_submatches_prop } % likewise restores the old submatches
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_action_cost:n}
+% A transition which consumes the current character and shifts the
+% state by |#1|. The resulting state is stored in the appropriate array
+% for use at the next position, and we also store the current
+% submatches.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_action_cost:n #1 % #1: state shift; the thread resumes at the next position
+ {
+ \exp_args:No \@@_store_state:n
+ { \__int_value:w \__int_eval:w \l_@@_current_state_int + #1 } % evaluated before \@@_store_state:n receives it
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_store_state:n}
+% \begin{macro}[aux]{\@@_store_submatches:}
+% Put the given state in \cs{g_@@_thread_state_intarray}, and increment
+% the length of the array. Also store the current submatch in the
+% appropriate \tn{toks}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_store_state:n #1 % append state #1 to the array of active threads
+ {
+ \@@_store_submatches: % uses max_active as index, so it runs before the increment
+ \__intarray_gset_fast:Nnn \g_@@_thread_state_intarray
+ { \l_@@_max_active_int } {#1}
+ \int_incr:N \l_@@_max_active_int % the array is now one entry longer
+ }
+\cs_new_protected:Npn \@@_store_submatches: % save the current submatches in the toks numbered max_active
+ {
+ \@@_toks_set:No \l_@@_max_active_int
+ { \l_@@_current_submatches_prop }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_disable_submatches:}
+% Some user functions don't require tracking submatches.
+% We get a performance improvement by simply defining the
+% relevant functions to remove their argument and do nothing
+% with it.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_disable_submatches: % locally turn submatch tracking into no-ops
+ {
+ \cs_set_protected:Npn \@@_store_submatches: { } % nothing is stored any more
+ \cs_set_protected:Npn \@@_action_submatch:n ##1 { } % removes its argument and does nothing
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_action_submatch:n}
+% Update the current submatches with the information from the current
+% position. Maybe a bottleneck.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_action_submatch:n #1 % #1: key under which the current position is recorded
+ {
+ \prop_put:Nno \l_@@_current_submatches_prop {#1}
+ { \int_use:N \l_@@_current_pos_int } % expanded once, so the number itself is stored
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_action_success:}
+% There is a successful match when an execution path reaches the last
+% state in the \textsc{nfa}, unless this marks a second identical
+% empty match. Then mark that there was a successful match; it is
+% empty if it is \enquote{fresh}; and we store the current position
+% and submatches. The current step is then interrupted with
+% \cs{__prg_break:}, and only paths with higher precedence are
+% pursued further. The values stored here may be overwritten by a
+% later success of a path with higher precedence.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_action_success: % record a successful match and interrupt the current step
+ {
+ \@@_if_two_empty_matches:F % nothing happens for a second identical empty match
+ {
+ \bool_set_true:N \l_@@_match_success_bool
+ \bool_set_eq:NN \l_@@_empty_success_bool
+ \l_@@_fresh_thread_bool % the match is empty if the thread is fresh
+ \int_set_eq:NN \l_@@_success_pos_int \l_@@_current_pos_int
+ \prop_set_eq:NN \l_@@_success_submatches_prop
+ \l_@@_current_submatches_prop
+ \__prg_break: % skip the threads not yet considered at this step
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Replacement}
+%
+% \subsubsection{Variables and helpers used in replacement}
+%
+% \begin{variable}{\l_@@_replacement_csnames_int}
+% The behaviour of closing braces inside a replacement text depends on
+% whether a sequence |\c{| or |\u{| has been encountered. The number
+% of \enquote{open} such sequences that should be closed by |}| is
+% stored in \cs{l_@@_replacement_csnames_int}, and decreased by
+% $1$ by each |}|.
+% \begin{macrocode}
+\int_new:N \l_@@_replacement_csnames_int % number of open \c or \u constructions still to be closed
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_replacement_category_tl, \l_@@_replacement_category_seq}
+% This sequence of letters is used to correctly restore categories in
+% nested constructions such as |\cL(abc\cD(_)d)|.
+% \begin{macrocode}
+\tl_new:N \l_@@_replacement_category_tl % category letter currently in force (empty if none)
+\seq_new:N \l_@@_replacement_category_seq % stack of the categories of enclosing groups
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_balance_tl}
+% This token list holds the replacement text for
+% \cs{@@_replacement_balance_one_match:n} while it is being built
+% incrementally.
+% \begin{macrocode}
+\tl_new:N \l_@@_balance_tl % code adding up the brace balance of the submatches used in the replacement
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[aux, rEXP]{\@@_replacement_balance_one_match:n}
+% This expects as an argument the first index of a set of entries in
+% \cs{g_@@_submatch_begin_intarray} (and related arrays) which hold the
+% submatch information for a given match. It
+% can be used within an integer expression to obtain the brace balance
+% incurred by performing the replacement on that match. This combines
+% the braces lost by removing the match, braces added by all the
+% submatches appearing in the replacement, and braces appearing
+% explicitly in the replacement. Even though it is always redefined
+% before use, we initialize it as for an empty replacement. An
+% important property is that concatenating several calls to that
+% function must result in a valid integer expression (hence a leading
+% |+| in the actual definition).
+% \begin{macrocode}
+\cs_new:Npn \@@_replacement_balance_one_match:n #1 % initial value, as for an empty replacement; redefined by \@@_replacement:n
+ { - \@@_submatch_balance:n {#1} } % only the braces lost by removing the match
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_replacement_do_one_match:n}
+% The input is the same as \cs{@@_replacement_balance_one_match:n}.
+% This function is redefined to expand to the part of the token list
+% from the end of the previous match to a given match, followed by the
+% replacement text. Hence concatenating the result of this function
+% with all possible arguments (one call for each match), as well as
+% the range from the end of the last match to the end of the string,
+% will produce the fully replaced token list. The initialization does
+% not matter, but (as an example) we set it as for an empty replacement.
+% \begin{macrocode}
+\cs_new:Npn \@@_replacement_do_one_match:n #1 % initial value, as for an empty replacement; redefined by \@@_replacement_aux:n
+ {
+ \@@_query_range:nn % tokens from the end of the previous match to the start of this one
+ { \__intarray_item_fast:Nn \g_@@_submatch_prev_intarray {#1} }
+ { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_exp_not:N}
+% This function lets us navigate around the fact that the primitive
+% \cs{exp_not:n} requires a braced argument. As far as I can tell, it
+% is only needed if the user tries to include in the replacement text
+% a control sequence set equal to a macro parameter character, such as
+% \cs{c_parameter_token}. Indeed, within an \texttt{x}-expanding
+% assignment, \cs{exp_not:N}~|#| behaves as a single |#|, whereas
+% \cs{exp_not:n}~|{#}| behaves as a doubled |##|.
+% \begin{macrocode}
+\cs_new:Npn \@@_replacement_exp_not:N #1 { \exp_not:n {#1} } % \exp_not:n applied to one unbraced token
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Query and brace balance}
+%
+% \begin{macro}[int, rEXP]{\@@_query_range:nn}
+% \begin{macro}[aux, rEXP]{\@@_query_range_loop:ww}
+% When it is time to extract submatches from the token list, the
+% various tokens are stored in \tn{toks} registers numbered from
+% \cs{l_@@_min_pos_int} inclusive to \cs{l_@@_max_pos_int}
+% exclusive. The function \cs{@@_query_range:nn} \Arg{min}
+% \Arg{max} unpacks registers from the position \meta{min} to the
+% position $\meta{max}-1$ included. Once this is expanded, a second
+% \texttt{x}-expansion will result in the actual tokens from the
+% query. That second expansion is only done by user functions at the
+% very end of their operation, after checking (and correcting) the
+% brace balance first.
+% \begin{macrocode}
+\cs_new:Npn \@@_query_range:nn #1#2 % unpack the toks registers from position #1 to position #2 - 1
+ {
+ \exp_after:wN \@@_query_range_loop:ww
+ \__int_value:w \__int_eval:w #1 \exp_after:wN ; % both bounds are evaluated before the loop starts
+ \__int_value:w \__int_eval:w #2 ;
+ \__prg_break_point: % the loop jumps here when the range is exhausted
+ }
+\cs_new:Npn \@@_query_range_loop:ww #1 ; #2 ; % #1: current position, #2: upper bound (exclusive)
+ {
+ \if_int_compare:w #1 < #2 \exp_stop_f:
+ \else:
+ \exp_after:wN \__prg_break: % range exhausted: skip to the break point in \@@_query_range:nn
+ \fi:
+ \@@_toks_use:w #1 \exp_stop_f: % contents of the register at the current position
+ \exp_after:wN \@@_query_range_loop:ww
+ \__int_value:w \__int_eval:w #1 + 1 ; #2 ; % next position, evaluated before the next iteration
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_query_submatch:n}
+% Find the start and end positions for a given submatch (of a given match).
+% \begin{macrocode}
+\cs_new:Npn \@@_query_submatch:n #1 % #1: index of the submatch in the submatch arrays
+ {
+ \@@_query_range:nn % tokens between the begin and end positions of that submatch
+ { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} }
+ { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[rEXP]{\@@_submatch_balance:n}
+% Every user function must result in a balanced token list (unbalanced
+% token lists cannot be stored by TeX). When we unpacked the query, we
+% kept track of the brace balance, hence the contribution from a given
+% range is the difference between the brace balances at the
+% \meta{max~pos} and \meta{min~pos}. These two positions are found in
+% the corresponding \enquote{submatch} arrays.
+%^^A todo: understand when these int_compare are needed
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_submatch_balance:n #1 % NOTE(review): protected although tagged rEXP; presumably so that it is left untouched by the x-type definition in \@@_replacement:n -- confirm
+ {
+ \__int_eval:w
+ \int_compare:nNnTF
+ { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} } = 0
+ { 0 } % an end position of 0 counts as balance 0
+ {
+ \__intarray_item_fast:Nn \g_@@_balance_intarray
+ { \__intarray_item_fast:Nn \g_@@_submatch_end_intarray {#1} } % balance recorded at the end position
+ }
+ -
+ \int_compare:nNnTF
+ { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} } = 0
+ { 0 } % a begin position of 0 counts as balance 0
+ {
+ \__intarray_item_fast:Nn \g_@@_balance_intarray
+ { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {#1} } % balance recorded at the begin position
+ }
+ \__int_eval_end:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Framework}
+%
+% \begin{macro}[int]{\@@_replacement:n}
+% \begin{macro}[aux]{\@@_replacement_aux:n}
+% The replacement text is built incrementally by abusing \tn{toks}
+% within a group (see \pkg{l3tl-build}). We keep track in
+% \cs{l_@@_balance_int} of the balance of explicit begin- and
+% end-group tokens and \cs{l_@@_balance_tl} will consist of some
+% code to compute the brace balance from submatches (see its
+% description). Detect unescaped right braces, and escaped characters,
+% with trailing \cs{prg_do_nothing:} because some of the later
+% functions look ahead. Once the whole replacement text has been
+% parsed, make sure that there is no open csname. Finally, define the
+% \texttt{balance_one_match} and \texttt{do_one_match} functions.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement:n #1 % parse the replacement text #1 and define the two one_match functions
+ {
+%<trace> \trace_push:nnn { regex } { 1 } { @@_replacement:n }
+ \__tl_build:Nw \l_@@_internal_a_tl % the replacement code is built into this token list
+ \int_zero:N \l_@@_balance_int % balance of explicit begin- and end-group tokens
+ \tl_clear:N \l_@@_balance_tl % code computing the brace balance from submatches
+ \@@_escape_use:nnnn
+ {
+ \if_charcode:w \c_right_brace_str ##1 % is this an unescaped right brace?
+ \@@_replacement_rbrace:N
+ \else:
+ \@@_replacement_normal:n
+ \fi:
+ ##1
+ }
+ { \@@_replacement_escaped:N ##1 } % escaped characters
+ { \@@_replacement_normal:n ##1 }
+ {#1}
+ \prg_do_nothing: \prg_do_nothing: % trailing markers for the functions that look ahead
+ \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f: % some \c or \u construction is still open
+ \__msg_kernel_error:nnx { regex } { replacement-missing-rbrace }
+ { \int_use:N \l_@@_replacement_csnames_int }
+ \__tl_build_one:x
+ { \prg_replicate:nn \l_@@_replacement_csnames_int \cs_end: } % close every open csname
+ \fi:
+ \seq_if_empty:NF \l_@@_replacement_category_seq % some category group is still open
+ {
+ \__msg_kernel_error:nnx { regex } { replacement-missing-rparen }
+ { \seq_count:N \l_@@_replacement_category_seq }
+ \seq_clear:N \l_@@_replacement_category_seq
+ }
+ \cs_gset:Npx \@@_replacement_balance_one_match:n ##1 % presumably global because a group is still open here -- confirm
+ {
+ + \int_use:N \l_@@_balance_int
+ \l_@@_balance_tl
+ - \@@_submatch_balance:n {##1}
+ }
+ \__tl_build_end:
+ \exp_args:No \@@_replacement_aux:n \l_@@_internal_a_tl % define do_one_match from the built code
+%<trace> \trace_pop:nnn { regex } { 1 } { @@_replacement:n }
+ }
+\cs_new_protected:Npn \@@_replacement_aux:n #1 % #1: the replacement code built by \@@_replacement:n
+ {
+ \cs_set:Npn \@@_replacement_do_one_match:n ##1
+ {
+ \@@_query_range:nn % tokens from the end of the previous match to the start of this one
+ { \__intarray_item_fast:Nn \g_@@_submatch_prev_intarray {##1} }
+ { \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray {##1} }
+ #1
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_normal:n}
+% Most characters are simply sent to the output by
+% \cs{__tl_build_one:n}, unless a particular category code has been
+% requested: then \cs{@@_replacement_c_A:w} or a similar auxiliary is
+% called. One exception is right parentheses, which restore the
+% category code in place before the group started. Note that the
+% sequence is non-empty there: it contains an empty entry
+% corresponding to the initial value of
+% \cs{l_@@_replacement_category_tl}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_normal:n #1 % output character #1, honouring any requested category
+ {
+ \tl_if_empty:NTF \l_@@_replacement_category_tl
+ { \__tl_build_one:n {#1} } % no category requested: send #1 to the output
+ { % (
+ \token_if_eq_charcode:NNTF #1 )
+ {
+ \seq_pop:NN \l_@@_replacement_category_seq
+ \l_@@_replacement_category_tl % restore the category in place before the group started
+ }
+ {
+ \use:c { @@_replacement_c_ \l_@@_replacement_category_tl :w }
+ \@@_replacement_normal:n {#1} % these two items are picked up by the category auxiliary
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_escaped:N}
+% As in parsing a regular expression, we use an auxiliary built from
+% |#1| if defined. Otherwise, check for escaped digits (standing for
+% submatches from $0$ to $9$): anything else is a raw character.
+% We use \cs{token_to_str:N} to give spaces the right category code.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_escaped:N #1 % #1: the escaped character
+ {
+ \cs_if_exist_use:cF { @@_replacement_#1:w } % use the auxiliary named after #1 if there is one
+ {
+ \if_int_compare:w 1 < 1#1 \exp_stop_f: % true when #1 is a digit
+ \@@_replacement_put_submatch:n {#1}
+ \else:
+ \exp_args:No \@@_replacement_normal:n
+ { \token_to_str:N #1 } % anything else is a raw character
+ \fi:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Submatches}
+%
+% \begin{macro}[aux]{\@@_replacement_put_submatch:n}
+% Insert a submatch in the replacement text. This is dropped if the
+% submatch number is larger than the number of capturing groups.
+% Unless the submatch appears inside a |\c{...}| or |\u{...}|
+% construction, it must be taken into account in the brace balance.
+% Here, |##1| will receive a pointer to the $0$-th submatch for a
+% given match. We cannot use \cs{int_eval:n} because it is
+% expandable, and would be expanded too early (short of adding
+% \cs{exp_not:N}, making the code messy again).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_put_submatch:n #1 % #1: submatch number
+ {
+ \if_int_compare:w #1 < \l_@@_capturing_group_int % otherwise the submatch is dropped
+ \__tl_build_one:n { \@@_query_submatch:n { #1 + ##1 } }
+ \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f: % not inside a \c or \u construction
+ \tl_put_right:Nn \l_@@_balance_tl
+ { + \@@_submatch_balance:n { \__int_eval:w #1+##1 \__int_eval_end: } }
+ \fi:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_g:w}
+% \begin{macro}[aux,rEXP]{\@@_replacement_g_digits:NN}
+% Grab digits for the |\g| escape sequence in a primitive assignment
+% to the integer \cs{l_@@_internal_a_int}. At the end of the run of
+% digits, check that it ends with a right brace.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_g:w #1#2 % #1#2: the two items that follow the g escape
+ {
+ \str_if_eq_x:nnTF { #1#2 } { \@@_replacement_normal:n \c_left_brace_str } % an unescaped left brace?
+ { \l_@@_internal_a_int = \@@_replacement_g_digits:NN } % digits are grabbed within this primitive assignment
+ { \@@_replacement_error:NNN g #1 #2 }
+ }
+\cs_new:Npn \@@_replacement_g_digits:NN #1#2 % expandable: runs while the assignment started in \@@_replacement_g:w reads its number
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
+ {
+ \if_int_compare:w 1 < 1#2 \exp_stop_f: % true when the second argument is a digit
+ #2
+ \exp_after:wN \use_i:nnn
+ \exp_after:wN \@@_replacement_g_digits:NN % keep grabbing; \use_i:nnn drops the copy of the arguments at the end
+ \else:
+ \exp_stop_f: % ends the number
+ \exp_after:wN \@@_replacement_error:NNN
+ \exp_after:wN g
+ \fi:
+ }
+ {
+ \exp_stop_f: % ends the number
+ \if_meaning:w \@@_replacement_rbrace:N #1 % the run of digits must end with a right brace
+ \exp_args:No \@@_replacement_put_submatch:n
+ { \int_use:N \l_@@_internal_a_int }
+ \exp_after:wN \use_none:nn % drops the copy of the arguments at the end
+ \else:
+ \exp_after:wN \@@_replacement_error:NNN
+ \exp_after:wN g
+ \fi:
+ }
+ #1 #2
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsubsection{Csnames in replacement}
+%
+% \begin{macro}[aux]{\@@_replacement_c:w}
+% |\c| may only be followed by an unescaped character. If followed by
+% a left brace, start a control sequence by calling an auxiliary
+% common with |\u|. Otherwise test whether the category is known; if
+% it is not, complain.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_c:w #1#2 % #1#2: the two items that follow the c escape
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n % only an unescaped character may follow
+ {
+ \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
+ { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:N } % left brace: start a control sequence
+ {
+ \cs_if_exist:cTF { @@_replacement_c_#2:w } % is the category known?
+ { \@@_replacement_cat:NNN #2 }
+ { \@@_replacement_error:NNN c #1#2 }
+ }
+ }
+ { \@@_replacement_error:NNN c #1#2 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_cu_aux:Nw}
+% Start a control sequence with \cs{cs:w}, which will be protected
+% from expansion by |#1| (either \cs{@@_replacement_exp_not:N} or
+% \cs{exp_not:V}), or turned to a string by \cs{tl_to_str:V} if inside
+% another csname construction |\c| or |\u|. We use \cs{tl_to_str:V}
+% rather than \cs{tl_to_str:N} to deal with integers and other
+% registers.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_cu_aux:Nw #1 % #1: function applied to the control sequence at top level
+ {
+ \if_case:w \l_@@_replacement_csnames_int % case 0: not inside another csname construction
+ \__tl_build_one:n { \exp_not:n { \exp_after:wN #1 \cs:w } }
+ \else:
+ \__tl_build_one:n { \exp_not:n { \exp_after:wN \tl_to_str:V \cs:w } } % nested: turned to a string
+ \fi:
+ \int_incr:N \l_@@_replacement_csnames_int % one more construction to be closed by a right brace
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_u:w}
+% Check that |\u| is followed by a left brace. If so, start a control
+% sequence with \cs{cs:w}, which is then unpacked either with
+% \cs{exp_not:V} or \cs{tl_to_str:V} depending on the current context.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_u:w #1#2 % #1#2: the two items that follow the u escape
+ {
+ \str_if_eq_x:nnTF { #1#2 } { \@@_replacement_normal:n \c_left_brace_str } % an unescaped left brace?
+ { \@@_replacement_cu_aux:Nw \exp_not:V } % start a control sequence whose value is used
+ { \@@_replacement_error:NNN u #1#2 }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_rbrace:N}
+% Within a |\c{...}| or |\u{...}| construction, end the control
+% sequence, and decrease the brace count. Otherwise, this is a raw
+% right brace.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_rbrace:N #1 % #1: the right brace character
+ {
+ \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f: % inside a \c or \u construction
+ \__tl_build_one:n \cs_end: % end the control sequence
+ \int_decr:N \l_@@_replacement_csnames_int
+ \else:
+ \@@_replacement_normal:n {#1} % otherwise a raw right brace
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Characters in replacement}
+%
+% \begin{macro}[aux]{\@@_replacement_cat:NNN}
+% Here, |#1| is a letter among |BCEMTPUDSLOA| and |#2#3| denote the
+% next character. Complain if we reach the end of the replacement or
+% if the construction appears inside |\c{|\ldots{}|}| or
+% |\u{|\ldots{}|}|, and detect the case of a parenthesis. In that
+% case, store the current category in a sequence and switch to a new
+% one.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_cat:NNN #1#2#3 % #1: category letter, #2#3: the next character
+ {
+ \token_if_eq_meaning:NNTF \prg_do_nothing: #3 % end of the replacement reached
+ { \__msg_kernel_error:nn { regex } { replacement-catcode-end } }
+ {
+ \int_compare:nNnTF { \l_@@_replacement_csnames_int } > 0 % inside a \c or \u construction
+ {
+ \__msg_kernel_error:nnnn
+ { regex } { replacement-catcode-in-cs } {#1} {#3}
+ #2 #3
+ }
+ {
+ \str_if_eq:nnTF { #2 #3 } { \@@_replacement_normal:n ( } % )
+ {
+ \seq_push:NV \l_@@_replacement_category_seq
+ \l_@@_replacement_category_tl % remember the category currently in force
+ \tl_set:Nn \l_@@_replacement_category_tl {#1} % and switch to the new one
+ }
+ { \use:c { @@_replacement_c_#1:w } #2 #3 } % a single character: apply the category to it
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% We will need to change the category code of the null character many
+% times, hence work in a group. The catcode-specific macros below are
+% defined in alphabetical order; if you are trying to understand the
+% code, start from the end of the alphabet as those categories are
+% simpler than active or begin-group.
+% \begin{macrocode}
+\group_begin: % keeps the catcode changes of the null character below local; presumably closed further down the file
+% \end{macrocode}
+%
+% \begin{macro}[aux]{\@@_replacement_char:nNN}
+% The only way to produce an arbitrary character--catcode pair is to
+% use the \tn{lowercase} or \tn{uppercase} primitives. This is a
+% wrapper for our purposes. The first argument is the null character
+% with various catcodes. The second and third arguments are grabbed
+% from the input stream: |#3| is the character whose character code to
+% reproduce. We could use \cs{char_generate:nn} but only for some
+% catcodes (active characters and spaces are not supported).
+% \begin{macrocode}
+ \cs_new_protected:Npn \@@_replacement_char:nNN #1#2#3 % #1: null character with the wanted catcode, #3: character to reproduce
+ {
+ \tex_lccode:D 0 = `#3 \scan_stop: % lowercasing the null character now gives the character code of #3
+ \tex_lowercase:D { \__tl_build_one:n {#1} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_A:w}
+% For an active character, expansion must be avoided, twice because we
+% later do two \texttt{x}-expansions, to unpack \tn{toks} for the
+% query, and to expand their contents to tokens of the query.
+% \begin{macrocode}
+ \char_set_catcode_active:N \^^@ % the null character is active in the definition that follows
+ \cs_new_protected:Npn \@@_replacement_c_A:w
+ { \@@_replacement_char:nNN { \exp_not:n { \exp_not:N ^^@ } } } % protected against the two later x-expansions
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_B:w}
+% An explicit begin-group token increases the balance, unless within a
+% |\c{...}| or |\u{...}| construction. Add the desired begin-group
+% character, using the standard \cs{if_false:} trick. We eventually
+% \texttt{x}-expand twice. The first time must yield a balanced token
+% list, and the second one gives the bare begin-group token. The
+% \cs{exp_after:wN} is not strictly needed, but is more consistent
+% with \pkg{l3tl-analysis}.
+% \begin{macrocode}
+ \char_set_catcode_group_begin:N \^^@ % the null character is a begin-group character in the definition that follows
+ \cs_new_protected:Npn \@@_replacement_c_B:w
+ {
+ \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f: % not inside a \c or \u construction
+ \int_incr:N \l_@@_balance_int % one more explicit begin-group token
+ \fi:
+ \@@_replacement_char:nNN
+ { \exp_not:n { \exp_after:wN ^^@ \if_false: } \fi: } } % balanced as written; the second x-expansion leaves the bare begin-group token
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_C:w}
+% This is not quite catcode-related: when the user requests a
+% character with category \enquote{control sequence}, the
+% one-character control symbol is returned. As for the active
+% character, we prepare for two \texttt{x}-expansions.
+% The first argument |#1| is grabbed but not used; the control
+% sequence is built from |#2| by \cs{exp_not:c}. Unlike the other
+% categories, this function does not go through
+% \cs{@@_replacement_char:nNN}.
+% \begin{macrocode}
+ \cs_new_protected:Npn \@@_replacement_c_C:w #1#2
+ { \__tl_build_one:n { \exp_not:N \exp_not:N \exp_not:c {#2} } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_D:w}
+% Subscripts fit the mould: \tn{lowercase} the null byte with the
+% correct category.
+% The catcode of the null byte is set just before the definition, so
+% that the |^^@| stored in the macro is a math subscript character.
+% The last two arguments of \cs{@@_replacement_char:nNN} are taken
+% from the input that follows \cs{@@_replacement_c_D:w}.
+% \begin{macrocode}
+ \char_set_catcode_math_subscript:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_D:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_E:w}
+% Similar to the begin-group case, the second \texttt{x}-expansion
+% produces the bare end-group token.
+% Here \cs{l_@@_balance_int} is decremented, and only when
+% \cs{l_@@_replacement_csnames_int} is zero. The null byte has
+% category end-group while the definition is read, so the |^^@| below
+% closes the brace that follows \cs{if_false:}.
+% \begin{macrocode}
+ \char_set_catcode_group_end:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_E:w
+ {
+ \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
+ \int_decr:N \l_@@_balance_int
+ \fi:
+ \@@_replacement_char:nNN
+ { \exp_not:n { \if_false: { \fi: ^^@ } }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_L:w}
+% Simply \tn{lowercase} a letter null byte to produce an arbitrary letter.
+% The null byte is given category letter just before the definition,
+% and \cs{@@_replacement_char:nNN} picks up its last two arguments
+% from the input that follows \cs{@@_replacement_c_L:w}.
+% \begin{macrocode}
+ \char_set_catcode_letter:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_L:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_M:w}
+% No surprise here, we lowercase the null math toggle.
+% The null byte is given category math toggle just before the
+% definition, and \cs{@@_replacement_char:nNN} picks up its last two
+% arguments from the input that follows \cs{@@_replacement_c_M:w}.
+% \begin{macrocode}
+ \char_set_catcode_math_toggle:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_M:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_O:w}
+% Lowercase an other null byte.
+% The null byte is given category other just before the definition,
+% and \cs{@@_replacement_char:nNN} picks up its last two arguments
+% from the input that follows \cs{@@_replacement_c_O:w}.
+% \begin{macrocode}
+ \char_set_catcode_other:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_O:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_P:w}
+% For macro parameters, expansion is a tricky issue. We need to
+% prepare for two \texttt{x}-expansions and passing through various
+% macro definitions. Note that we cannot replace one \cs{exp_not:n} by
+% doubling the macro parameter characters because this would misbehave
+% if a mischievous user asks for |\c{\cP\#}|, since that macro
+% parameter character would be doubled.
+% The null byte has category macro parameter while the definition is
+% read, and four copies of it are written inside two nested
+% \cs{exp_not:n}. (Presumably four are needed because parameter
+% characters are halved each time they pass through a macro
+% definition; this should be checked against the callers.)
+% \begin{macrocode}
+ \char_set_catcode_parameter:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_P:w
+ {
+ \@@_replacement_char:nNN
+ { \exp_not:n { \exp_not:n { ^^@^^@^^@^^@ } } }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_S:w}
+% Spaces are normalized on input by \TeX{} to have character code
+% $32$. It is in fact impossible to get a token with character code
+% $0$ and category code $10$. Hence we use $32$ instead of $0$ as our
+% base character.
+% The first argument |#1| is grabbed but not used, and |#2| is the
+% character whose code is requested. If that code is $0$ the error
+% \texttt{replacement-null-space} is raised. In all cases the
+% \tn{lccode} of the space character is then set to the code of |#2|
+% and \tn{lowercase} is applied to |\__tl_build_one:n {~}|. This
+% function does not use \cs{@@_replacement_char:nNN}.
+% \begin{macrocode}
+ \cs_new_protected:Npn \@@_replacement_c_S:w #1#2
+ {
+ \if_int_compare:w `#2 = 0 \exp_stop_f:
+ \__msg_kernel_error:nn { regex } { replacement-null-space }
+ \fi:
+ \tex_lccode:D `\ = `#2 \scan_stop:
+ \tex_lowercase:D { \__tl_build_one:n {~} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_T:w}
+% No surprise for alignment tabs here. Those are surrounded by the
+% appropriate braces whenever necessary, hence they don't cause
+% trouble in alignment settings.
+% The null byte is given category alignment tab just before the
+% definition, and \cs{@@_replacement_char:nNN} picks up its last two
+% arguments from the input that follows \cs{@@_replacement_c_T:w}.
+% \begin{macrocode}
+ \char_set_catcode_alignment:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_T:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replacement_c_U:w}
+% Simple call to \cs{@@_replacement_char:nNN} which lowercases the
+% math superscript |^^@|.
+% The null byte is given category math superscript just before the
+% definition; the last two arguments of
+% \cs{@@_replacement_char:nNN} come from the input that follows
+% \cs{@@_replacement_c_U:w}.
+% \begin{macrocode}
+ \char_set_catcode_math_superscript:N \^^@
+ \cs_new_protected:Npn \@@_replacement_c_U:w
+ { \@@_replacement_char:nNN { ^^@ } }
+% \end{macrocode}
+% \end{macro}
+%
+% Restore the catcode of the null byte, which was changed before each
+% of the definitions above, by closing the group that encloses them.
+% \begin{macrocode}
+\group_end:
+% \end{macrocode}
+%
+% \subsubsection{An error}
+%
+% \begin{macro}[aux]{\@@_replacement_error:NNN}
+% Simple error reporting by calling one of the messages
+% \texttt{replacement-c}, \texttt{replacement-g}, or
+% \texttt{replacement-u}.
+% The first argument |#1| selects the message (its name is
+% \texttt{replacement-}|#1|), and |#3| is passed to that message as
+% its argument. After the error, |#2| and |#3| are put back into the
+% input in that order.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_error:NNN #1#2#3
+ {
+ \__msg_kernel_error:nnx { regex } { replacement-#1 } {#3}
+ #2 #3
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{User functions}
+%
+% \begin{macro}{\regex_new:N}
+% Before being assigned a sensible value, a regex variable matches
+% nothing.
+% The new variable |#1| is created as a copy of
+% \cs{c_@@_no_match_regex} using \cs{cs_new_eq:NN}.
+% \begin{macrocode}
+\cs_new_protected:Npn \regex_new:N #1
+ { \cs_new_eq:NN #1 \c_@@_no_match_regex }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\regex_set:Nn, \regex_gset:Nn, \regex_const:Nn}
+% Compile, then store the result in the user variable with the
+% appropriate assignment function.
+% In all three functions |#1| is the regex variable and |#2| the
+% regular expression. The compiled form is read from
+% \cs{l_@@_internal_regex} after \cs{@@_compile:n} has run: it is
+% copied with \cs{tl_set_eq:NN} or \cs{tl_gset_eq:NN}, or, for a
+% constant, stored by \cs{tl_const:Nx} with its contents protected
+% from further expansion by \cs{exp_not:o}.
+% \begin{macrocode}
+\cs_new_protected:Npn \regex_set:Nn #1#2
+ {
+ \@@_compile:n {#2}
+ \tl_set_eq:NN #1 \l_@@_internal_regex
+ }
+\cs_new_protected:Npn \regex_gset:Nn #1#2
+ {
+ \@@_compile:n {#2}
+ \tl_gset_eq:NN #1 \l_@@_internal_regex
+ }
+\cs_new_protected:Npn \regex_const:Nn #1#2
+ {
+ \@@_compile:n {#2}
+ \tl_const:Nx #1 { \exp_not:o \l_@@_internal_regex }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\regex_show:N, \regex_show:n}
+% User functions: the \texttt{n} variant requires compilation first.
+% Then show the variable with some appropriate text. The auxiliary
+% \cs{@@_show:Nn} is defined in a different section.
+% Its first argument is the compiled regex to show
+% (\cs{l_@@_internal_regex} for the \texttt{n} variant, the user's
+% variable otherwise) and its second argument is the descriptive
+% text: the regular expression converted to a string and wrapped in
+% braces, or the word \enquote{variable} followed by the name of the
+% variable.
+% \begin{macrocode}
+\cs_new_protected:Npn \regex_show:n #1
+ {
+ \@@_compile:n {#1}
+ \@@_show:Nn \l_@@_internal_regex
+ { { \tl_to_str:n {#1} } }
+ }
+\cs_new_protected:Npn \regex_show:N #1
+ { \@@_show:Nn #1 { variable~\token_to_str:N #1 } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[TF]{\regex_match:nn, \regex_match:Nn}
+% Those conditionals are based on a common auxiliary defined
+% later. Its first argument builds the \textsc{nfa} corresponding to
+% the regex, and the second argument is the query token list. Once we
+% have performed the match, convert the resulting boolean to
+% \cs{prg_return_true:} or \texttt{false}.
+% The \texttt{nn} variant passes the regular expression |#1| to
+% \cs{@@_build:n}, while the \texttt{Nn} variant passes the regex
+% variable |#1| to \cs{@@_build:N}. The conversion to
+% \cs{prg_return_true:} or \cs{prg_return_false:} is done by
+% \cs{@@_return:}.
+% \begin{macrocode}
+\prg_new_protected_conditional:Npnn \regex_match:nn #1#2 { T , F , TF }
+ {
+ \@@_if_match:nn { \@@_build:n {#1} } {#2}
+ \@@_return:
+ }
+\prg_new_protected_conditional:Npnn \regex_match:Nn #1#2 { T , F , TF }
+ {
+ \@@_if_match:nn { \@@_build:N #1 } {#2}
+ \@@_return:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\regex_count:nnN, \regex_count:NnN}
+% Again, use an auxiliary whose first argument builds the \textsc{nfa}.
+% Only the regex argument |#1| is grabbed here; the remaining two
+% arguments of \cs{@@_count:nnN} are taken from the input that
+% follows the user function.
+% \begin{macrocode}
+\cs_new_protected:Npn \regex_count:nnN #1
+ { \@@_count:nnN { \@@_build:n {#1} } }
+\cs_new_protected:Npn \regex_count:NnN #1
+ { \@@_count:nnN { \@@_build:N #1 } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}
+% {
+% \regex_extract_once:nnN, \regex_extract_once:NnN,
+% \regex_extract_all:nnN, \regex_extract_all:NnN,
+% \regex_replace_once:nnN, \regex_replace_once:NnN,
+% \regex_replace_all:nnN, \regex_replace_all:NnN,
+% \regex_split:nnN, \regex_split:NnN
+% }
+% \begin{macro}[TF]
+% {
+% \regex_extract_once:nnN, \regex_extract_once:NnN,
+% \regex_extract_all:nnN, \regex_extract_all:NnN,
+% \regex_replace_once:nnN, \regex_replace_once:NnN,
+% \regex_replace_all:nnN, \regex_replace_all:NnN,
+% \regex_split:nnN, \regex_split:NnN
+% }
+% We define here $40$ user functions, following a common pattern in
+% terms of \texttt{:nnN} auxiliaries, defined in the coming
+% subsections. The auxiliary is handed \cs{@@_build:n} or
+% \cs{@@_build:N} with the appropriate regex argument, then all
+% other necessary arguments (replacement text, token list, \emph{etc.})
+% The conditionals call \cs{@@_return:} to return either
+% \texttt{true} or \texttt{false} once matching has been performed.
+% The temporary function \cs{@@_tmp:w} takes the auxiliary as |#1|,
+% the \texttt{:nnN} user function as |#2| and the \texttt{:NnN} user
+% function as |#3|. For each user function it defines the plain form,
+% which grabs only the regex and leaves the other two arguments to
+% the auxiliary, and the \texttt{T}, \texttt{F} and \texttt{TF}
+% conditionals, which grab all three arguments explicitly.
+% \begin{macrocode}
+\cs_set_protected:Npn \@@_tmp:w #1#2#3
+ {
+ \cs_new_protected:Npn #2 ##1 { #1 { \@@_build:n {##1} } }
+ \cs_new_protected:Npn #3 ##1 { #1 { \@@_build:N ##1 } }
+ \prg_new_protected_conditional:Npnn #2 ##1##2##3 { T , F , TF }
+ { #1 { \@@_build:n {##1} } {##2} ##3 \@@_return: }
+ \prg_new_protected_conditional:Npnn #3 ##1##2##3 { T , F , TF }
+ { #1 { \@@_build:N ##1 } {##2} ##3 \@@_return: }
+ }
+\@@_tmp:w \@@_extract_once:nnN
+ \regex_extract_once:nnN \regex_extract_once:NnN
+\@@_tmp:w \@@_extract_all:nnN
+ \regex_extract_all:nnN \regex_extract_all:NnN
+\@@_tmp:w \@@_replace_once:nnN
+ \regex_replace_once:nnN \regex_replace_once:NnN
+\@@_tmp:w \@@_replace_all:nnN
+ \regex_replace_all:nnN \regex_replace_all:NnN
+\@@_tmp:w \@@_split:nnN \regex_split:nnN \regex_split:NnN
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsubsection{Variables and helpers for user functions}
+%
+% \begin{variable}{\l_@@_match_count_int}
+% The number of matches found so far is stored
+% in \cs{l_@@_match_count_int}. This is only used
+% in the \cs{regex_count:nnN} functions.
+% \begin{macrocode}
+\int_new:N \l_@@_match_count_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{@@_begin, @@_end}
+% Those flags are raised to indicate extra begin-group
+% or end-group tokens when extracting submatches.
+% \begin{macrocode}
+\flag_new:n { @@_begin }
+\flag_new:n { @@_end }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_min_submatch_int, \l_@@_submatch_int, \l_@@_zeroth_submatch_int}
+% The end-points of each submatch are stored in two arrays whose index \meta{submatch} ranges
+% from \cs{l_@@_min_submatch_int} (inclusive) to
+% \cs{l_@@_submatch_int} (exclusive). Each successful match comes
+% with a $0$-th submatch (the full match), and one match for each
+% capturing group: submatches corresponding to the last successful
+% match are labelled starting at \texttt{zeroth_submatch}. The entry
+% \cs{l_@@_zeroth_submatch_int} in \cs{g_@@_submatch_prev_intarray} holds
+% the position at which that match attempt started: this is used for
+% splitting and replacements.
+% \begin{macrocode}
+\int_new:N \l_@@_min_submatch_int
+\int_new:N \l_@@_submatch_int
+\int_new:N \l_@@_zeroth_submatch_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\g_@@_submatch_prev_intarray, \g_@@_submatch_begin_intarray, \g_@@_submatch_end_intarray}
+% Hold the place where the match attempt began and the end-points of each submatch.
+% Each of the three arrays is created with $65536$ entries.
+% \begin{macrocode}
+\__intarray_new:Nn \g_@@_submatch_prev_intarray { 65536 }
+\__intarray_new:Nn \g_@@_submatch_begin_intarray { 65536 }
+\__intarray_new:Nn \g_@@_submatch_end_intarray { 65536 }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[aux]{\@@_return:}
+% Shared tail of every user conditional: inspect the global boolean
+% \cs{g_@@_success_bool} and hand control to
+% \cs{prg_return_true:} when it is true, to
+% \cs{prg_return_false:} otherwise.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_return:
+ {
+ \bool_if:NTF \g_@@_success_bool
+ { \prg_return_true: }
+ { \prg_return_false: }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Matching}
+%
+% \begin{macro}[aux]{\@@_if_match:nn}
+% We don't track submatches, and stop after a single match. Build the
+% \textsc{nfa} with |#1|, and perform the match on the query |#2|.
+% Everything happens inside a group, so nothing is returned directly:
+% the callers read the outcome through \cs{@@_return:}, which tests
+% the global \cs{g_@@_success_bool}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_if_match:nn #1#2
+ {
+ \group_begin:
+ \@@_disable_submatches:
+ \@@_single_match:
+ #1
+ \@@_match:n {#2}
+ \group_end:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_count:nnN}
+% Again, we don't care about submatches. Instead of aborting after the
+% first \enquote{longest match} is found, we search for multiple
+% matches, incrementing \cs{l_@@_match_count_int} every time to
+% record the number of matches. Build the \textsc{nfa} and match. At
+% the end, store the result in the user's variable.
+% The arguments are the code building the \textsc{nfa} (|#1|), the
+% query (|#2|) and the integer variable receiving the count (|#3|).
+% The \cs{exp_args:NNNo} expands \cs{int_use:N}
+% \cs{l_@@_match_count_int} before \cs{group_end:} is executed, so
+% the count survives the end of the group and is then assigned
+% locally to |#3| outside of it.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_count:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_disable_submatches:
+ \int_zero:N \l_@@_match_count_int
+ \@@_multi_match:n { \int_incr:N \l_@@_match_count_int }
+ #1
+ \@@_match:n {#2}
+ \exp_args:NNNo
+ \group_end:
+ \int_set:Nn #3 { \int_use:N \l_@@_match_count_int }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Extracting submatches}
+%
+% \begin{macro}[aux]{\@@_extract_once:nnN, \@@_extract_all:nnN}
+% Match once or multiple times. After each match (or after the only
+% match), extract the submatches using \cs{@@_extract:}. At the
+% end, store the sequence containing all the submatches into the user
+% variable |#3| after closing the group.
+% In the single-match case \cs{@@_extract:} is called once after
+% \cs{@@_match:n}; in the multiple-match case it is passed to
+% \cs{@@_multi_match:n} instead. The group opened here is closed by
+% \cs{@@_group_end_extract_seq:N}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_extract_once:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_single_match:
+ #1
+ \@@_match:n {#2}
+ \@@_extract:
+ \@@_group_end_extract_seq:N #3
+ }
+\cs_new_protected:Npn \@@_extract_all:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_multi_match:n { \@@_extract: }
+ #1
+ \@@_match:n {#2}
+ \@@_group_end_extract_seq:N #3
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_split:nnN}
+% Splitting at submatches is a bit more tricky. For each match,
+% extract all submatches, and replace the zeroth submatch by the part
+% of the query between the start of the match attempt and the start of
+% the zeroth submatch. This is inhibited if the delimiter matched an
+% empty token list at the start of this match attempt. After the last
+% match, store the last part of the token list, which ranges from the
+% start of the match attempt to the end of the query. This step is
+% inhibited if the last match was empty and at the very end: decrement
+% \cs{l_@@_submatch_int}, which controls which matches will be used.
+% In the code, the inhibition for an empty delimiter is the test
+% that \cs{l_@@_start_pos_int} is less than
+% \cs{l_@@_success_pos_int}. For the zeroth submatch, the end is
+% first set to the old beginning and the beginning is then set to
+% \cs{l_@@_start_pos_int}; the order of these two assignments
+% matters. The corresponding entry of
+% \cs{g_@@_submatch_prev_intarray} is set to $0$, both for each match
+% and for the final part. The group is closed by
+% \cs{@@_group_end_extract_seq:N}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_split:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_multi_match:n
+ {
+ \if_int_compare:w \l_@@_start_pos_int < \l_@@_success_pos_int
+ \@@_extract:
+ \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
+ { \l_@@_zeroth_submatch_int } { 0 }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
+ { \l_@@_zeroth_submatch_int }
+ {
+ \__intarray_item_fast:Nn \g_@@_submatch_begin_intarray
+ { \l_@@_zeroth_submatch_int }
+ }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
+ { \l_@@_zeroth_submatch_int }
+ { \l_@@_start_pos_int }
+ \fi:
+ }
+ #1
+ \@@_match:n {#2}
+%<assert>\assert_int:n { \l_@@_current_pos_int = \l_@@_max_pos_int }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
+ { \l_@@_submatch_int } { 0 }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
+ { \l_@@_submatch_int }
+ { \l_@@_max_pos_int }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
+ { \l_@@_submatch_int }
+ { \l_@@_start_pos_int }
+ \int_incr:N \l_@@_submatch_int
+ \if_meaning:w \c_true_bool \l_@@_empty_success_bool
+ \if_int_compare:w \l_@@_start_pos_int = \l_@@_max_pos_int
+ \int_decr:N \l_@@_submatch_int
+ \fi:
+ \fi:
+ \@@_group_end_extract_seq:N #3
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_end_extract_seq:N}
+% The end-points of submatches are stored as entries of two arrays
+% from \cs{l_@@_min_submatch_int} to
+% \cs{l_@@_submatch_int} (exclusive). Extract the relevant ranges
+% into \cs{l_@@_internal_a_tl}. We detect unbalanced results using
+% the two flags \texttt{@@_begin} and \texttt{@@_end}, raised
+% whenever we see too many begin-group or end-group tokens in a
+% submatch. We disable \cs{__seq_item:n} to prevent two
+% \texttt{x}-expansions.
+% If either flag has been raised, the \texttt{result-unbalanced}
+% error is issued with the heights of the \texttt{@@_end} and
+% \texttt{@@_begin} flags as its second and third arguments. The
+% final assignment to |#1| is wrapped in \cs{use:x} so that
+% \cs{l_@@_internal_a_tl} is expanded before \cs{group_end:} is
+% executed; the assignment itself then takes place outside the
+% group, which was opened by the caller.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_end_extract_seq:N #1
+ {
+ \cs_set_eq:NN \__seq_item:n \scan_stop:
+ \flag_clear:n { @@_begin }
+ \flag_clear:n { @@_end }
+ \tl_set:Nx \l_@@_internal_a_tl
+ {
+ \s__seq
+ \int_step_function:nnnN
+ { \l_@@_min_submatch_int }
+ { 1 }
+ { \l_@@_submatch_int - 1 }
+ \@@_extract_seq_aux:n
+ }
+ \int_compare:nNnF
+ { \flag_height:n { @@_begin } + \flag_height:n { @@_end } }
+ = 0
+ {
+ \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
+ { splitting~or~extracting~submatches }
+ { \flag_height:n { @@_end } }
+ { \flag_height:n { @@_begin } }
+ }
+ \use:x
+ {
+ \group_end:
+ \tl_set:Nn \exp_not:N #1 { \l_@@_internal_a_tl }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, EXP]{\@@_extract_seq_aux:n, \@@_extract_seq_aux:ww}
+% The \texttt{:n} auxiliary builds one item of the sequence of
+% submatches. First compute the brace balance of the submatch, then
+% extract the submatch from the query, adding the appropriate braces
+% and raising a flag if the submatch is not balanced.
+% The \texttt{:ww} auxiliary receives the balance as |#1| and the
+% submatch index as |#2|. A negative balance raises the
+% \texttt{@@_end} flag and puts $-|#1|$ begin-group tokens before
+% the submatch; a positive balance raises the \texttt{@@_begin} flag
+% and puts |#1| end-group tokens after it. The braces are produced
+% with the \cs{if_false:} trick, protected by \cs{exp_not:n}.
+% \begin{macrocode}
+\cs_new:Npn \@@_extract_seq_aux:n #1
+ {
+ \__seq_item:n
+ {
+ \exp_after:wN \@@_extract_seq_aux:ww
+ \__int_value:w \@@_submatch_balance:n {#1} ; #1;
+ }
+ }
+\cs_new:Npn \@@_extract_seq_aux:ww #1; #2;
+ {
+ \if_int_compare:w #1 < 0 \exp_stop_f:
+ \flag_raise:n { @@_end }
+ \prg_replicate:nn {-#1} { \exp_not:n { { \if_false: } \fi: } }
+ \fi:
+ \@@_query_submatch:n {#2}
+ \if_int_compare:w #1 > 0 \exp_stop_f:
+ \flag_raise:n { @@_begin }
+ \prg_replicate:nn {#1} { \exp_not:n { \if_false: { \fi: } } }
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]
+% {\@@_extract:, \@@_extract_b:wn, \@@_extract_e:wn}
+% Our task here is to extract from the property list
+% \cs{l_@@_success_submatches_prop} the list of end-points of
+% submatches, and store them in appropriate array entries, from
+% \cs{l_@@_zeroth_submatch_int} upwards. We begin by emptying those
+% entries. Then for each \meta{key}--\meta{value} pair in
+% the property list update the appropriate entry. This
+% is somewhat a hack: the \meta{key} is a non-negative integer
+% followed by |<| or |>|, which we use in a comparison to $-1$. At the
+% end, store the information about the position at which the match
+% attempt started, in \cs{g_@@_submatch_prev_intarray}.
+% Nothing is done unless \cs{g_@@_success_bool} is true. In the
+% comparison, a key ending in |>| gives a true test (a non-negative
+% integer is greater than $-1$) and selects \cs{@@_extract_e:wn},
+% which sets an entry of \cs{g_@@_submatch_end_intarray}; a key
+% ending in |<| gives a false test and selects
+% \cs{@@_extract_b:wn}, which sets an entry of
+% \cs{g_@@_submatch_begin_intarray}. The same |<| or |>| then
+% terminates the integer expression that computes the array index
+% and delimits the first argument of the selected auxiliary.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_extract:
+ {
+ \if_meaning:w \c_true_bool \g_@@_success_bool
+ \int_set_eq:NN \l_@@_zeroth_submatch_int \l_@@_submatch_int
+ \prg_replicate:nn \l_@@_capturing_group_int
+ {
+ \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray
+ { \l_@@_submatch_int } { 0 }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray
+ { \l_@@_submatch_int } { 0 }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
+ { \l_@@_submatch_int } { 0 }
+ \int_incr:N \l_@@_submatch_int
+ }
+ \prop_map_inline:Nn \l_@@_success_submatches_prop
+ {
+ \if_int_compare:w ##1 - 1 \exp_stop_f:
+ \exp_after:wN \@@_extract_e:wn \__int_value:w
+ \else:
+ \exp_after:wN \@@_extract_b:wn \__int_value:w
+ \fi:
+ \__int_eval:w \l_@@_zeroth_submatch_int + ##1 {##2}
+ }
+ \__intarray_gset_fast:Nnn \g_@@_submatch_prev_intarray
+ { \l_@@_zeroth_submatch_int } { \l_@@_start_pos_int }
+ \fi:
+ }
+\cs_new_protected:Npn \@@_extract_b:wn #1 < #2
+ { \__intarray_gset_fast:Nnn \g_@@_submatch_begin_intarray {#1} {#2} }
+\cs_new_protected:Npn \@@_extract_e:wn #1 > #2
+ { \__intarray_gset_fast:Nnn \g_@@_submatch_end_intarray {#1} {#2} }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Replacement}
+%
+% \begin{macro}[aux]{\@@_replace_once:nnN}
+% Build the \textsc{nfa} and the replacement functions, then find a
+% single match. If the match failed, simply exit the
+% group. Otherwise, we do the replacement. Extract submatches. Compute
+% the brace balance corresponding to replacing this match by the
+% replacement (this depends on submatches). Prepare the replaced token
+% list: the replacement function produces the tokens from the start of
+% the query to the start of the match and the replacement text for
+% this match; we need to add the tokens from the end of the match to
+% the end of the query. Finally, store the result in the user's
+% variable after closing the group: this step involves an additional
+% \texttt{x}-expansion, and checks that braces are balanced in the
+% final result.
+% Here |#1| builds the \textsc{nfa}, |#2| is the replacement text and
+% |#3| is the user's token list variable, which serves both as the
+% query (its contents are obtained by one expansion through
+% \cs{exp_args:No}) and as the place where the result is stored. On
+% success the group is closed by \cs{@@_group_end_replace:N}; on
+% failure it is closed directly and |#3| is left untouched.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replace_once:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_single_match:
+ #1
+ \@@_replacement:n {#2}
+ \exp_args:No \@@_match:n { #3 }
+ \if_meaning:w \c_false_bool \g_@@_success_bool
+ \group_end:
+ \else:
+ \@@_extract:
+ \int_set:Nn \l_@@_balance_int
+ {
+ \@@_replacement_balance_one_match:n
+ { \l_@@_zeroth_submatch_int }
+ }
+ \tl_set:Nx \l_@@_internal_a_tl
+ {
+ \@@_replacement_do_one_match:n { \l_@@_zeroth_submatch_int }
+ \@@_query_range:nn
+ {
+ \__intarray_item_fast:Nn \g_@@_submatch_end_intarray
+ { \l_@@_zeroth_submatch_int }
+ }
+ { \l_@@_max_pos_int }
+ }
+ \@@_group_end_replace:N #3
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_replace_all:nnN}
+% Match multiple times, and for every match, extract submatches and
+% additionally store the position at which the match attempt started.
+% The entries from \cs{l_@@_min_submatch_int} to
+% \cs{l_@@_submatch_int} hold information about submatches of every
+% match in order; each match corresponds to
+% \cs{l_@@_capturing_group_int} consecutive entries.
+% Compute the brace balance corresponding to doing all the
+% replacements: this is the sum of brace balances for replacing each
+% match. Join together the replacement texts for each match (including
+% the part of the query before the match), and the end of the query.
+% As in the single-replacement case, |#3| is both the query (expanded
+% once by \cs{exp_args:No}) and the variable receiving the result.
+% Both loops step through the entries with a step of
+% \cs{l_@@_capturing_group_int}, so that the function is applied to
+% the zeroth submatch of each match. The end of the query is the
+% range from \cs{l_@@_start_pos_int} to \cs{l_@@_max_pos_int}. The
+% group is closed by \cs{@@_group_end_replace:N}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replace_all:nnN #1#2#3
+ {
+ \group_begin:
+ \@@_multi_match:n { \@@_extract: }
+ #1
+ \@@_replacement:n {#2}
+ \exp_args:No \@@_match:n {#3}
+ \int_set:Nn \l_@@_balance_int
+ {
+ 0
+ \int_step_function:nnnN
+ { \l_@@_min_submatch_int }
+ \l_@@_capturing_group_int
+ { \l_@@_submatch_int - 1 }
+ \@@_replacement_balance_one_match:n
+ }
+ \tl_set:Nx \l_@@_internal_a_tl
+ {
+ \int_step_function:nnnN
+ { \l_@@_min_submatch_int }
+ \l_@@_capturing_group_int
+ { \l_@@_submatch_int - 1 }
+ \@@_replacement_do_one_match:n
+ \@@_query_range:nn
+ \l_@@_start_pos_int \l_@@_max_pos_int
+ }
+ \@@_group_end_replace:N #3
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux]{\@@_group_end_replace:N}
+% Final step of a replacement. A non-zero \cs{l_@@_balance_int}
+% triggers the \texttt{result-unbalanced} error, whose last two
+% arguments are the numbers of left and right braces to be inserted.
+% The user's variable |#1| then receives the \texttt{x}-expansion of
+% \cs{l_@@_internal_a_tl}, preceded by extra begin-group tokens when
+% the balance is negative and followed by extra end-group tokens when
+% it is positive. Everything is expanded inside \cs{use:x} before
+% \cs{group_end:} runs, so the assignment takes place outside the
+% group.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_group_end_replace:N #1
+ {
+ \int_compare:nNnF { \l_@@_balance_int } = { 0 }
+ {
+ \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
+ { replacing }
+ { \int_max:nn { - \l_@@_balance_int } { 0 } }
+ { \int_max:nn { \l_@@_balance_int } { 0 } }
+ }
+ \use:x
+ {
+ \group_end:
+ \tl_set:Nn \exp_not:N #1
+ {
+ \if_int_compare:w \l_@@_balance_int < 0 \exp_stop_f:
+ \prg_replicate:nn { - \l_@@_balance_int }
+ { { \if_false: } \fi: }
+ \fi:
+ \l_@@_internal_a_tl
+ \if_int_compare:w \l_@@_balance_int > 0 \exp_stop_f:
+ \prg_replicate:nn { \l_@@_balance_int }
+ { \if_false: { \fi: } }
+ \fi:
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsubsection{Storing and showing compiled patterns}
+%
+% \subsection{Messages}
+%
+% Messages for the preparsing phase. The \texttt{trailing-backslash}
+% message takes no argument. In \texttt{x-missing-rbrace} and
+% \texttt{x-overflow}, |#1| is shown as the contents of the
+% |\x{...}| construction; in \texttt{x-overflow} it is the character
+% code, printed in hexadecimal in the sample input and compared with
+% \cs{c_max_char_int}.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { trailing-backslash }
+ { Trailing~escape~character~'\iow_char:N\\'. }
+ {
+ A~regular~expression~or~its~replacement~text~ends~with~
+ the~escape~character~'\iow_char:N\\'.~It~will~be~ignored.
+ }
+\__msg_kernel_new:nnnn { regex } { x-missing-rbrace }
+ { Missing~closing~brace~in~'\iow_char:N\\x'~hexadecimal~sequence. }
+ {
+ You~wrote~something~like~
+ '\iow_char:N\\x\{...#1'.~
+ The~closing~brace~is~missing.
+ }
+\__msg_kernel_new:nnnn { regex } { x-overflow }
+ { Character~code~'#1'~too~large~in~'\iow_char:N\\x'~hexadecimal~sequence. }
+ {
+ You~wrote~something~like~
+ '\iow_char:N\\x\{\int_to_Hex:n{#1}\}'.~
+ The~character~code~#1~is~larger~than~
+ the~maximum~value~\int_use:N \c_max_char_int.
+ }
+% \end{macrocode}
+%
+% Invalid quantifier. The first argument |#1| is the braced
+% quantifier read so far and |#2| is the offending character.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { invalid-quantifier }
+ { Braced~quantifier~'#1'~may~not~be~followed~by~'#2'. }
+ {
+ The~character~'#2'~is~invalid~in~the~braced~quantifier~'#1'.~
+ The~only~valid~quantifiers~are~'*',~'?',~'+',~'{<int>}',~
+ '{<min>,}'~and~'{<min>,<max>}',~optionally~followed~by~'?'.
+ }
+% \end{macrocode}
+%
+% Messages for missing or extra closing brackets and parentheses, with
+% some fancy singular/plural handling for the case of parentheses.
+% Only \texttt{missing-rparen} takes an argument: |#1| is the number
+% of missing right parentheses, which selects between
+% \enquote{parenthesis} and \enquote{parentheses}.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { missing-rbrack }
+ { Missing~right~bracket~inserted~in~regular~expression. }
+ {
+ LaTeX~was~given~a~regular~expression~where~a~character~class~
+ was~started~with~'[',~but~the~matching~']'~is~missing.
+ }
+\__msg_kernel_new:nnnn { regex } { missing-rparen }
+ {
+ Missing~right~
+ \int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } ~
+ inserted~in~regular~expression.
+ }
+ {
+ LaTeX~was~given~a~regular~expression~with~\int_eval:n {#1} ~
+ more~left~parentheses~than~right~parentheses.
+ }
+\__msg_kernel_new:nnnn { regex } { extra-rparen }
+ { Extra~right~parenthesis~ignored~in~regular~expression. }
+ {
+ LaTeX~came~across~a~closing~parenthesis~when~no~submatch~group~
+ was~open.~The~parenthesis~will~be~ignored.
+ }
+% \end{macrocode}
+%
+% Some escaped alphanumerics are not allowed everywhere.
+% The argument |#1| is the character following the backslash. The
+% wording depends on the context in which the message is issued, as
+% tested by \cs{@@_if_in_cs:TF} and \cs{@@_if_in_class:TF}: within a
+% control sequence test, in a character class, or following a
+% category test.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { bad-escape }
+ {
+ Invalid~escape~'\iow_char:N\\#1'~
+ \@@_if_in_cs:TF { within~a~control~sequence. }
+ {
+ \@@_if_in_class:TF
+ { in~a~character~class. }
+ { following~a~category~test. }
+ }
+ }
+ {
+ The~escape~sequence~'\iow_char:N\\#1'~may~not~appear~
+ \@@_if_in_cs:TF
+ {
+ within~a~control~sequence~test~introduced~by~
+ '\iow_char:N\\c\iow_char:N\{'.
+ }
+ {
+ \@@_if_in_class:TF
+ { within~a~character~class~ }
+ { following~a~category~test~such~as~'\iow_char:N\\cL'~ }
+ because~it~does~not~match~exactly~one~character.
+ }
+ }
+% \end{macrocode}
+%
+% Range errors. In both messages |#1| and |#2| are the two
+% end-points of the range as written by the user; the
+% \texttt{range-backwards} message also prints their character
+% codes.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { range-missing-end }
+ { Invalid~end-point~for~range~'#1-#2'~in~character~class. }
+ {
+ The~end-point~'#2'~of~the~range~'#1-#2'~may~not~serve~as~an~
+ end-point~for~a~range:~alphanumeric~characters~should~not~be~
+ escaped,~and~non-alphanumeric~characters~should~be~escaped.
+ }
+\__msg_kernel_new:nnnn { regex } { range-backwards }
+ { Range~'[#1-#2]'~out~of~order~in~character~class. }
+ {
+ In~ranges~of~characters~'[x-y]'~appearing~in~character~classes,~
+ the~first~character~code~must~not~be~larger~than~the~second.~
+ Here,~'#1'~has~character~code~\int_eval:n {`#1},~while~
+ '#2'~has~character~code~\int_eval:n {`#2}.
+ }
+% \end{macrocode}
+%
+% Errors related to |\c| and |\u|. In \texttt{c-missing-category},
+% |#1| is the character found after |\c|. In
+% \texttt{u-missing-rbrace}, |#2| is either empty, when the end of
+% the string was reached, or the escaped alphanumeric character that
+% was encountered. Spaces in the message text must be written as |~|
+% since ordinary spaces are ignored, in particular before the opening
+% quote of the escaped character in \texttt{u-missing-rbrace}.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { c-bad-mode }
+ { Invalid~nested~'\iow_char:N\\c'~escape~in~regular~expression. }
+ {
+ The~'\iow_char:N\\c'~escape~cannot~be~used~within~
+ a~control~sequence~test~'\iow_char:N\\c{...}'.~
+ To~combine~several~category~tests,~use~'\iow_char:N\\c[...]'.
+ }
+\__msg_kernel_new:nnnn { regex } { c-missing-rbrace }
+ { Missing~right~brace~inserted~for~'\iow_char:N\\c'~escape. }
+ {
+ LaTeX~was~given~a~regular~expression~where~a~
+ '\iow_char:N\\c\iow_char:N\{...'~construction~was~not~ended~
+ with~a~closing~brace~'\iow_char:N\}'.
+ }
+\__msg_kernel_new:nnnn { regex } { c-missing-rbrack }
+ { Missing~right~bracket~inserted~for~'\iow_char:N\\c'~escape. }
+ {
+ A~construction~'\iow_char:N\\c[...'~appears~in~a~
+ regular~expression,~but~the~closing~']'~is~not~present.
+ }
+\__msg_kernel_new:nnnn { regex } { c-missing-category }
+ { Invalid~character~'#1'~following~'\iow_char:N\\c'~escape. }
+ {
+ In~regular~expressions,~the~'\iow_char:N\\c'~escape~sequence~
+ may~only~be~followed~by~a~left~brace,~a~left~bracket,~or~a~
+ capital~letter~representing~a~character~category,~namely~
+ one~of~'ABCDELMOPSTU'.
+ }
+\__msg_kernel_new:nnnn { regex } { c-trailing }
+ { Trailing~category~code~escape~'\iow_char:N\\c'... }
+ {
+ A~regular~expression~ends~with~'\iow_char:N\\c'~followed~
+ by~a~letter.~It~will~be~ignored.
+ }
+\__msg_kernel_new:nnnn { regex } { u-missing-lbrace }
+ { Missing~left~brace~following~'\iow_char:N\\u'~escape. }
+ {
+ The~'\iow_char:N\\u'~escape~sequence~must~be~followed~by~
+ a~brace~group~with~the~name~of~the~variable~to~use.
+ }
+\__msg_kernel_new:nnnn { regex } { u-missing-rbrace }
+ { Missing~right~brace~inserted~for~'\iow_char:N\\u'~escape. }
+ {
+ LaTeX~
+ \str_if_eq_x:nnTF { } {#2}
+ { reached~the~end~of~the~string~ }
+ { encountered~an~escaped~alphanumeric~character~'\iow_char:N\\#2'~ }
+ when~parsing~the~argument~of~an~'\iow_char:N\\u\iow_char:N\{...\}'~escape.
+ }
+% \end{macrocode}
+%
+% Errors when encountering the \textsc{posix} syntax |[:...:]|.
+% In \texttt{posix-unsupported}, |#1| is the character that follows
+% the opening bracket; it is printed on both sides of the bracketed
+% text. In \texttt{posix-unknown}, |#1| is the unknown class name. In
+% \texttt{posix-missing-close}, |#1| is the \textsc{posix} syntax
+% read so far and |#2| is what was found instead of |:]|.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { posix-unsupported }
+ { POSIX~collating~element~'[#1 ~ #1]'~not~supported. }
+ {
+ The~'[.foo.]'~and~'[=bar=]'~syntaxes~have~a~special~meaning~
+ in~POSIX~regular~expressions.~This~is~not~supported~by~LaTeX.~
+ Maybe~you~forgot~to~escape~a~left~bracket~in~a~character~class?
+ }
+\__msg_kernel_new:nnnn { regex } { posix-unknown }
+ { POSIX~class~'[:#1:]'~unknown. }
+ {
+ '[:#1:]'~is~not~among~the~known~POSIX~classes~
+ '[:alnum:]',~'[:alpha:]',~'[:ascii:]',~'[:blank:]',~
+ '[:cntrl:]',~'[:digit:]',~'[:graph:]',~'[:lower:]',~
+ '[:print:]',~'[:punct:]',~'[:space:]',~'[:upper:]',~
+ '[:word:]',~and~'[:xdigit:]'.
+ }
+\__msg_kernel_new:nnnn { regex } { posix-missing-close }
+ { Missing~closing~':]'~for~POSIX~class. }
+ { The~POSIX~syntax~'#1'~must~be~followed~by~':]',~not~'#2'. }
+% \end{macrocode}
+%
+% In various cases, the result of a \pkg{l3regex} operation can leave us
+% with an unbalanced token list, which we must re-balance by adding
+% begin-group or end-group character tokens.
+% The first argument |#1| describes the operation being performed
+% (it completes the sentence \enquote{Missing brace inserted
+% when~\ldots}), |#2| is the number of left braces inserted and |#3|
+% the number of right braces inserted.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { result-unbalanced }
+ { Missing~brace~inserted~when~#1. }
+ {
+ LaTeX~was~asked~to~do~some~regular~expression~operation,~
+ and~the~resulting~token~list~would~not~have~the~same~number~
+ of~begin-group~and~end-group~tokens.~Braces~were~inserted:~
+ #2~left,~#3~right.
+ }
+% \end{macrocode}
+%
+% Error messages for unknown options and unknown special groups. In
+% both, |#1| is the offending text as written by the user.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { unknown-option }
+ { Unknown~option~'#1'~for~regular~expressions. }
+ {
+ The~only~available~option~is~'case-insensitive',~toggled~by~
+ '(?i)'~and~'(?-i)'.
+ }
+\__msg_kernel_new:nnnn { regex } { special-group-unknown }
+ { Unknown~special~group~'#1~...'~in~a~regular~expression. }
+ {
+ The~only~valid~constructions~starting~with~'(?'~are~
+ '(?:~...~)',~'(?|~...~)',~'(?i)',~and~'(?-i)'.
+ }
+% \end{macrocode}
+%
+% Errors in the replacement text. In \texttt{replacement-c}, |#1| is
+% what followed |\c|. In \texttt{replacement-catcode-in-cs}, |#1| and
+% |#3| are printed after |\c| to show the ignored category code
+% construction. In \texttt{replacement-missing-rbrace} and
+% \texttt{replacement-missing-rparen}, |#1| is the number of missing
+% delimiters and selects singular or plural wording. Within message
+% text a bare |\\| starts a new line, so every literal backslash is
+% written as |\iow_char:N\\|.
+% \begin{macrocode}
+\__msg_kernel_new:nnnn { regex } { replacement-c }
+ { Misused~'\iow_char:N\\c'~command~in~a~replacement~text. }
+ {
+ In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
+ can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~
+ or~a~brace~group,~not~by~'#1'.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-u }
+ { Misused~'\iow_char:N\\u'~command~in~a~replacement~text. }
+ {
+ In~a~replacement~text,~the~'\iow_char:N\\u'~escape~sequence~
+ must~be~followed~by~a~brace~group~holding~the~name~of~the~
+ variable~to~use.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-g }
+ {
+ Missing~brace~for~the~'\iow_char:N\\g'~construction~
+ in~a~replacement~text.
+ }
+ {
+ In~the~replacement~text~for~a~regular~expression~search,~
+ submatches~are~represented~either~as~'\iow_char:N \\g{dd..d}',~
+ or~'\iow_char:N\\d',~where~'d'~are~single~digits.~Here,~a~brace~is~missing.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-catcode-end }
+ {
+ Missing~character~for~the~'\iow_char:N\\c<category><character>'~
+ construction~in~a~replacement~text.
+ }
+ {
+ In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
+ can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~representing~
+ the~character~category.~Then,~a~character~must~follow.~LaTeX~
+ reached~the~end~of~the~replacement~when~looking~for~that.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-catcode-in-cs }
+ {
+ Category~code~'\iow_char:N\\c#1#3'~ignored~inside~
+ '\iow_char:N\\c\{...\}'~in~a~replacement~text.
+ }
+ {
+ In~a~replacement~text,~the~category~codes~of~the~argument~of~
+ '\iow_char:N\\c\{...\}'~are~ignored~when~building~the~control~
+ sequence~name.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-null-space }
+ { TeX~cannot~build~a~space~token~with~character~code~0. }
+ {
+ You~asked~for~a~character~token~with~category~space,~
+ and~character~code~0,~for~instance~through~
+ '\iow_char:N\\cS\iow_char:N\\x00'.~
+ This~specific~case~is~impossible~and~will~be~replaced~
+ by~a~normal~space.
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-missing-rbrace }
+ { Missing~right~brace~inserted~in~replacement~text. }
+ {
+ There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
+ missing~right~\int_compare:nTF { #1 = 1 } { brace } { braces } .
+ }
+\__msg_kernel_new:nnnn { regex } { replacement-missing-rparen }
+ { Missing~right~parenthesis~inserted~in~replacement~text. }
+ {
+ There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
+ missing~right~\int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } .
+ }
+% \end{macrocode}
+%
+% \begin{macro}[aux]{\@@_msg_repeated:nnN}
+% This is not technically a message, but seems related enough to go
+% here. The arguments are: |#1| is the minimum number of repetitions;
+% |#2| is the number of allowed extra repetitions ($-1$ for infinite
+% number), and |#3| tells us about laziness.
+% \begin{macrocode}
+\cs_new:Npn \@@_msg_repeated:nnN #1#2#3
+ {
+ % Nothing is produced when the two numeric arguments are 1 and 0,
+ % i.e. for exactly one occurrence.
+ \str_if_eq_x:nnF { #1 #2 } { 1 0 }
+ {
+ , ~ repeated ~
+ % Select the wording from the second argument: -1, 0, or anything
+ % else (the final branch).
+ \int_case:nnF {#2}
+ {
+ { -1 } { #1~or~more~times,~\bool_if:NTF #3 { lazy } { greedy } }
+ { 0 } { #1~times }
+ }
+ {
+ between~#1~and~\int_eval:n {#1+#2}~times,~
+ \bool_if:NTF #3 { lazy } { greedy }
+ }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Code for tracing}
+%
+% The tracing code is still very experimental, and is meant to be used
+% with the \pkg{l3trace} package, currently in \texttt{l3trial}.
+%
+% \begin{macro}[int]{\@@_trace_states:n}
+% This function lists the contents of all states of the \textsc{nfa},
+% stored in \tn{toks} from \cs{l_@@_min_state_int} to
+% \cs{l_@@_max_state_int} (excluded).
+% \begin{macrocode}
+%<*trace>
+\cs_new_protected:Npn \@@_trace_states:n #1
+ {
+ \int_step_inline:nnnn
+ \l_@@_min_state_int
+ { 1 }
+ { \l_@@_max_state_int - 1 }
+ {
+ \trace:nnx { regex } { #1 }
+ { \iow_char:N \\toks ##1 = { \@@_toks_use:w ##1 } }
+ }
+ }
+%</trace>
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macrocode}
+%</initex|package>
+% \end{macrocode}
+%
+% \end{implementation}
+%
+% \PrintIndex
+% \endinput
+%^^A NOT IMPLEMENTED
+%^^A \p{xx} a character with the xx property
+%^^A \P{xx} a character without the xx property
+%^^A [[:xxx:]] positive POSIX named set
+%^^A [[:^xxx:]] negative POSIX named set
+%^^A (?=...) positive look ahead
+%^^A (?!...) negative look ahead
+%^^A (?<=...) positive look behind
+%^^A (?<!...) negative look behind
+%^^A (?<name>...) or (?'name'...) or (?P<name>...)
+%^^A named capturing group
+%^^A \R a newline sequence
+%^^A \X an extended Unicode sequence
+%^^A (?C) or (?Cn) callout with data n
+%^^A (?R) recurse whole pattern
+%^^A (?[+-]n) or \g<[+-]n> or (?&name) or (?P>name) or \g<name>
+%^^A call subpattern
+%^^A (?([+-]n)... or (?(<name>)...
+%^^A reference condition
+%^^A (?(R)... or (?(Rn)... or (?(R&name)...
+%^^A recursion condition
+%^^A (?(DEFINE)... define subpattern for reference
+%^^A (?(assert)... assertion condition
+%^^A (*ACCEPT) force successful match
+%^^A (*FAIL) force backtrack; synonym (*F)
+%^^A (*COMMIT) overall failure, no advance of starting point
+%^^A (*PRUNE) advance to next starting character
+%^^A (*SKIP) advance start to current matching position
+%^^A (*THEN) local failure, backtrack to next alternation
+%^^A (*CR) or (*LF) or (*CRLF) or (*ANYCRLF) or (*ANY)
+%^^A newline convention
+%^^A (*BSR_ANYCRLF) or (*BSR_UNICODE)
+%^^A change what \R matches.
+%^^A
+%^^A \cx "control-x", where x is any ASCII character
+%^^A \C one byte, even in UTF-8 mode (best avoided)
+%^^A + possessive quantifiers
+%^^A (?>...) atomic, non-capturing group
+%^^A (?#....) comment (not nestable)
+%^^A (?JmsUx) options (duplicate names; multiline; single line;
+%^^A ungreedy; extended)
+%^^A (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
+%^^A (*UTF8) set UTF-8 mode (PCRE_UTF8)
+%^^A (*UCP) set PCRE_UCP (use Unicode properties for \d etc)
+%^^A \n or \gn or \g{[-]n} or \g{name} or (?P=name)
+%^^A or \k<name> or \k'name' or \k{name}
+%^^A back-references
Property changes on: trunk/Master/texmf-dist/source/latex/l3kernel/l3regex.dtx
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3seq.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3seq.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3seq.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3skip.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3skip.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3skip.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -42,7 +42,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3sort.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3sort.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3sort.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3str.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3str.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3str.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3sys.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3sys.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3sys.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Added: trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-analysis.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-analysis.dtx (rev 0)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-analysis.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -0,0 +1,1109 @@
+% \iffalse meta-comment
+%
+%% File: l3tl-analysis.dtx Copyright (C) 2011-2012,2015-2017 The LaTeX3 Project%
+%
+% It may be distributed and/or modified under the conditions of the
+% LaTeX Project Public License (LPPL), either version 1.3c of this
+% license or (at your option) any later version. The latest version
+% of this license is in the file
+%
+% http://www.latex-project.org/lppl.txt
+%
+% This file is part of the "l3kernel bundle" (The Work in LPPL)
+% and all files in that bundle must be distributed together.
+%
+% -----------------------------------------------------------------------
+%
+% The development version of the bundle can be found at
+%
+% https://github.com/latex3/latex3
+%
+% for those people who are interested.
+%
+%<*driver>
+\documentclass[full]{l3doc}
+\begin{document}
+ \DocInput{\jobname.dtx}
+\end{document}
+%</driver>
+% \fi
+%
+%
+% \title{^^A
+% The \textsf{l3tl-analysis} package: analysing token lists^^A
+% }
+%
+% \author{^^A
+% The \LaTeX3 Project\thanks
+% {^^A
+% E-mail:
+% \href{mailto:latex-team at latex-project.org}
+% {latex-team at latex-project.org}^^A
+% }^^A
+% }
+%
+% \date{Released 2017/05/29}
+%
+% \maketitle
+%
+% \begin{documentation}
+%
+% \section{\pkg{l3tl-analysis} documentation}
+%
+% This module mostly provides internal functions for use in the
+% \pkg{l3regex} module. However, it provides as a side-effect a user
+% debugging function, very similar to the \cs{ShowTokens} macro from the
+% \pkg{ted} package.
+%
+% \begin{function}[added = 2017-05-26]{\tl_show_analysis:N, \tl_show_analysis:n}
+% \begin{syntax}
+% \cs{tl_show_analysis:n} \Arg{token list}
+% \end{syntax}
+% Displays to the terminal the detailed decomposition of the
+% \meta{token list} into tokens, showing the category code of each
+% character token, the meaning of control sequences and active
+% characters, and the value of registers.
+% \end{function}
+%
+% \end{documentation}
+%
+% \begin{implementation}
+%
+% \section{\pkg{l3tl-analysis} implementation}
+%
+% \subsection{Internal functions}
+%
+% \begin{variable}{\s__tl}
+% The format used to store token lists internally uses the scan mark
+% \cs{s__tl} as a delimiter.
+% \end{variable}
+%
+% \begin{function}{\__tl_analysis_map_inline:nn}
+% \begin{syntax}
+% \cs{__tl_analysis_map_inline:nn} \Arg{token list} \Arg{inline function}
+% \end{syntax}
+% Applies the \meta{inline function} to each individual \meta{token}
+% in the \meta{token list}. The \meta{inline function} receives three
+% arguments:
+% \begin{itemize}
+% \item \meta{tokens}, which both \texttt{o}-expand and
+% \texttt{x}-expand to the \meta{token}. The detailed form of
+% \meta{tokens} may change in later releases.
+% \item \meta{catcode}, a capital hexadecimal digit which denotes
+% the category code of the \meta{token} (0: control sequence, 1:
+% begin-group, 2: end-group, 3: math shift, 4: alignment tab, 6:
+% parameter, 7: superscript, 8: subscript, A: space, B: letter,
+% C: other, D: active).
+% \item \meta{char code}, a decimal representation of the character
+% code of the token, $-1$ if it is a control sequence (with
+% \meta{catcode} $0$).
+% \end{itemize}
+% \end{function}
+%
+% For optimizations in \pkg{l3regex} (when matching control sequences),
+% it may be useful to provide a \cs{__tl_analysis_from_str_map_inline:nn}
+% function, perhaps named \cs{__str_analysis_map_inline:nn}.
+%
+% \subsection{Internal format}
+%
+% The task of the \pkg{l3tl-analysis} module is to convert token lists
+% to an internal format which allows us to extract all the relevant
+% information about individual tokens (category code, character code),
+% as well as reconstruct the token list quickly. This internal format is
+% used in \pkg{l3regex} where we need to support arbitrary tokens, and
+% it is used in conversion functions in \pkg{l3str-convert}, where we wish to
+% support clusters of characters instead of single tokens.
+%
+% We thus need a way to encode any \meta{token} (even begin-group and
+% end-group character tokens) in a way amenable to manipulating tokens
+% individually. The best we can do is to find \meta{tokens} which both
+% \texttt{o}-expand and \texttt{x}-expand to the given
+% \meta{token}. Collecting more information about the category code and
+% character code is also useful for regular expressions, since most
+% regexes are catcode-agnostic. The internal format thus takes the form
+% of a succession of items of the form
+% \begin{quote}
+% \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
+% \end{quote}
+% The \meta{tokens} \texttt{o}- \emph{and} \texttt{x}-expand to the
+% original token in the token list or to the cluster of tokens
+% corresponding to one Unicode character in the given encoding (for
+% \pkg{l3str-convert}). The \meta{catcode} is given as a single hexadecimal
+% digit, $0$ for control sequences. The \meta{char code} is given as a
+% decimal number, $-1$ for control sequences.
+%
+% Using delimited arguments lets us build the \meta{tokens}
+% progressively when doing an encoding conversion in \pkg{l3str-convert}. On the
+% other hand, the delimiter \cs{s__tl} may not appear unbraced in
+% \meta{tokens}. This is not a problem because we are careful to wrap
+% control sequences in braces (as an argument to \cs{exp_not:n}) when
+% converting from a general token list to the internal format.
+%
+% The current rule for converting a \meta{token} to a balanced set of
+% \meta{tokens} which both \texttt{o}-expands and \texttt{x}-expands to
+% it is the following.
+% \begin{itemize}
+% \item A control sequence |\cs| becomes |\exp_not:n { \cs }|
+% \cs{s__tl} $0$ $-1$ \cs{s__tl}.
+% \item A begin-group character |{| becomes \cs{exp_after:wN} |{|
+% \cs{if_false:} |}| \cs{fi:} \cs{s__tl} $1$ \meta{char code}
+% \cs{s__tl}.
+% \item An end-group character |}| becomes \cs{if_false:} |{| \cs{fi:}
+% |}| \cs{s__tl} $2$ \meta{char code} \cs{s__tl}.
+% \item A character with any other category code becomes
+% \cs{exp_not:n} \Arg{character} \cs{s__tl} \meta{hex catcode}
+% \meta{char code} \cs{s__tl}.
+% \end{itemize}
+%
+% ^^A todo: ask LuaTeX list for an \ifx\undefined <active char>
+% ^^A which does not add the <active char> in memory.
+%
+% \begin{macrocode}
+%<*initex|package>
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<@@=tl_analysis>
+% \end{macrocode}
+%
+% \subsection{Variables and helper functions}
+%
+% \begin{variable}{\s__tl}
+% The scan mark \cs{s__tl} is used as a delimiter in the internal
+% format. This is more practical than using a quark, because we would
+% then need to control expansion much more carefully: compare
+% \cs{__int_value:w} |`#1| \cs{s__tl} with \cs{__int_value:w} |`#1|
+% \cs{exp_stop_f:} \cs{exp_not:N} \cs{q_mark} to extract a character
+% code followed by the delimiter in an \texttt{x}-expansion.
+% \begin{macrocode}
+\__scan_new:N \s__tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_internal_tl}
+% This token list variable is used to hand the argument of
+% \cs{tl_show_analysis:n} to \cs{tl_show_analysis:N}.
+% \begin{macrocode}
+\tl_new:N \l_@@_internal_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_token}
+% \begin{variable}{\l_@@_char_token}
+% The tokens in the token list are probed with the \TeX{} primitive
+% \tn{futurelet}. We use \cs{l_@@_token} in that
+% construction. In some cases, we convert the following token to a
+% string before probing it: then the token variable used is
+% \cs{l_@@_char_token}.
+% \begin{macrocode}
+\cs_new_eq:NN \l_@@_token ?
+\cs_new_eq:NN \l_@@_char_token ?
+% \end{macrocode}
+% \end{variable}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_normal_int}
+% The number of normal (\texttt{N}-type argument) tokens since the
+% last special token.
+% \begin{macrocode}
+\int_new:N \l_@@_normal_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_index_int}
+% During the first pass, this is the index in the array being built.
+% During the second pass, it is equal to the maximum index in the
+% array from the first pass.
+% \begin{macrocode}
+\int_new:N \l_@@_index_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_nesting_int}
+% Nesting depth of explicit begin-group and end-group characters
+% during the first pass. This lets us detect the end of the token list
+% without a reserved end-marker.
+% \begin{macrocode}
+\int_new:N \l_@@_nesting_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_type_int}
+% When encountering special characters, we record their \enquote{type}
+% in this integer.
+% \begin{macrocode}
+\int_new:N \l_@@_type_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\g_@@_result_tl}
+% The result of the conversion is stored in this token list, with a
+% succession of items of the form
+% \begin{quote}
+% \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
+% \end{quote}
+% \begin{macrocode}
+\tl_new:N \g_@@_result_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}[int, EXP]{\@@_extract_charcode:}
+% \begin{macro}[aux, EXP]{\@@_extract_charcode_aux:w}
+% Extracting the character code from the meaning of
+% \cs{l_@@_token}. This has no error checking, and should
+% only be assumed to work for begin-group and end-group character
+% tokens. It produces a number in the form |`|\meta{char}.
+% \begin{macrocode}
+\cs_new:Npn \@@_extract_charcode:
+ {
+ \exp_after:wN \@@_extract_charcode_aux:w
+ \token_to_meaning:N \l_@@_token
+ }
+\cs_new:Npn \@@_extract_charcode_aux:w #1 ~ #2 ~ { ` }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_cs_space_count:NN}
+% \begin{macro}[aux, EXP]{\@@_cs_space_count:w}
+% \begin{macro}[aux, EXP]{\@@_cs_space_count_end:w}
+% Counts the number of spaces in the string representation of its
+% second argument, as well as the number of characters following the
+% last space in that representation, and feeds the two numbers as
+% semicolon-delimited arguments to the first argument. When this
+% function is used, the escape character is printable and non-space.
+% \begin{macrocode}
+\cs_new:Npn \@@_cs_space_count:NN #1 #2
+ {
+ \exp_after:wN #1
+ \__int_value:w \__int_eval:w 0
+ \exp_after:wN \@@_cs_space_count:w
+ \token_to_str:N #2
+ \fi: \@@_cs_space_count_end:w ; ~ !
+ }
+\cs_new:Npn \@@_cs_space_count:w #1 ~
+ {
+ \if_false: #1 #1 \fi:
+ + 1
+ \@@_cs_space_count:w
+ }
+\cs_new:Npn \@@_cs_space_count_end:w ; #1 \fi: #2 !
+ { \exp_after:wN ; \__int_value:w \str_count_ignore_spaces:n {#1} ; }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Plan of attack}
+%
+% Our goal is to produce a token list of the form roughly
+% \begin{quote}
+% \meta{token 1} \cs{s__tl} \meta{catcode 1} \meta{char code 1} \cs{s__tl} \\
+% \meta{token 2} \cs{s__tl} \meta{catcode 2} \meta{char code 2} \cs{s__tl} \\
+% \ldots{}
+% \meta{token N} \cs{s__tl} \meta{catcode N} \meta{char code N} \cs{s__tl}
+% \end{quote}
+% Most but not all tokens can be grabbed as an undelimited
+% (\texttt{N}-type) argument by \TeX{}. The plan is to have a two pass
+% system. In the first pass, locate special tokens, and store them in
+% various \tn{toks} registers. In the second pass, which is done within
+% an \texttt{x}-expanding assignment, normal tokens are taken in as
+% \texttt{N}-type arguments, and special tokens are retrieved from the
+% \tn{toks} registers, and removed from the input stream by some means.
+% The whole process takes linear time, because we avoid building the
+% result one item at a time.
+%
+% To ease the difficult first pass, we first do some setup with
+% \cs{@@_setup:n}. Active characters set equal to non-active
+% characters cause trouble, so we disable all active characters by
+% setting them equal to \texttt{undefined} locally. At the start of the
+% first pass we set the escape character to be printable (backslash, but
+% this later oscillates between slash and backslash): this makes it
+% possible to distinguish characters from control sequences.
+%
+% A token has two characteristics: its \tn{meaning}, and what it looks
+% like for \TeX{} when it is in scanning mode (\emph{e.g.}, when
+% capturing parameters for a macro). For our purposes, we distinguish
+% the following meanings:
+% \begin{itemize}
+% \item begin-group token (category code $1$), either space (character
+% code $32$), or non-space;
+% \item end-group token (category code $2$), either space (character
+% code $32$), or non-space;
+% \item space token (category code $10$, character code $32$);
+% \item anything else (then the token is always an \texttt{N}-type
+% argument).
+% \end{itemize}
+% The token itself can \enquote{look like} one of the following
+% \begin{itemize}
+% \item a non-active character, in which case its meaning is
+% automatically that associated to its character code and category
+% code, we call it \enquote{true} character;
+% \item an active character (we eliminate those in the setup step);
+% \item a control sequence.
+% \end{itemize}
+% The only tokens which are not valid \texttt{N}-type arguments are true
+% begin-group characters, true end-group characters, and true spaces.
+% We will detect those characters by scanning ahead with \tn{futurelet},
+% then distinguishing true characters from control sequences set equal
+% to them using the \tn{string} representation.
+%
+% The second pass is a simple exercise in expandable loops.
+%
+% \begin{macro}[int]{\@@:n}
+% Everything is done within a group, and all definitions will be
+% local. We use \cs{group_align_safe_begin/end:} to avoid problems in
+% case \cs{@@:n} is used within an alignment and its argument
+% contains alignment tab tokens.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@:n #1
+ {
+ \group_begin:
+ \group_align_safe_begin:
+ \@@_setup:n {#1}
+ \@@_a:n {#1}
+ \@@_b:n {#1}
+ \group_align_safe_end:
+ \group_end:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \subsection{Setup}
+%
+% \begin{macro}[int]{\@@_setup:n}
+% \begin{macro}[aux]{\@@_disable_loop:N}
+% Active characters can cause problems later on in the processing,
+% so the first step is to disable them, by setting them to
+% \texttt{undefined}. Since Unicode contains too many characters
+% to loop over all of them, we instead loop over the input token
+% list as a string: any active character in the token list
+% must appear in its string representation. The string is shortened
+% a little by making the escape character unprintable. The active
+% space must be disabled separately (the loop skips over it otherwise),
+% and we end the loop by feeding an odd non-\texttt{N}-type argument
+% to the looping macro. For \pTeX{} and \upTeX{} we skip characters
+% beyond $[0,255]$ because \tn{lccode} only allows those values.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_setup:n #1
+ {
+ \int_set:Nn \tex_escapechar:D { -1 }
+ \exp_after:wN \@@_disable_loop:N
+ \tl_to_str:n {#1} { ~ } { ? \__prg_break: }
+ \__prg_break_point:
+ \scan_stop:
+ }
+\group_begin:
+ \char_set_catcode_active:N \^^@
+ \cs_new_protected:Npn \@@_disable_loop:N #1
+ {
+ \tex_lccode:D 0 = `#1 ~
+ \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
+ \@@_disable_loop:N
+ }
+ \cs_if_exist:NT \ptex_kanjiskip:D
+ {
+ \cs_gset_protected:Npn \@@_disable_loop:N #1
+ {
+ \use_none:n #1 \scan_stop:
+ \if_int_compare:w 256 > `#1 \exp_stop_f:
+ \tex_lccode:D 0 = `#1 ~
+ \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
+ \fi:
+ \@@_disable_loop:N
+ }
+ }
+\group_end:
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{First pass}
+%
+% The goal of this pass is to detect special (non-\texttt{N}-type) tokens,
+% and count how many \texttt{N}-type tokens lie between special tokens.
+% Also, we wish to store some representation of each special token
+% in a \tn{toks} register.
+%
+% After the setup step, we have $11$ types of tokens:
+% \begin{itemize}
+% \item[1.] a true non-space begin-group character;
+% \item[2.] a true space begin-group character;
+% \item[3.] a true non-space end-group character;
+% \item[4.] a true space end-group character;
+% \item[5.] a true space blank space character;
+% \item[6.] an undefined active character;
+% \item[7.] any other true character;
+% \item[8.] a control sequence equal to a begin-group token (category code $1$);
+% \item[9.] a control sequence equal to an end-group token (category code $2$);
+% \item[10.] a control sequence equal to a space token
+% (character code $32$, category code $10$);
+% \item[11.] any other control sequence.
+% \end{itemize}
+% Our first tool is \tn{futurelet}. This cannot distinguish
+% case $8$ from $1$ or $2$, nor case $9$ from $3$ or $4$,
+% nor case $10$ from case $5$. Those cases will be distinguished
+% by applying the \tn{string} primitive to the following token,
+% after possibly changing the escape character to ensure that
+% a control sequence's string representation cannot be mistaken
+% for the true character.
+%
+% In cases $6$, $7$, and $11$, the following token is a valid
+% \texttt{N}-type argument, so we grab it and distinguish the case
+% of a character from a control sequence: in the latter case,
+% \cs{str_tail:n} \Arg{token} is non-empty, because the
+% escape character is printable.
+%
+% \begin{macro}[int]{\@@_a:n}
+% We read tokens one by one using \tn{futurelet}.
+% While performing the loop, we keep track of the number of
+% true begin-group characters minus the number of
+% true end-group characters in \cs{l_@@_nesting_int}.
+% This reaches $-1$ when we read the closing brace.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a:n #1
+ {
+ \int_set:Nn \tex_escapechar:D { 92 }
+ \int_zero:N \l_@@_normal_int
+ \int_zero:N \l_@@_index_int
+ \int_zero:N \l_@@_nesting_int
+ \if_false: { \fi: \@@_a_loop:w #1 }
+ \int_decr:N \l_@@_index_int
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_loop:w}
+% Read one character and check its type.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a_loop:w
+ { \tex_futurelet:D \l_@@_token \@@_a_type:w }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_type:w}
+% At this point, \cs{l_@@_token} holds the meaning
+% of the following token. We store in \cs{l_@@_type_int}
+% the meaning of the token ahead:
+% \begin{itemize}
+% \item 0 space token;
+% \item 1 begin-group token;
+% \item -1 end-group token;
+% \item 2 other.
+% \end{itemize}
+% The values $0$, $1$, $-1$ correspond to how much a true such
+% character changes the nesting level ($2$ is used only here,
+% and is irrelevant later). Then call the auxiliary for each case.
+% Note that nesting conditionals here is safe because we only skip
+% over \cs{l_@@_token} if it matches with one of the
+% character tokens (hence is not a primitive conditional).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a_type:w
+ {
+ \l_@@_type_int =
+ \if_meaning:w \l_@@_token \c_space_token
+ 0
+ \else:
+ \if_catcode:w \exp_not:N \l_@@_token \c_group_begin_token
+ 1
+ \else:
+ \if_catcode:w \exp_not:N \l_@@_token \c_group_end_token
+ - 1
+ \else:
+ 2
+ \fi:
+ \fi:
+ \fi:
+ \exp_stop_f:
+ \if_case:w \l_@@_type_int
+ \exp_after:wN \@@_a_space:w
+ \or: \exp_after:wN \@@_a_bgroup:w
+ \or: \exp_after:wN \@@_a_safe:N
+ \else: \exp_after:wN \@@_a_egroup:w
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_space:w}
+% \begin{macro}[aux]{\@@_a_space_test:w}
+% In this branch, the following token's meaning is a blank space.
+% Apply \tn{string} to that token: if it is a control sequence
+% the result starts with the escape character; otherwise it is
+% a true blank space, whose string representation is also a blank space.
+% We test for that in \cs{@@_a_space_test:w},
+% after grabbing as \cs{l_@@_char_token} the first character
+% of the string representation.
+% Also, since \cs{@@_a_store:} expects the special token to be
+% stored in the relevant \tn{toks} register, we do that. The extra
+% \cs{exp_not:n} is unnecessary of course, but it makes the treatment
+% of all tokens more homogeneous.
+% If we discover that the next token was actually a control sequence
+% instead of a true space, then we step the counter of normal tokens.
+% We now have in front of us the whole string representation of
+% the control sequence, including potential spaces; those will appear
+% to be true spaces later in this pass. Hence, all other branches of
+% the code in this first pass need to consider the string representation,
+% so that the second pass does not need to test the meaning of tokens,
+% only strings.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a_space:w
+ {
+ \tex_afterassignment:D \@@_a_space_test:w
+ \exp_after:wN \cs_set_eq:NN
+ \exp_after:wN \l_@@_char_token
+ \token_to_str:N
+ }
+\cs_new_protected:Npn \@@_a_space_test:w
+ {
+ \if_meaning:w \l_@@_char_token \c_space_token
+ \tex_toks:D \l_@@_index_int { \exp_not:n { ~ } }
+ \@@_a_store:
+ \else:
+ \int_incr:N \l_@@_normal_int
+ \fi:
+ \@@_a_loop:w
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_bgroup:w, \@@_a_egroup:w}
+% \begin{macro}[aux]{\@@_a_group:nw}
+% \begin{macro}[aux]{\@@_a_group_test:w}
+% The token might be either a true character token with
+% catcode $1$ or $2$, or it could be a control sequence.
+% The only tricky case is if the character code happens
+% to be equal to the escape character: then we change
+% the escape character from backslash to solidus or back,
+% so that the string representation of the true character
+% and of a control sequence set equal to it start differently.
+% Then probe what the first character of that string
+% representation is: this is the place where we need
+% \cs{l_@@_char_token} to be a separate control
+% sequence from \cs{l_@@_token}, to compare them.
+% \begin{macrocode}
+\group_begin:
+ \char_set_catcode_group_begin:N \^^@
+ \char_set_catcode_group_end:N \^^E
+ \cs_new_protected:Npn \@@_a_bgroup:w
+ { \@@_a_group:nw { \exp_after:wN ^^@ \if_false: ^^E \fi: } }
+ \char_set_catcode_group_begin:N \^^B
+ \char_set_catcode_group_end:N \^^@
+ \cs_new_protected:Npn \@@_a_egroup:w
+ { \@@_a_group:nw { \if_false: ^^B \fi: ^^@ } }
+\group_end:
+\cs_new_protected:Npn \@@_a_group:nw #1
+ {
+ % Point the lowercase mapping of character 0 at the character code
+ % extracted from the meaning of the token ahead, then store #1 in the
+ % current toks register through a lowercase operation.
+ \tex_lccode:D 0 = \@@_extract_charcode: \scan_stop:
+ \tex_lowercase:D { \tex_toks:D \l_@@_index_int {#1} }
+ % If that character code is the current escape character, replace the
+ % escape character by 139 minus itself (92 and 47 swap under this map).
+ \if_int_compare:w \tex_lccode:D 0 = \tex_escapechar:D
+ \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D }
+ \fi:
+ % Let the char token be the first character of the string form of the
+ % upcoming token; the test function runs right after that assignment.
+ \tex_afterassignment:D \@@_a_group_test:w
+ \exp_after:wN \cs_set_eq:NN
+ \exp_after:wN \l_@@_char_token
+ \token_to_str:N
+ }
+\cs_new_protected:Npn \@@_a_group_test:w
+ {
+ \if_charcode:w \l_@@_token \l_@@_char_token
+ \@@_a_store:
+ \else:
+ \int_incr:N \l_@@_normal_int
+ \fi:
+ \@@_a_loop:w
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_store:}
+% This function is called each time we meet a special token;
+% at this point, the \tn{toks} register \cs{l_@@_index_int}
+% holds a token list which expands to the given special token.
+% Also, the value of \cs{l_@@_type_int} indicates which case
+% we are in:
+% \begin{itemize}
+% \item -1 end-group character;
+% \item 0 space character;
+% \item 1 begin-group character.
+% \end{itemize}
+% We need to distinguish further the case of a space character
+% (code $32$) from other character codes, because those will
+% behave differently in the second pass. Namely, after testing
+% the \tn{lccode} of $0$ (which holds the present character code)
+% we change the cases above to
+% \begin{itemize}
+% \item -2 space end-group character;
+% \item -1 non-space end-group character;
+% \item 0 space blank space character;
+% \item 1 non-space begin-group character;
+% \item 2 space begin-group character.
+% \end{itemize}
+% This has the property that non-space characters correspond to odd
+% values of \cs{l_@@_type_int}.
+% The number of normal tokens, and the type of special token,
+% are packed into a \tn{skip} register.
+% Finally, we check whether we reached the last closing brace, in which
+% case we stop by disabling the looping function (locally).
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a_store:
+ {
+ % Update the nesting depth by the type value (-1, 0 or 1 on entry).
+ \tex_advance:D \l_@@_nesting_int \l_@@_type_int
+ % When the lowercase code of character 0 is that of a space, double
+ % the type so that even values mark space characters.
+ \if_int_compare:w \tex_lccode:D 0 = `\ \exp_stop_f:
+ \tex_multiply:D \l_@@_type_int 2 \exp_stop_f:
+ \fi:
+ % Pack the count of normal tokens (natural part) and the type
+ % (stretch part) into the skip register for the current index.
+ \tex_skip:D \l_@@_index_int
+ = \l_@@_normal_int sp plus \l_@@_type_int sp \scan_stop:
+ \int_incr:N \l_@@_index_int
+ \int_zero:N \l_@@_normal_int
+ % Nesting depth -1: disable the looping function (locally) to stop.
+ \if_int_compare:w \l_@@_nesting_int = -1 \exp_stop_f:
+ \cs_set_eq:NN \@@_a_loop:w \scan_stop:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int]{\@@_a_safe:N}
+% \begin{macro}[aux]{\@@_a_cs:ww}
+% This should be the simplest case: since the upcoming token is safe,
+% we can simply grab it in a second pass. However, other branches of
+% the code must pass their tokens through \tn{string}, hence we do it
+% here as well, with some optimizations. If the token is a single
+% character (including space), the \cs{if_charcode:w} test yields
+% true, and we simply count one \enquote{normal} token. Otherwise the
+% token is a control sequence, which we should replace by its string
+% representation for compatibility with other code branches. Instead
+% of slowly looping through the characters with the main code, we use
+% the knowledge of how the second pass works. The auxiliary
+% \cs{@@_a_cs:ww} is handed to \cs{@@_cs_space_count:NN}, which
+% presumably calls it with |#1| the number of spaces in the name and
+% |#2| the number of characters after the last space. If |#1| is zero,
+% |#2| is added to \cs{l_@@_normal_int}. Otherwise the spaces are
+% registered as special characters by increasing \cs{l_@@_index_int}
+% by |#1| (no need to carefully count characters between each space),
+% and |#2| starts the following sequence of \enquote{normal} tokens.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_a_safe:N #1
+ {
+ \if_charcode:w
+ \scan_stop:
+ \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
+ \scan_stop:
+ \int_incr:N \l_@@_normal_int
+ \else:
+ \@@_cs_space_count:NN \@@_a_cs:ww #1
+ \fi:
+ \@@_a_loop:w
+ }
+\cs_new_protected:Npn \@@_a_cs:ww #1; #2;
+ {
+ \if_int_compare:w #1 > 0 \exp_stop_f:
+ \tex_skip:D \l_@@_index_int
+ = \__int_eval:w \l_@@_normal_int + 1 sp \scan_stop:
+ \tex_advance:D \l_@@_index_int #1 \exp_stop_f:
+ \l_@@_normal_int #2 \exp_stop_f:
+ \else:
+ \tex_advance:D \l_@@_normal_int #2 \exp_stop_f:
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Second pass}
+%
+% The second pass is an exercise in expandable loops.
+% All the necessary information is stored in \tn{skip}
+% and \tn{toks} registers.
+%
+% \begin{macro}[int]{\@@_b:n}
+% \begin{macro}[int, EXP]{\@@_b_loop:w}
+% Build \cs{g_@@_result_tl} by \texttt{x}-expansion, starting the loop at
+% index $0$. No end-marker is needed: the loop stops by itself (jumping to
+% \cs{__prg_break_point:}) when the last index is read. Each step reads the
+% \tn{skip} of that index as an integer, the number of normal tokens.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_b:n #1
+ {
+ \tl_gset:Nx \g_@@_result_tl
+ {
+ \@@_b_loop:w 0; #1
+ \__prg_break_point:
+ }
+ }
+\cs_new:Npn \@@_b_loop:w #1;
+ {
+ \exp_after:wN \@@_b_normals:ww
+ \__int_value:w \tex_skip:D #1 ; #1 ;
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_b_normals:ww}
+% \begin{macro}[aux, EXP]{\@@_b_normal:wwN}
+% The first argument is the number of normal tokens which remain
+% to be read, and the second argument is the index in the array
+% produced in the first pass. When that number is zero, control goes to
+% \cs{@@_b_special:w}; otherwise the next token |#3| is grabbed.
+% A character's string representation is always one character long,
+% while a control sequence is always longer (we have set the escape
+% character to a printable value). In both cases, we leave
+% \cs{exp_not:n} \Arg{token} \cs{s__tl} in the input stream (after
+% \texttt{x}-expansion). Here, \cs{exp_not:n} is used rather than
+% \cs{exp_not:N} as |#3| could be \cs{s__tl}, hence must be hidden in braces.
+% \begin{macrocode}
+\cs_new:Npn \@@_b_normals:ww #1;
+ {
+ \if_int_compare:w #1 = 0 \exp_stop_f:
+ \@@_b_special:w
+ \fi:
+ \@@_b_normal:wwN #1;
+ }
+\cs_new:Npn \@@_b_normal:wwN #1; #2; #3
+ {
+ \exp_not:n { \exp_not:n { #3 } } \s__tl
+ \if_charcode:w
+ \scan_stop:
+ \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
+ \scan_stop:
+ \exp_after:wN \@@_b_char:Nww
+ \else:
+ \exp_after:wN \@@_b_cs:Nww
+ \fi:
+ #3 #1; #2;
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_b_char:Nww}
+% If the normal token we grab is a character, leave \meta{catcode}
+% (one hexadecimal digit; \texttt{D} when the token's meaning is undefined)
+% and \meta{charcode} followed by \cs{s__tl} in the input stream, then
+% call \cs{@@_b_normals:ww} with its first argument decremented.
+% \begin{macrocode}
+\cs_new:Npx \@@_b_char:Nww #1
+ {
+ \exp_not:N \if_meaning:w #1 \exp_not:N \tex_undefined:D
+ \token_to_str:N D \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_catcode_other_token
+ \token_to_str:N C \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_catcode_letter_token
+ \token_to_str:N B \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_toggle_token 3 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_alignment_token 4 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_superscript_token 7 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_subscript_token 8 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_space_token
+ \token_to_str:N A \exp_not:N \else:
+ 6
+ \exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: }
+ \exp_not:N \__int_value:w `#1 \s__tl
+ \exp_not:N \exp_after:wN \exp_not:N \@@_b_normals:ww
+ \exp_not:N \__int_value:w \exp_not:N \__int_eval:w - 1 +
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_b_cs:Nww}
+% \begin{macro}[aux, EXP]{\@@_b_cs_test:ww}
+% If the token we grab is a control sequence, leave |0 -1| (as category
+% code and character code) and \cs{s__tl} in the input stream. Then
+% call \cs{@@_b_normals:ww} with the index advanced by |#1| and, as count,
+% either |#3| (if |#1| is zero) or the \tn{skip} at the new index, minus |#2|.
+% \begin{macrocode}
+\cs_new:Npn \@@_b_cs:Nww #1
+ {
+ 0 -1 \s__tl
+ \@@_cs_space_count:NN \@@_b_cs_test:ww #1
+ }
+\cs_new:Npn \@@_b_cs_test:ww #1 ; #2 ; #3 ; #4 ;
+ {
+ \exp_after:wN \@@_b_normals:ww
+ \__int_value:w \__int_eval:w
+ \if_int_compare:w #1 = 0 \exp_stop_f:
+ #3
+ \else:
+ \tex_skip:D \__int_eval:w #4 + #1 \__int_eval_end:
+ \fi:
+ - #2
+ \exp_after:wN ;
+ \__int_value:w \__int_eval:w #4 + #1 ;
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[int, EXP]{\@@_b_special:w}
+% \begin{macro}[aux, EXP]{\@@_b_special_char:wN}
+% \begin{macro}[aux, EXP]{\@@_b_special_space:w}
+% Here, |#1| is the current index in the array built in the first pass.
+% Check now whether we reached the end (we shouldn't keep the trailing
+% end-group character that marked the end of the token list in the
+% first pass). Unpack the \tn{toks} register: when \texttt{x}-expanding
+% again, we will get the special token. Then leave the category code
+% (\texttt{A}, $1$ or $2$ for a stretch that is zero, positive or negative),
+% followed by the character code (read from the stringified upcoming token
+% if the stretch is odd, else $32$), and call \cs{@@_b_loop:w} with the next index.
+% \begin{macrocode}
+\group_begin:
+ \char_set_catcode_other:N A
+ \cs_new:Npn \@@_b_special:w
+ \fi: \@@_b_normal:wwN 0 ; #1 ;
+ {
+ \fi:
+ \if_int_compare:w #1 = \l_@@_index_int
+ \exp_after:wN \__prg_break:
+ \fi:
+ \tex_the:D \tex_toks:D #1 \s__tl
+ \if_case:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
+ A
+ \or: 1
+ \or: 1
+ \else: 2
+ \fi:
+ \if_int_odd:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
+ \exp_after:wN \@@_b_special_char:wN \__int_value:w
+ \else:
+ \exp_after:wN \@@_b_special_space:w \__int_value:w
+ \fi:
+ \__int_eval:w 1 + #1 \exp_after:wN ;
+ \token_to_str:N
+ }
+\group_end:
+\cs_new:Npn \@@_b_special_char:wN #1 ; #2
+ {
+ \__int_value:w `#2 \s__tl
+ \@@_b_loop:w #1 ;
+ }
+\cs_new:Npn \@@_b_special_space:w #1 ; ~
+ {
+ 32 \s__tl
+ \@@_b_loop:w #1 ;
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Mapping through the analysis}
+%
+% \begin{macro}[int]{\@@_map_inline:nn}
+% \begin{macro}[aux]{\@@_map_inline_aux:Nn}
+% First obtain the analysis of the token list into
+% \cs{g_@@_result_tl}. To allow nested mappings, increase the
+% nesting depth \cs{g__prg_map_int} (shared between all modules), then
+% define the looping macro, which has a name specific to that nesting
+% depth. That looping macro grabs the \meta{tokens}, \meta{catcode}
+% and \meta{char code}; it checks for the end of the loop with
+% \cs{use_none:n} |##2|, normally empty, but which becomes
+% \cs{tl_map_break:} at the end; it then performs the user's code
+% |#2|, and loops by calling itself. When the loop ends, remember to
+% decrease the nesting depth.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_map_inline:nn #1
+ {
+ \@@:n {#1}
+ \int_gincr:N \g__prg_map_int
+ \exp_args:Nc \@@_map_inline_aux:Nn
+ { @@_map_inline_ \int_use:N \g__prg_map_int :wNw }
+ }
+\cs_new_protected:Npn \@@_map_inline_aux:Nn #1#2
+ {
+ \cs_gset_protected:Npn #1 ##1 \s__tl ##2 ##3 \s__tl
+ {
+ \use_none:n ##2
+ #2
+ #1
+ }
+ \exp_after:wN #1
+ \g_@@_result_tl
+ \s__tl { ? \tl_map_break: } \s__tl
+ \__prg_break_point:Nn \tl_map_break: { \int_gdecr:N \g__prg_map_int }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Showing the results}
+%
+% \begin{macro}{\tl_show_analysis:N, \tl_show_analysis:n}
+% \begin{macro}[int]{\@@_show:}
+% Add to \cs{@@:n} a third pass, \cs{@@_show:}, which displays the tokens
+% on the terminal after the \texttt{show-tl-analysis} message. If the token
+% list variable is not defined, call \cs{tl_show:N} to get its usual error.
+% \begin{macrocode}
+\cs_new_protected:Npn \tl_show_analysis:N #1
+ {
+ \tl_if_exist:NTF #1
+ {
+ \exp_args:No \@@:n {#1}
+ \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
+ { \token_to_str:N #1 } { \tl_if_empty:NTF #1 { } { ? } } { } { }
+ \@@_show:
+ }
+ { \tl_show:N #1 }
+ }
+\cs_new_protected:Npn \tl_show_analysis:n #1
+ {
+ \@@:n {#1}
+ \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
+ { } { \tl_if_empty:nTF {#1} { } { ? } } { } { }
+ \@@_show:
+ }
+\cs_new_protected:Npn \@@_show:
+ {
+ \group_begin:
+ \exp_args:NNx
+ \group_end:
+ \__msg_show_wrap:n
+ {
+ \exp_after:wN \@@_show_loop:wNw \g_@@_result_tl
+ \s__tl { ? \__prg_break: } \s__tl
+ \__prg_break_point:
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_show_loop:wNw}
+% Here, |#1| \texttt{o}- and \texttt{x}-expands to the token;
+% |#2| is the category code (one uppercase hexadecimal digit),
+% $0$ for control sequences; |#3| is the character code, which we
+% ignore. The loop ends when |#2| is the end marker, whose second
+% token \cs{__prg_break:} survives \cs{use_none:n}. For control
+% sequences and active characters, the meaning may overflow one line,
+% and we want to truncate it. Those cases are thus separated out.
+% \begin{macrocode}
+\cs_new:Npn \@@_show_loop:wNw #1 \s__tl #2 #3 \s__tl
+ {
+ \use_none:n #2
+ \exp_not:n { \\ > \ \ }
+ \if_int_compare:w "#2 = 0 \exp_stop_f:
+ \exp_after:wN \@@_show_cs:n
+ \else:
+ \if_int_compare:w "#2 = 13 \exp_stop_f:
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \@@_show_active:n
+ \else:
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \@@_show_normal:n
+ \fi:
+ \fi:
+ {#1}
+ \@@_show_loop:wNw
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_show_normal:n}
+% Non-active characters are a simple matter of printing the character
+% (|#1| is expanded once to reach the token), then its meaning in
+% parentheses. Our test suite checks that begin-group and end-group
+% characters do not mess up \TeX{}'s alignment status.
+% \begin{macrocode}
+\cs_new:Npn \@@_show_normal:n #1
+ {
+ \exp_after:wN \token_to_str:N #1 ~
+ ( \exp_after:wN \token_to_meaning:N #1 )
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[EXP]{\@@_show_value:N}
+% Expands to \texttt{=}\meta{value} if |#1| is a (math)chardef or a register, else to nothing.
+% \begin{macrocode}
+\cs_new:Npn \@@_show_value:N #1
+ {
+ \token_if_expandable:NF #1
+ {
+ \token_if_chardef:NTF #1 \__prg_break: { }
+ \token_if_mathchardef:NTF #1 \__prg_break: { }
+ \token_if_dim_register:NTF #1 \__prg_break: { }
+ \token_if_int_register:NTF #1 \__prg_break: { }
+ \token_if_skip_register:NTF #1 \__prg_break: { }
+ \token_if_toks_register:NTF #1 \__prg_break: { }
+ \use_none:nnn
+ \__prg_break_point:
+ \use:n { \exp_after:wN = \tex_the:D #1 }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[aux, rEXP]{\@@_show_cs:n}
+% \begin{macro}[aux, rEXP]{\@@_show_active:n}
+% \begin{macro}[aux, rEXP]{\@@_show_long:nn}
+% \begin{macro}[aux, rEXP]{\@@_show_long_aux:nnnn}
+% Control sequences and active characters are printed in the same way:
+% the token, then in parentheses a label, its meaning and its value, if
+% any, using at most \cs{l_iow_line_count_int} minus~$3$ characters. In an
+% overflow, we replace the last characters by \cs{c_@@_show_etc_str}.
+% \begin{macrocode}
+\cs_new:Npn \@@_show_cs:n #1
+ { \exp_args:No \@@_show_long:nn {#1} { control~sequence= } }
+\cs_new:Npn \@@_show_active:n #1
+ { \exp_args:No \@@_show_long:nn {#1} { active~character= } }
+\cs_new:Npn \@@_show_long:nn #1
+ {
+ \@@_show_long_aux:oofn
+ { \token_to_str:N #1 }
+ { \token_to_meaning:N #1 }
+ { \@@_show_value:N #1 }
+ }
+\cs_new:Npn \@@_show_long_aux:nnnn #1#2#3#4
+ {
+ \int_compare:nNnTF
+ { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
+ > { \l_iow_line_count_int - 3 }
+ {
+ \str_range:nnn { #1 ~ ( #4 #2 #3 ) } { 1 }
+ {
+ \l_iow_line_count_int - 3
+ - \str_count:N \c_@@_show_etc_str
+ }
+ \c_@@_show_etc_str
+ }
+ { #1 ~ ( #4 #2 #3 ) }
+ }
+\cs_generate_variant:Nn \@@_show_long_aux:nnnn { oof }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Messages}
+%
+% \begin{variable}{\c_@@_show_etc_str}
+% When a control sequence (or active character)
+% and its meaning are too long to fit in one line
+% of the terminal, the end is replaced by this string.
+% \begin{macrocode}
+\tl_const:Nx \c_@@_show_etc_str % (
+ { \token_to_str:N \ETC.) }
+% \end{macrocode}
+% \end{variable}
+% Message shown first: |#1| is the variable name (or empty), |#2| is empty for an empty list.
+% \begin{macrocode}
+\__msg_kernel_new:nnn { kernel } { show-tl-analysis }
+ {
+ The~token~list~ \tl_if_empty:nF {#1} { #1 ~ }
+ \tl_if_empty:nTF {#2}
+ { is~empty }
+ { contains~the~tokens: }
+ }
+% \end{macrocode}
+%
+% \begin{macrocode}
+%</initex|package>
+% \end{macrocode}
+%
+% \end{implementation}
+%
+% \PrintIndex
Property changes on: trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-analysis.dtx
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-build.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-build.dtx (rev 0)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-build.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -0,0 +1,280 @@
+% \iffalse meta-comment
+%
+%% File: l3tl-build.dtx Copyright (C) 2011-2017 The LaTeX3 Project
+%
+% It may be distributed and/or modified under the conditions of the
+% LaTeX Project Public License (LPPL), either version 1.3c of this
+% license or (at your option) any later version. The latest version
+% of this license is in the file
+%
+% http://www.latex-project.org/lppl.txt
+%
+% This file is part of the "l3kernel bundle" (The Work in LPPL)
+% and all files in that bundle must be distributed together.
+%
+% -----------------------------------------------------------------------
+%
+% The development version of the bundle can be found at
+%
+% https://github.com/latex3/latex3
+%
+% for those people who are interested.
+%
+%<*driver>
+\documentclass[full]{l3doc}
+\begin{document}
+ \DocInput{\jobname.dtx}
+\end{document}
+%</driver>
+% \fi
+%
+%
+% \title{^^A
+% The \textsf{l3tl-build} package: building token lists^^A
+% }
+%
+% \author{^^A
+% The \LaTeX3 Project\thanks
+% {^^A
+% E-mail:
+% \href{mailto:latex-team at latex-project.org}
+% {latex-team at latex-project.org}^^A
+% }^^A
+% }
+%
+% \date{Released 2017/05/29}
+%
+% \maketitle
+%
+% \begin{documentation}
+%
+% \section{\pkg{l3tl-build} documentation}
+%
+% This module provides no user function: it is meant for kernel use
+% only.
+%
+% There are two main ways of building token lists from individual
+% tokens. Either in one go within an \texttt{x}-expanding assignment, or
+% by repeatedly using \cs{tl_put_right:Nn}. The first method takes a
+% linear time, but only allows expandable operations. The second method
+% takes a time quadratic in the length of the token list, but allows
+% expandable and non-expandable operations.
+%
+% The goal of this module is to provide functions to build a token list
+% piece by piece in linear time, while allowing non-expandable
+% operations. This is achieved by abusing \tn{toks}: adding some tokens
+% to the token list is done by storing them in a free token register
+% (time $O(1)$ for each such operation). Those token registers are only
+% put together at the end, within an \texttt{x}-expanding assignment,
+% which takes a linear time.\footnote{If we run out of token registers,
+% then the currently filled-up \tn{toks} are put together in a
+% temporary token list, and cleared, and we ultimately use
+% \cs{tl_put_right:Nx} to put those chunks together. Hence the true
+% asymptotic cost is quadratic, with a very small constant.} Of course,
+% all this must be done in a group: we can't go and clobber the values
+% of legitimate \tn{toks} used by \LaTeXe{}.
+%
+% Since none of the current applications need the ability to insert
+% material on the left of the token list, I have not implemented
+% that. This could be done for instance by using odd-numbered \tn{toks}
+% for the left part, and even-numbered \tn{toks} for the right part.
+%
+% \subsection{Internal functions}
+%
+% \begin{function}
+% {
+% \__tl_build:Nw, \__tl_gbuild:Nw,
+% \__tl_build_x:Nw, \__tl_gbuild_x:Nw
+% }
+% \begin{syntax}
+% \cs{__tl_build:Nw} \meta{tl~var} \texttt{\ldots{}}
+% \cs{__tl_build_one:n} \Arg{tokens_1} \texttt{\ldots{}}
+% \cs{__tl_build_one:n} \Arg{tokens_2} \texttt{\ldots{}}
+% \ldots{}
+% \cs{__tl_build_end:}
+% \end{syntax}
+% Defines the \meta{tl~var} to contain the contents of \meta{tokens_1}
+% followed by \meta{tokens_2}, \emph{etc.} This is built in such a way
+% to be more efficient than repeatedly using \cs{tl_put_right:Nn}. The
+% code in \enquote{\texttt{\ldots{}}} does not need to be
+% expandable. The commands \cs{__tl_build:Nw} and \cs{__tl_build_end:}
+% start and end a group. The assignment to the \meta{tl~var} occurs
+% just after the end of that group, using \cs{tl_set:Nn},
+% \cs{tl_gset:Nn}, \cs{tl_set:Nx}, or \cs{tl_gset:Nx}.
+% \end{function}
+%
+% \begin{function}{\__tl_build_one:n, \__tl_build_one:o, \__tl_build_one:x}
+% \begin{syntax}
+% \cs{__tl_build_one:n} \Arg{tokens}
+% \end{syntax}
+% This function may only be used within the scope of a
+% \cs{__tl_build:Nw} function. It adds the \meta{tokens} on the
+% right of the current token list.
+% \end{function}
+%
+% \begin{function}{\__tl_build_end:}
+% Ends the scope started by \cs{__tl_build:Nw}, and performs the
+% relevant assignment.
+% \end{function}
+%
+% \end{documentation}
+%
+% \begin{implementation}
+%
+% \section{\pkg{l3tl-build} implementation}
+%
+% \begin{macrocode}
+%<*initex|package>
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<@@=tl_build>
+% \end{macrocode}
+%
+% \subsection{Variables and helper functions}
+%
+% \begin{variable}{\l_@@_start_index_int, \l_@@_index_int}
+% Integers holding the starting index (currently always zero) and the
+% current index, namely that of the next free \tn{toks}. The
+% corresponding \tn{toks} are accessed directly by number.
+% \begin{macrocode}
+\int_new:N \l_@@_start_index_int
+\int_new:N \l_@@_index_int
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_result_tl}
+% The resulting token list is normally built in one go by unpacking
+% all \tn{toks} in some range. In the rare cases where there are too
+% many \cs{@@_one:n} commands, leading to the depletion of
+% registers, the contents of the current set of \tn{toks} are appended
+% to \cs{l_@@_result_tl} early. This prevents overflow from
+% affecting the end-user (beyond an obvious performance hit).
+% \begin{macrocode}
+\tl_new:N \l_@@_result_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{macro}{\@@_unpack:}
+% \begin{macro}[aux, EXP]{\@@_unpack_loop:w}
+% The various pieces of the token list are built in \tn{toks} from the
+% \texttt{start_index} (inclusive) to the (current) \texttt{index}
+% (exclusive). Those \tn{toks} are unpacked and appended in order to the
+% \texttt{result} token list. Optimizations would be possible here,
+% for instance, unpacking $10$ \tn{toks} at a time with a macro
+% expanding to |\the\toks#10...\the\toks#19|, but this should be kept
+% for much later.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_unpack:
+ {
+ \tl_put_right:Nx \l_@@_result_tl
+ {
+ \exp_after:wN \@@_unpack_loop:w
+ \int_use:N \l_@@_start_index_int ;
+ \__prg_break_point:
+ }
+ }
+\cs_new:Npn \@@_unpack_loop:w #1 ;
+ {
+ \if_int_compare:w #1 = \l_@@_index_int
+ \exp_after:wN \__prg_break:
+ \fi:
+ \tex_the:D \tex_toks:D #1 \exp_stop_f:
+ \exp_after:wN \@@_unpack_loop:w
+ \int_use:N \__int_eval:w #1 + 1 ;
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \subsection{Building the token list}
+%
+% \begin{macro}
+% {
+% \@@:Nw , \@@_x:Nw ,
+% \__tl_gbuild:Nw , \__tl_gbuild_x:Nw
+% }
+% \begin{macro}[aux]{\@@_aux:NNw}
+% Similar to what is done for coffins: open a group and redefine some
+% command, here \cs{@@_end_assignment:n}, to close the group and perform
+% the relevant assignment (see \cs{@@_end:} for details). Then set the start
+% and current indices to zero, and empty the \texttt{result} token list.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@:Nw
+ { \@@_aux:NNw \tl_set:Nn }
+\cs_new_protected:Npn \@@_x:Nw
+ { \@@_aux:NNw \tl_set:Nx }
+\cs_new_protected:Npn \__tl_gbuild:Nw
+ { \@@_aux:NNw \tl_gset:Nn }
+\cs_new_protected:Npn \__tl_gbuild_x:Nw
+ { \@@_aux:NNw \tl_gset:Nx }
+\cs_new_protected:Npn \@@_aux:NNw #1#2
+ {
+ \group_begin:
+ \cs_set:Npn \@@_end_assignment:n
+ { \group_end: #1 #2 }
+ \int_zero:N \l_@@_start_index_int
+ \int_zero:N \l_@@_index_int
+ \tl_clear:N \l_@@_result_tl
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}{\@@_end:}
+% \begin{macro}[aux]{\@@_end_assignment:n}
+% When we are done building a token list, unpack all \tn{toks} into
+% the \texttt{result} token list, and expand this list before closing
+% the group. The \cs{@@_end_assignment:n} function is defined by
+% \cs{@@_aux:NNw} to end the group and hold the relevant
+% assignment. Its value outside is irrelevant, but just in case, we
+% set it to \cs{use_none:n}, which would discard the contents of
+% \cs{l_@@_result_tl}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_end:
+ {
+ \@@_unpack:
+ \exp_args:No
+ \@@_end_assignment:n \l_@@_result_tl
+ }
+\cs_new_eq:NN \@@_end_assignment:n \use_none:n
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}{\@@_one:n, \@@_one:o, \@@_one:x}
+% Store the tokens in a free \tn{toks}, then move the pointer to the
+% next one. If the index exceeds \cs{c_max_register_int}, unpack the
+% current \tn{toks} and reset the index to the start, ready to fill more
+% \tn{toks}. This could be optimized by not reading |#1|, using \tn{afterassignment}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_one:n #1
+ {
+ \tex_toks:D \l_@@_index_int {#1}
+ \int_incr:N \l_@@_index_int
+ \if_int_compare:w \l_@@_index_int > \c_max_register_int
+ \@@_unpack:
+ \l_@@_index_int \l_@@_start_index_int
+ \fi:
+ }
+\cs_new_protected:Npn \@@_one:o #1
+ {
+ \tex_toks:D \l_@@_index_int \exp_after:wN {#1}
+ \int_incr:N \l_@@_index_int
+ \if_int_compare:w \l_@@_index_int > \c_max_register_int
+ \@@_unpack:
+ \l_@@_index_int \l_@@_start_index_int
+ \fi:
+ }
+\cs_new_protected:Npn \@@_one:x #1
+ { \use:x { \@@_one:n {#1} } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macrocode}
+%</initex|package>
+% \end{macrocode}
+%
+% \end{implementation}
+%
+% \PrintIndex
Property changes on: trunk/Master/texmf-dist/source/latex/l3kernel/l3tl-build.dtx
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3tl.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3tl.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3tl.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -605,7 +605,7 @@
%
% \section{Using token lists}
%
-% \begin{function}[EXP]{\tl_to_str:n}
+% \begin{function}[EXP]{\tl_to_str:n, \tl_to_str:V}
% \begin{syntax}
% \cs{tl_to_str:n} \Arg{token list}
% \end{syntax}
@@ -2267,8 +2267,11 @@
%
% \subsection{Using token lists}
%
-% \begin{macro}{\tl_to_str:n}
+% \begin{macro}{\tl_to_str:n, \tl_to_str:V}
% Another name for a primitive: defined in \pkg{l3basics}.
+% \begin{macrocode}
+\cs_generate_variant:Nn \tl_to_str:n { V }
+% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tl_to_str:N, \tl_to_str:c}
Modified: trunk/Master/texmf-dist/source/latex/l3kernel/l3token.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3kernel/l3token.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3kernel/l3token.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -41,7 +41,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
Modified: trunk/Master/texmf-dist/source/latex/l3packages/l3keys2e/l3keys2e.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3packages/l3keys2e/l3keys2e.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3packages/l3keys2e/l3keys2e.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -23,8 +23,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{l3keys2e}{Support package l3kernel too old}
@@ -60,7 +60,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -131,7 +131,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{l3keys2e}{2017/05/13}{}
+\ProvidesExplPackage{l3keys2e}{2017/05/29}{}
{LaTeX2e option processing using LaTeX3 keys}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/source/latex/l3packages/xfp/xfp.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3packages/xfp/xfp.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3packages/xfp/xfp.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -23,8 +23,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{xfpu}{Support package l3kernel too old}
@@ -62,7 +62,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -143,7 +143,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xfp}{2017/05/13}{}
+\ProvidesExplPackage{xfp}{2017/05/29}{}
{L3 Floating point unit}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/source/latex/l3packages/xfrac/xfrac.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3packages/xfrac/xfrac.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3packages/xfrac/xfrac.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -24,8 +24,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{xfrac}{Support package l3kernel too old}
@@ -63,7 +63,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -533,7 +533,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xfrac}{2017/05/13}{}
+\ProvidesExplPackage{xfrac}{2017/05/29}{}
{L3 Experimental split-level fractions}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/source/latex/l3packages/xparse/xparse.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3packages/xparse/xparse.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3packages/xparse/xparse.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -27,8 +27,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{xparse}{Support package l3kernel too old}
@@ -67,7 +67,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -776,7 +776,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xparse}{2017/05/13}{}
+\ProvidesExplPackage{xparse}{2017/05/29}{}
{L3 Experimental document command parser}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/source/latex/l3packages/xtemplate/xtemplate.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/l3packages/xtemplate/xtemplate.dtx 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/source/latex/l3packages/xtemplate/xtemplate.dtx 2017-06-05 23:17:08 UTC (rev 44483)
@@ -27,8 +27,8 @@
%<*driver|package>
% The version of expl3 required is tested as early as possible, as
% some really old versions do not define \ProvidesExplPackage.
-\RequirePackage{expl3}[2017/05/13]
-%<package>\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+%<package>\@ifpackagelater{expl3}{2017/05/29}
%<package> {}
%<package> {%
%<package> \PackageError{xtemplate}{Support package l3kernel too old}
@@ -63,7 +63,7 @@
% }^^A
% }
%
-% \date{Released 2017/05/13}
+% \date{Released 2017/05/29}
%
% \maketitle
%
@@ -682,7 +682,7 @@
% \end{macrocode}
%
% \begin{macrocode}
-\ProvidesExplPackage{xtemplate}{2017/05/13}{}
+\ProvidesExplPackage{xtemplate}{2017/05/29}{}
{L3 Experimental prototype document functions}
% \end{macrocode}
%
Modified: trunk/Master/texmf-dist/tex/latex/l3build/l3build.lua
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3build/l3build.lua 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3build/l3build.lua 2017-06-05 23:17:08 UTC (rev 44483)
@@ -23,7 +23,7 @@
--]]
-- Version information
-release_date = "2017/05/19"
+release_date = "2017/05/29"
-- "module" is a deprecated function in Lua 5.2: as we want the name
-- for other purposes, and it should eventually be 'free', simply
@@ -468,6 +468,15 @@
os_yes = "for /l %I in (1,1,200) do @echo y"
end
+-- Return an absolute path (using "/" as separator) from a relative one
+function abspath(path)
+ local oldpwd = lfs.currentdir() -- remembered so that it can be restored
+ lfs.chdir(path) -- NOTE(review): a failed chdir is not detected here
+ local result = lfs.currentdir()
+ lfs.chdir(oldpwd)
+ return (gsub(result, "\\", "/")) -- parentheses drop gsub's match count
+end
+
-- For cleaning out a directory, which also ensures that it exists
function cleandir(dir)
local errorlevel = mkdir(dir)
@@ -566,15 +575,6 @@
end
end
--- Return an absolute path from a relative one
-function abspath(path)
- local oldpwd = lfs.currentdir()
- lfs.chdir(path)
- local result = lfs.currentdir()
- lfs.chdir(oldpwd)
- return result
-end
-
-- Rename
function ren(dir, source, dest)
local dir = dir .. "/"
@@ -1352,7 +1352,7 @@
.. checkopts .. " " .. asciiopt .. lvtfile
.. (hide and (" > " .. os_null) or "")
.. os_concat ..
- runtest_tasks(stripext(lvtfile))
+ runtest_tasks(jobname(lvtfile))
)
end
if makepdf and fileexists(testdir .. "/" .. name .. dviext) then
@@ -1402,12 +1402,6 @@
end
end
--- Strip the extension from a file name (if present)
-function stripext(file)
- local name = match(file, "^(.*)%.")
- return name or file
-end
-
-- Strip the path from a file name (if present)
function basename(file)
local name = match(file, "^.*/([^/]*)$")
@@ -1414,6 +1408,12 @@
return name or file
end
+-- Strip the path and the extension from a file name (if present)
+function jobname(file)
+ local name = basename(file) -- drop any leading directories first
+ return match(name, "^(.*)%.") or name -- no extension: keep the base name
+end
+
-- Look for a test: could be in the testfiledir or the unpackdir
function testexists(test)
return(locate({testfiledir, unpackdir}, {test .. lvtext}))
@@ -1506,7 +1506,7 @@
end
function typesetpdf(file)
- local name = stripext(basename(file))
+ local name = jobname(file)
print("Typesetting " .. name)
local errorlevel = typeset(file)
if errorlevel == 0 then
@@ -1523,7 +1523,7 @@
if errorlevel ~= 0 then
return errorlevel
else
- local name = stripext(basename(file))
+ local name = jobname(file)
errorlevel = biber(name) + bibtex(name)
if errorlevel == 0 then
local function cycle(name)
@@ -1590,7 +1590,7 @@
-- No names passed: find all test files
if not next(names) then
for _,i in pairs(filelist(testfiledir, "*" .. lvtext)) do
- insert(names, stripext(i))
+ insert(names, jobname(i))
end
for _,i in ipairs(filelist(unpackdir, "*" .. lvtext)) do
if fileexists(testfiledir .. "/" .. i) then
@@ -1597,7 +1597,7 @@
print("Duplicate test file: " .. i)
return 1
else
- insert(names, stripext(i))
+ insert(names, jobname(i))
end
end
end
@@ -1701,7 +1701,7 @@
print("Checking source files")
for _,i in ipairs(cmdchkfiles) do
for _,j in ipairs(filelist(".", i)) do
- print(" " .. stripext(j))
+ print(" " .. jobname(j))
run(
testdir,
os_setenv .. " TEXINPUTS=." .. os_pathsep .. localdir
@@ -1711,7 +1711,7 @@
" \"\\PassOptionsToClass{check}{l3doc} \\input " .. j .. "\""
.. " > " .. os_null
)
- for line in lines(testdir .. "/" .. stripext(j) .. ".cmds") do
+ for line in lines(testdir .. "/" .. jobname(j) .. ".cmds") do
if match(line, "^%!") then
print(" - " .. match(line, "^%! (.*)"))
end
@@ -1870,7 +1870,7 @@
if files and next(files) then
typeset = false
for _,k in ipairs(files) do
- if k == stripext(j) then
+ if k == jobname(j) then
typeset = true
break
end
Deleted: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3intarray.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3intarray.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3intarray.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,104 +0,0 @@
-%%
-%% This is file `l3intarray.sty',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% l3intarray.dtx (with options: `package')
-%%
-%% Copyright (C) 2011-2017 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of
-%% the LaTeX Project Public License (LPPL), either version 1.3c of
-%% this license or (at your option) any later version. The latest
-%% version of this license is in the file:
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3experimental bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% File: l3intarray.dtx Copyright (C) 2017 The LaTeX3 Project
-\RequirePackage{expl3}[2017/05/13]
-\@ifpackagelater{expl3}{2017/05/13}
- {}
- {%
- \PackageError{l3intarray}{Support package l3kernel too old}
- {%
- Please install an up to date version of l3kernel\MessageBreak
- using your TeX package manager or from CTAN.\MessageBreak
- \MessageBreak
- Loading l3intarray will abort!%
- }%
- \endinput
- }
-\ProvidesExplPackage{l3intarray}{2017/05/13}{}
- {L3 Experimental low-level arrays of small integers}
-\int_new:N \g__intarray_font_int
-\cs_new_protected:Npn \__intarray_new:Nn #1#2
- {
- \__chk_if_free_cs:N #1
- \int_gincr:N \g__intarray_font_int
- \tex_global:D \tex_font:D #1 = cmr10~at~ \g__intarray_font_int sp \scan_stop:
- \tex_hyphenchar:D #1 = \int_eval:n {#2} \scan_stop:
- \int_compare:nNnT { \tex_hyphenchar:D #1 } > 0
- { \tex_fontdimen:D \tex_hyphenchar:D #1 #1 = 0 sp \scan_stop: }
- \int_step_inline:nnnn { 1 } { 1 } { 8 }
- { \tex_fontdimen:D ##1 #1 = 0 sp \scan_stop: }
- }
-\cs_new:Npn \__intarray_count:N #1 { \tex_the:D \tex_hyphenchar:D #1 }
-\cs_new_protected:Npn \__intarray_gset_fast:Nnn #1#2#3
- { \tex_fontdimen:D \int_eval:n {#2} #1 = \int_eval:n {#3} sp \scan_stop: }
-\cs_new_protected:Npn \__intarray_gset:Nnn #1#2#3
- {
- \exp_args:Nff \__intarray_gset_aux:Nnn #1
- { \int_eval:n {#2} } { \int_eval:n {#3} }
- }
-\cs_new_protected:Npn \__intarray_gset_aux:Nnn #1#2#3
- {
- \int_compare:nTF { 1 <= #2 <= \__intarray_count:N #1 }
- {
- \int_compare:nTF { - \c_max_dim <= \int_abs:n {#3} <= \c_max_dim }
- { \__intarray_gset_fast:Nnn #1 {#2} {#3} }
- {
- \__msg_kernel_error:nnxxxx { intarray } { overflow }
- { \token_to_str:N #1 } {#2} {#3}
- { \int_compare:nNnT {#3} < 0 { - } \__int_value:w \c_max_dim }
- \__intarray_gset_fast:Nnn #1 {#2}
- { \int_compare:nNnT {#3} < 0 { - } \c_max_dim }
- }
- }
- {
- \__msg_kernel_error:nnxxx { intarray } { out-of-bounds }
- { \token_to_str:N #1 } {#2} { \__intarray_count:N #1 }
- }
- }
-\cs_new:Npn \__intarray_item_fast:Nn #1#2
- { \__int_value:w \tex_fontdimen:D \int_eval:n {#2} #1 }
-\cs_new:Npn \__intarray_item:Nn #1#2
- { \exp_args:Nf \__intarray_item_aux:Nn #1 { \int_eval:n {#2} } }
-\cs_new:Npn \__intarray_item_aux:Nn #1#2
- {
- \int_compare:nTF { 1 <= #2 <= \__intarray_count:N #1 }
- { \__intarray_item_fast:Nn #1 {#2} }
- {
- \__msg_kernel_expandable_error:nnnnn { intarray } { out-of-bounds }
- { \token_to_str:N #1 } {#2} { \__intarray_count:N #1 }
- 0
- }
- }
-\__msg_kernel_new:nnnn { intarray } { overflow }
- { Integers~larger~than~2^{30}-1~cannot~be~stored~in~arrays. }
- {
- An~attempt~was~made~to~store~#3~at~position~#2~in~the~array~'#1'.~
- The~largest~allowed~value~#4~will~be~used~instead.
- }
-\__msg_kernel_new:nnnn { intarray } { out-of-bounds }
- { Access~to~an~entry~beyond~an~array's~bounds. }
- {
- An~attempt~was~made~to~access~or~store~data~at~position~#2~of~the~
- array~'#1',~but~this~array~has~entries~at~positions~from~1~to~#3.
- }
-%%
-%%
-%% End of file `l3intarray.sty'.
Deleted: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex-trace.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex-trace.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex-trace.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,3043 +0,0 @@
-%%
-%% This is file `l3regex-trace.sty',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% l3regex.dtx (with options: `package,trace')
-%%
-%% Copyright (C) 2011-2017 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of
-%% the LaTeX Project Public License (LPPL), either version 1.3c of
-%% this license or (at your option) any later version. The latest
-%% version of this license is in the file:
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3experimental bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% File: l3regex.dtx Copyright (C) 2011-2017 The LaTeX3 Project
-\RequirePackage{expl3}[2017/05/13]
-\@ifpackagelater{expl3}{2017/05/13}
- {}
- {%
- \PackageError{l3regex}{Support package l3kernel too old}
- {%
- Please install an up to date version of l3kernel\MessageBreak
- using your TeX package manager or from CTAN.\MessageBreak
- \MessageBreak
- Loading l3regex will abort!%
- }%
- \endinput
- }
-\ProvidesExplPackage{l3regex}{2017/05/13}{}
- {L3 Experimental regular expressions}
-\RequirePackage{l3tl-build, l3tl-analysis, l3intarray}
-\cs_generate_variant:Nn \tl_to_str:n { V }
-\cs_new_protected:Npn \__regex_standard_escapechar:
- { \int_set:Nn \tex_escapechar:D { `\\ } }
-\cs_new:Npn \__regex_toks_use:w { \tex_the:D \tex_toks:D }
-\cs_new_protected:Npn \__regex_toks_clear:N #1
- { \tex_toks:D #1 { } }
-\cs_new_eq:NN \__regex_toks_set:Nn \tex_toks:D
-\cs_new_protected:Npn \__regex_toks_set:No #1
- { \__regex_toks_set:Nn #1 \exp_after:wN }
-\cs_new_protected:Npn \__regex_toks_memcpy:NNn #1#2#3
- {
- \prg_replicate:nn {#3}
- {
- \tex_toks:D #1 = \tex_toks:D #2
- \int_incr:N #1
- \int_incr:N #2
- }
- }
-\cs_new_protected:Npn \__regex_toks_put_left:Nx #1#2
- {
- \cs_set:Npx \__regex_tmp:w { #2 }
- \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN
- { \exp_after:wN \__regex_tmp:w \tex_the:D \tex_toks:D #1 }
- }
-\cs_new_protected:Npn \__regex_toks_put_right:Nx #1#2
- {
- \cs_set:Npx \__regex_tmp:w {#2}
- \tex_toks:D #1 \exp_after:wN
- { \tex_the:D \tex_toks:D \exp_after:wN #1 \__regex_tmp:w }
- }
-\cs_new_protected:Npn \__regex_toks_put_right:Nn #1#2
- { \tex_toks:D #1 \exp_after:wN { \tex_the:D \tex_toks:D #1 #2 } }
-\cs_new:Npn \__regex_current_cs_to_str:
- {
- \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N
- \tex_the:D \tex_toks:D \l__regex_current_pos_int
- }
-\cs_new:Npn \__regex_tmp:w { }
-\tl_new:N \l__regex_internal_a_tl
-\tl_new:N \l__regex_internal_b_tl
-\int_new:N \l__regex_internal_a_int
-\int_new:N \l__regex_internal_b_int
-\int_new:N \l__regex_internal_c_int
-\bool_new:N \l__regex_internal_bool
-\seq_new:N \l__regex_internal_seq
-\tl_new:N \g__regex_internal_tl
-\tl_const:Nn \c__regex_no_match_regex
- {
- \__regex_branch:n
- { \__regex_class:NnnnN \c_true_bool { } { 1 } { 0 } \c_true_bool }
- }
-\__intarray_new:Nn \g__regex_charcode_intarray { 65536 }
-\__intarray_new:Nn \g__regex_catcode_intarray { 65536 }
-\__intarray_new:Nn \g__regex_balance_intarray { 65536 }
-\int_new:N \l__regex_balance_int
-\tl_new:N \l__regex_cs_name_tl
-\int_const:Nn \c__regex_ascii_min_int { 0 }
-\int_const:Nn \c__regex_ascii_max_control_int { 31 }
-\int_const:Nn \c__regex_ascii_max_int { 127 }
-\int_const:Nn \c__regex_ascii_lower_int { `a - `A }
-\cs_new_protected:Npn \__regex_break_true:w
- #1 \__regex_break_point:TF #2 #3 {#2}
-\cs_new_protected:Npn \__regex_break_point:TF #1 #2 { #2 }
-\cs_new_protected:Npn \__regex_item_reverse:n #1
- {
- #1
- \__regex_break_point:TF { } \__regex_break_true:w
- }
-\cs_new_protected:Npn \__regex_item_caseful_equal:n #1
- {
- \if_int_compare:w #1 = \l__regex_current_char_int
- \exp_after:wN \__regex_break_true:w
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseful_range:nn #1 #2
- {
- \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int
- \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseless_equal:n #1
- {
- \if_int_compare:w #1 = \l__regex_current_char_int
- \exp_after:wN \__regex_break_true:w
- \fi:
- \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int
- \__regex_compute_case_changed_char:
- \fi:
- \if_int_compare:w #1 = \l__regex_case_changed_char_int
- \exp_after:wN \__regex_break_true:w
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseless_range:nn #1 #2
- {
- \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int
- \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int
- \__regex_compute_case_changed_char:
- \fi:
- \reverse_if:N \if_int_compare:w #1 > \l__regex_case_changed_char_int
- \reverse_if:N \if_int_compare:w #2 < \l__regex_case_changed_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compute_case_changed_char:
- {
- \int_set_eq:NN \l__regex_case_changed_char_int \l__regex_current_char_int
- \if_int_compare:w \l__regex_current_char_int > `Z \exp_stop_f:
- \if_int_compare:w \l__regex_current_char_int > `z \exp_stop_f: \else:
- \if_int_compare:w \l__regex_current_char_int < `a \exp_stop_f: \else:
- \int_sub:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int }
- \fi:
- \fi:
- \else:
- \if_int_compare:w \l__regex_current_char_int < `A \exp_stop_f: \else:
- \int_add:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int }
- \fi:
- \fi:
- }
-\cs_new_eq:NN \__regex_item_equal:n ?
-\cs_new_eq:NN \__regex_item_range:nn ?
-\cs_new_protected:Npn \__regex_item_catcode:
- {
- "
- \if_case:w \l__regex_current_catcode_int
- 1 \or: 4 \or: 10 \or: 40
- \or: 100 \or: \or: 1000 \or: 4000
- \or: 10000 \or: \or: 100000 \or: 400000
- \or: 1000000 \or: 4000000 \else: 1*0
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_catcode:nT #1
- {
- \if_int_odd:w \__int_eval:w #1 / \__regex_item_catcode: \__int_eval_end:
- \exp_after:wN \use:n
- \else:
- \exp_after:wN \use_none:n
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_catcode_reverse:nT #1#2
- { \__regex_item_catcode:nT {#1} { \__regex_item_reverse:n {#2} } }
-\cs_new_protected:Npn \__regex_item_exact:nn #1#2
- {
- \if_int_compare:w #1 = \l__regex_current_catcode_int
- \if_int_compare:w #2 = \l__regex_current_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_exact_cs:n #1
- {
- \int_compare:nNnTF \l__regex_current_catcode_int = 0
- {
- \tl_set:Nx \l__regex_internal_a_tl
- { \scan_stop: \__regex_current_cs_to_str: \scan_stop: }
- \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l__regex_internal_a_tl
- { \__regex_break_true:w } { }
- }
- { }
- }
-\cs_new_protected:Npn \__regex_item_cs:n #1
- {
- \int_compare:nNnT \l__regex_current_catcode_int = 0
- {
- \group_begin:
- \tl_set:Nx \l__regex_cs_name_tl { \__regex_current_cs_to_str: }
- \__regex_single_match:
- \__regex_disable_submatches:
- \__regex_build_for_cs:n {#1}
- \bool_set_eq:NN \l__regex_saved_success_bool \g__regex_success_bool
- \exp_args:NV \__regex_match:n \l__regex_cs_name_tl
- \if_meaning:w \c_true_bool \g__regex_success_bool
- \group_insert_after:N \__regex_break_true:w
- \fi:
- \bool_gset_eq:NN \g__regex_success_bool \l__regex_saved_success_bool
- \group_end:
- }
- }
-\cs_new_protected:Npn \__regex_prop_d:
- { \__regex_item_caseful_range:nn { `0 } { `9 } }
-\cs_new_protected:Npn \__regex_prop_h:
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_equal:n { `\^^I }
- }
-\cs_new_protected:Npn \__regex_prop_s:
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_equal:n { `\^^I }
- \__regex_item_caseful_equal:n { `\^^J }
- \__regex_item_caseful_equal:n { `\^^L }
- \__regex_item_caseful_equal:n { `\^^M }
- }
-\cs_new_protected:Npn \__regex_prop_v:
- { \__regex_item_caseful_range:nn { `\^^J } { `\^^M } } % lf, vtab, ff, cr
-\cs_new_protected:Npn \__regex_prop_w:
- {
- \__regex_item_caseful_range:nn { `a } { `z }
- \__regex_item_caseful_range:nn { `A } { `Z }
- \__regex_item_caseful_range:nn { `0 } { `9 }
- \__regex_item_caseful_equal:n { `_ }
- }
-\cs_new_protected:Npn \__regex_prop_N:
- {
- \__regex_item_reverse:n
- { \__regex_item_caseful_equal:n { `\^^J } }
- }
-\cs_new_protected:Npn \__regex_posix_alnum:
- { \__regex_posix_alpha: \__regex_posix_digit: }
-\cs_new_protected:Npn \__regex_posix_alpha:
- { \__regex_posix_lower: \__regex_posix_upper: }
-\cs_new_protected:Npn \__regex_posix_ascii:
- {
- \__regex_item_caseful_range:nn
- \c__regex_ascii_min_int
- \c__regex_ascii_max_int
- }
-\cs_new_eq:NN \__regex_posix_blank: \__regex_prop_h:
-\cs_new_protected:Npn \__regex_posix_cntrl:
- {
- \__regex_item_caseful_range:nn
- \c__regex_ascii_min_int
- \c__regex_ascii_max_control_int
- \__regex_item_caseful_equal:n \c__regex_ascii_max_int
- }
-\cs_new_eq:NN \__regex_posix_digit: \__regex_prop_d:
-\cs_new_protected:Npn \__regex_posix_graph:
- { \__regex_item_caseful_range:nn { `! } { `\~ } }
-\cs_new_protected:Npn \__regex_posix_lower:
- { \__regex_item_caseful_range:nn { `a } { `z } }
-\cs_new_protected:Npn \__regex_posix_print:
- { \__regex_item_caseful_range:nn { `\ } { `\~ } }
-\cs_new_protected:Npn \__regex_posix_punct:
- {
- \__regex_item_caseful_range:nn { `! } { `/ }
- \__regex_item_caseful_range:nn { `: } { `@ }
- \__regex_item_caseful_range:nn { `[ } { `` }
- \__regex_item_caseful_range:nn { `\{ } { `\~ }
- }
-\cs_new_protected:Npn \__regex_posix_space:
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_range:nn { `\^^I } { `\^^M }
- }
-\cs_new_protected:Npn \__regex_posix_upper:
- { \__regex_item_caseful_range:nn { `A } { `Z } }
-\cs_new_eq:NN \__regex_posix_word: \__regex_prop_w:
-\cs_new_protected:Npn \__regex_posix_xdigit:
- {
- \__regex_posix_digit:
- \__regex_item_caseful_range:nn { `A } { `F }
- \__regex_item_caseful_range:nn { `a } { `f }
- }
-\cs_new_protected:Npn \__regex_escape_use:nnnn #1#2#3#4
- {
- \trace_push:nnn { regex } { 1 } { __regex_escape_use:nnnn }
- \__tl_build:Nw \l__regex_internal_a_tl
- \cs_set:Npn \__regex_escape_unescaped:N ##1 { #1 }
- \cs_set:Npn \__regex_escape_escaped:N ##1 { #2 }
- \cs_set:Npn \__regex_escape_raw:N ##1 { #3 }
- \__regex_standard_escapechar:
- \tl_gset:Nx \g__regex_internal_tl { \__str_to_other_fast:n {#4} }
- \tl_set:Nx \l__regex_internal_b_tl
- {
- \exp_after:wN \__regex_escape_loop:N \g__regex_internal_tl
- { break } \__prg_break_point:
- }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__tl_build_end:
- \trace_pop:nnn { regex } { 1 } { __regex_escape_use:nnnn }
- \l__regex_internal_a_tl
- }
-\cs_new:Npn \__regex_escape_loop:N #1
- {
- \cs_if_exist_use:cF { __regex_escape_\token_to_str:N #1:w }
- { \__regex_escape_unescaped:N #1 }
- \__regex_escape_loop:N
- }
-\cs_new:cpn { __regex_escape_ \c_backslash_str :w }
- \__regex_escape_loop:N #1
- {
- \cs_if_exist_use:cF { __regex_escape_/\token_to_str:N #1:w }
- { \__regex_escape_escaped:N #1 }
- \__regex_escape_loop:N
- }
-\cs_new_eq:NN \__regex_escape_unescaped:N ?
-\cs_new_eq:NN \__regex_escape_escaped:N ?
-\cs_new_eq:NN \__regex_escape_raw:N ?
-\cs_new_eq:NN \__regex_escape_break:w \__prg_break:
-\cs_new:cpn { __regex_escape_/break:w }
- {
- \if_false: { \fi: }
- \__msg_kernel_error:nn { regex } { trailing-backslash }
- \exp_after:wN \use_none:n \exp_after:wN { \if_false: } \fi:
- }
-\cs_new:cpn { __regex_escape_~:w } { }
-\cs_new:cpx { __regex_escape_/a:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^G }
-\cs_new:cpx { __regex_escape_/t:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^I }
-\cs_new:cpx { __regex_escape_/n:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^J }
-\cs_new:cpx { __regex_escape_/f:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^L }
-\cs_new:cpx { __regex_escape_/r:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^M }
-\cs_new:cpx { __regex_escape_/e:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^[ }
-\cs_new:cpn { __regex_escape_/x:w } \__regex_escape_loop:N
- {
- \exp_after:wN \__regex_escape_x_end:w
- \__int_value:w "0 \__regex_escape_x_test:N
- }
-\cs_new:Npn \__regex_escape_x_end:w #1 ;
- {
- \int_compare:nNnTF {#1} > \c_max_char_int
- {
- \if_false: { \fi: }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
- \tl_set:Nx \l__regex_internal_b_tl
- { \if_false: } \fi:
- }
- {
- \exp_last_unbraced:Nf \__regex_escape_raw:N
- { \char_generate:nn {#1} { 12 } }
- }
- }
-\cs_new:Npn \__regex_escape_x_test:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; }
- {
- \if_charcode:w \c_space_token #1
- \exp_after:wN \__regex_escape_x_test:N
- \else:
- \exp_after:wN \__regex_escape_x_testii:N
- \exp_after:wN #1
- \fi:
- }
- }
-\cs_new:Npn \__regex_escape_x_testii:N #1
- {
- \if_charcode:w \c_left_brace_str #1
- \exp_after:wN \__regex_escape_x_loop:N
- \else:
- \__regex_hexadecimal_use:NTF #1
- { \exp_after:wN \__regex_escape_x:N }
- { ; \exp_after:wN \__regex_escape_loop:N \exp_after:wN #1 }
- \fi:
- }
-\cs_new:Npn \__regex_escape_x:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; }
- {
- \__regex_hexadecimal_use:NTF #1
- { ; \__regex_escape_loop:N }
- { ; \__regex_escape_loop:N #1 }
- }
- }
-\cs_new:Npn \__regex_escape_x_loop:N #1
- {
- \str_if_eq_x:nnTF {#1} { break }
- { ; \__regex_escape_x_loop_error:n { } {#1} }
- {
- \__regex_hexadecimal_use:NTF #1
- { \__regex_escape_x_loop:N }
- {
- \token_if_eq_charcode:NNTF \c_space_token #1
- { \__regex_escape_x_loop:N }
- {
- ;
- \exp_after:wN
- \token_if_eq_charcode:NNTF \c_right_brace_str #1
- { \__regex_escape_loop:N }
- { \__regex_escape_x_loop_error:n {#1} }
- }
- }
- }
- }
-\cs_new:Npn \__regex_escape_x_loop_error:n #1
- {
- \if_false: { \fi: }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__msg_kernel_error:nnx { regex } { x-missing-rbrace } {#1}
- \tl_set:Nx \l__regex_internal_b_tl
- { \if_false: } \fi: \__regex_escape_loop:N #1
- }
-\prg_new_conditional:Npnn \__regex_hexadecimal_use:N #1 { TF }
- {
- \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f:
- #1 \prg_return_true:
- \else:
- \if_case:w \__int_eval:w
- \exp_after:wN ` \token_to_str:N #1 - `a
- \__int_eval_end:
- A
- \or: B
- \or: C
- \or: D
- \or: E
- \or: F
- \else:
- \prg_return_false:
- \exp_after:wN \use_none:n
- \fi:
- \prg_return_true:
- \fi:
- }
-\prg_new_conditional:Npnn \__regex_char_if_special:N #1 { TF }
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \if_int_compare:w `#1 < \c__regex_ascii_max_int
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f:
- \prg_return_true: \else: \prg_return_false: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f:
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f:
- \if_int_compare:w `#1 < `\ \exp_stop_f:
- \prg_return_false: \else: \prg_return_true: \fi:
- \else: \prg_return_false: \fi:
- \fi:
- \fi:
- }
-\prg_new_conditional:Npnn \__regex_char_if_alphanumeric:N #1 { TF }
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \prg_return_false:
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f:
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f:
- \prg_return_false: \else: \prg_return_true: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f:
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \fi:
- }
-\int_new:N \l__regex_group_level_int
-\int_new:N \l__regex_mode_int
-\int_const:Nn \c__regex_cs_in_class_mode_int { -6 }
-\int_const:Nn \c__regex_cs_mode_int { -2 }
-\int_const:Nn \c__regex_outer_mode_int { 0 }
-\int_const:Nn \c__regex_catcode_mode_int { 2 }
-\int_const:Nn \c__regex_class_mode_int { 3 }
-\int_const:Nn \c__regex_catcode_in_class_mode_int { 6 }
-\int_new:N \l__regex_catcodes_int
-\int_new:N \l__regex_default_catcodes_int
-\bool_new:N \l__regex_catcodes_bool
-\int_const:Nn \c__regex_catcode_C_int { "1 }
-\int_const:Nn \c__regex_catcode_B_int { "4 }
-\int_const:Nn \c__regex_catcode_E_int { "10 }
-\int_const:Nn \c__regex_catcode_M_int { "40 }
-\int_const:Nn \c__regex_catcode_T_int { "100 }
-\int_const:Nn \c__regex_catcode_P_int { "1000 }
-\int_const:Nn \c__regex_catcode_U_int { "4000 }
-\int_const:Nn \c__regex_catcode_D_int { "10000 }
-\int_const:Nn \c__regex_catcode_S_int { "100000 }
-\int_const:Nn \c__regex_catcode_L_int { "400000 }
-\int_const:Nn \c__regex_catcode_O_int { "1000000 }
-\int_const:Nn \c__regex_catcode_A_int { "4000000 }
-\int_const:Nn \c__regex_all_catcodes_int { "5515155 }
-\cs_new_eq:NN \l__regex_internal_regex \c__regex_no_match_regex
-\seq_new:N \l__regex_show_prefix_seq
-\int_new:N \l__regex_show_lines_int
-\cs_new_protected:Npn \__regex_get_digits:NTFw #1#2#3#4#5
- {
- \__regex_if_raw_digit:NNTF #4 #5
- { #1 = #5 \__regex_get_digits_loop:nw {#2} }
- { #3 #4 #5 }
- }
-\cs_new:Npn \__regex_get_digits_loop:nw #1#2#3
- {
- \__regex_if_raw_digit:NNTF #2 #3
- { #3 \__regex_get_digits_loop:nw {#1} }
- { \scan_stop: #1 #2 #3 }
- }
-\prg_new_conditional:Npnn \__regex_if_raw_digit:NN #1#2 { TF }
- {
- \if_meaning:w \__regex_compile_raw:N #1
- \if_int_compare:w 1 < 1 #2 \exp_stop_f:
- \prg_return_true:
- \else:
- \prg_return_false:
- \fi:
- \else:
- \prg_return_false:
- \fi:
- }
-\cs_new:Npn \__regex_if_in_class:TF
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \use_ii:nn
- \fi:
- }
-\cs_new:Npn \__regex_if_in_cs:TF
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_ii:nn
- \else:
- \if_int_compare:w \l__regex_mode_int < \c__regex_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_if_in_class_or_catcode:TF
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_if_within_catcode:TF
- {
- \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \use_ii:nn
- \fi:
- }
-\cs_new_protected:Npn \__regex_chk_c_allowed:T
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_outer_mode_int
- \exp_after:wN \use:n
- \else:
- \if_int_compare:w \l__regex_mode_int = \c__regex_class_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use:n
- \else:
- \__msg_kernel_error:nn { regex } { c-bad-mode }
- \exp_after:wN \exp_after:wN \exp_after:wN \use_none:n
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_mode_quit_c:
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_mode_int
- \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
- \else:
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_in_class_mode_int
- \int_set_eq:NN \l__regex_mode_int \c__regex_class_mode_int
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile:w
- {
- \__tl_build_x:Nw \l__regex_internal_regex
- \int_zero:N \l__regex_group_level_int
- \int_set_eq:NN \l__regex_default_catcodes_int \c__regex_all_catcodes_int
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseful_equal:n }
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseful_range:nn }
- \__tl_build_one:n { \__regex_branch:n { \if_false: } \fi: }
- }
-\cs_new_protected:Npn \__regex_compile_end:
- {
- \__regex_if_in_class:TF
- {
- \__msg_kernel_error:nn { regex } { missing-rbrack }
- \use:c { __regex_compile_]: }
- \prg_do_nothing: \prg_do_nothing:
- }
- { }
- \if_int_compare:w \l__regex_group_level_int > 0 \exp_stop_f:
- \__msg_kernel_error:nnx { regex } { missing-rparen }
- { \int_use:N \l__regex_group_level_int }
- \prg_replicate:nn
- { \l__regex_group_level_int }
- {
- \__tl_build_one:n
- {
- \if_false: { \fi: }
- \if_false: { \fi: } { 1 } { 0 } \c_true_bool
- }
- \__tl_build_end:
- \__tl_build_one:o \l__regex_internal_regex
- }
- \fi:
- \__tl_build_one:n { \if_false: { \fi: } }
- \__tl_build_end:
- }
-\cs_new_protected:Npn \__regex_compile:n #1
- {
- \__regex_compile:w
- \__regex_standard_escapechar:
- \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
- \__regex_escape_use:nnnn
- {
- \__regex_char_if_special:NTF ##1
- \__regex_compile_special:N \__regex_compile_raw:N ##1
- }
- {
- \__regex_char_if_alphanumeric:NTF ##1
- \__regex_compile_escaped:N \__regex_compile_raw:N ##1
- }
- { \__regex_compile_raw:N ##1 }
- { #1 }
- \prg_do_nothing: \prg_do_nothing:
- \prg_do_nothing: \prg_do_nothing:
- \int_compare:nNnT \l__regex_mode_int = \c__regex_catcode_mode_int
- { \__msg_kernel_error:nn { regex } { c-trailing } }
- \int_compare:nNnT \l__regex_mode_int < \c__regex_outer_mode_int
- {
- \__msg_kernel_error:nn { regex } { c-missing-rbrace }
- \__regex_compile_end_cs:
- \prg_do_nothing: \prg_do_nothing:
- \prg_do_nothing: \prg_do_nothing:
- }
- \__regex_compile_end:
- }
-\cs_new_protected:Npn \__regex_compile_special:N #1
- {
- \cs_if_exist_use:cF { __regex_compile_#1: }
- { \__regex_compile_raw:N #1 }
- }
-\cs_new_protected:Npn \__regex_compile_escaped:N #1
- {
- \cs_if_exist_use:cF { __regex_compile_/#1: }
- { \__regex_compile_raw:N #1 }
- }
-\cs_new_protected:Npn \__regex_compile_one:x #1
- {
- \__regex_mode_quit_c:
- \__regex_if_in_class:TF { }
- {
- \__tl_build_one:n
- { \__regex_class:NnnnN \c_true_bool { \if_false: } \fi: }
- }
- \__tl_build_one:x
- {
- \if_int_compare:w \l__regex_catcodes_int < \c__regex_all_catcodes_int
- \__regex_item_catcode:nT { \int_use:N \l__regex_catcodes_int }
- { \exp_not:N \exp_not:n {#1} }
- \else:
- \exp_not:N \exp_not:n {#1}
- \fi:
- }
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
- \__regex_if_in_class:TF { } { \__regex_compile_quantifier:w }
- }
-\cs_new_protected:Npn \__regex_compile_abort_tokens:n #1
- {
- \use:x
- {
- \exp_args:No \tl_map_function:nN { \tl_to_str:n {#1} }
- \__regex_compile_raw:N
- }
- }
-\cs_generate_variant:Nn \__regex_compile_abort_tokens:n { x }
-\cs_new_protected:Npn \__regex_compile_quantifier:w #1#2
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_special:N
- {
- \cs_if_exist_use:cF { __regex_compile_quantifier_#2:w }
- { \__regex_compile_quantifier_none: #1 #2 }
- }
- { \__regex_compile_quantifier_none: #1 #2 }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_none:
- { \__tl_build_one:n { \if_false: { \fi: } { 1 } { 0 } \c_false_bool } }
-\cs_new_protected:Npn \__regex_compile_quantifier_abort:xNN #1#2#3
- {
- \__regex_compile_quantifier_none:
- \__msg_kernel_warning:nnxx { regex } { invalid-quantifier } {#1} {#3}
- \__regex_compile_abort_tokens:x {#1}
- #2 #3
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_lazyness:nnNN #1#2#3#4
- {
- \str_if_eq:nnTF { #3 #4 } { \__regex_compile_special:N ? }
- { \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_true_bool } }
- {
- \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_false_bool }
- #3 #4
- }
- }
-\cs_new_protected:cpn { __regex_compile_quantifier_?:w }
- { \__regex_compile_quantifier_lazyness:nnNN { 0 } { 1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_*:w }
- { \__regex_compile_quantifier_lazyness:nnNN { 0 } { -1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_+:w }
- { \__regex_compile_quantifier_lazyness:nnNN { 1 } { -1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_ \c_left_brace_str :w }
- {
- \__regex_get_digits:NTFw \l__regex_internal_a_int
- { \__regex_compile_quantifier_braced_auxi:w }
- { \__regex_compile_quantifier_abort:xNN { \c_left_brace_str } }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxi:w #1#2
- {
- \str_case_x:nnF { #1 #2 }
- {
- { \__regex_compile_special:N \c_right_brace_str }
- {
- \exp_args:No \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int } { 0 }
- }
- { \__regex_compile_special:N , }
- {
- \__regex_get_digits:NTFw \l__regex_internal_b_int
- { \__regex_compile_quantifier_braced_auxiii:w }
- { \__regex_compile_quantifier_braced_auxii:w }
- }
- }
- {
- \__regex_compile_quantifier_abort:xNN
- { \c_left_brace_str \int_use:N \l__regex_internal_a_int }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxii:w #1#2
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \__regex_compile_special:N \c_right_brace_str }
- {
- \exp_args:No \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int } { -1 }
- }
- {
- \__regex_compile_quantifier_abort:xNN
- { \c_left_brace_str \int_use:N \l__regex_internal_a_int , }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxiii:w #1#2
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \__regex_compile_special:N \c_right_brace_str }
- {
- \if_int_compare:w \l__regex_internal_a_int > \l__regex_internal_b_int
- \__msg_kernel_error:nnxx { regex } { backwards-quantifier }
- { \int_use:N \l__regex_internal_a_int }
- { \int_use:N \l__regex_internal_b_int }
- \int_zero:N \l__regex_internal_b_int
- \else:
- \int_sub:Nn \l__regex_internal_b_int \l__regex_internal_a_int
- \fi:
- \exp_args:Noo \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int }
- { \int_use:N \l__regex_internal_b_int }
- }
- {
- \__regex_compile_quantifier_abort:xNN
- {
- \c_left_brace_str
- \int_use:N \l__regex_internal_a_int ,
- \int_use:N \l__regex_internal_b_int
- }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_raw_error:N #1 % Raises the "bad-escape" error for #1, then compiles #1 as a raw character anyway.
- {
- \__msg_kernel_error:nnx { regex } { bad-escape } {#1}
- \__regex_compile_raw:N #1
- }
-\cs_new_protected:Npn \__regex_compile_raw:N #1#2#3 % Compiles the character #1; #2#3 is the token pair that follows it.
- {
- \__regex_if_in_class:TF
- {
- \str_if_eq:nnTF {#2#3} { \__regex_compile_special:N - } % In a class, a following special "-" starts a range.
- { \__regex_compile_range:Nw #1 }
- {
- \__regex_compile_one:x
- { \__regex_item_equal:n { \__int_value:w `#1 ~ } } % Item testing for the character code of #1.
- #2 #3 % Put the look-ahead pair back.
- }
- }
- { % Outside a class: same item, no range check.
- \__regex_compile_one:x
- { \__regex_item_equal:n { \__int_value:w `#1 ~ } }
- #2 #3
- }
- }
-\prg_new_protected_conditional:Npnn \__regex_if_end_range:NN #1#2 { TF } % Tests whether the pair #1#2 is accepted as the end-point of a range.
- {
- \if_meaning:w \__regex_compile_raw:N #1 % A raw character: true.
- \prg_return_true:
- \else:
- \if_meaning:w \__regex_compile_special:N #1
- \if_charcode:w ] #2 % A special "]": false.
- \prg_return_false:
- \else:
- \prg_return_true: % Any other special character: true.
- \fi:
- \else:
- \prg_return_false: % #1 is neither of the two markers: false.
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_range:Nw #1#2#3 % #1 is the first end-point; #2#3 is the candidate pair for the second end-point.
- {
- \__regex_if_end_range:NNTF #2 #3
- {
- \if_int_compare:w `#1 > `#3 \exp_stop_f: % Compares character codes; a decreasing range is an error.
- \__msg_kernel_error:nnxx { regex } { range-backwards } {#1} {#3}
- \else:
- \__tl_build_one:x
- {
- \if_int_compare:w `#1 = `#3 \exp_stop_f: % Equal end-points: a single \__regex_item_equal:n item.
- \__regex_item_equal:n
- \else:
- \__regex_item_range:nn { \__int_value:w `#1 ~ }
- \fi:
- { \__int_value:w `#3 ~ } % Shared last argument of either item.
- }
- \fi:
- }
- { % No valid end-point: warn, then add #1 and "-" as two separate equality items.
- \__msg_kernel_warning:nnxx { regex } { range-missing-end }
- {#1} { \c_backslash_str #3 }
- \__tl_build_one:x
- {
- \__regex_item_equal:n { \__int_value:w `#1 ~ }
- \__regex_item_equal:n { \__int_value:w `- ~ }
- }
- #2#3 % Put the pair back.
- }
- }
-\cs_new_protected:cpx { __regex_compile_.: } % Defined with x-expansion: \exp_not:c builds the __regex_prop_.: name at definition time.
- {
- \exp_not:N \__regex_if_in_class:TF
- { \__regex_compile_raw:N . } % In a class: "." is compiled as a raw character.
- { \__regex_compile_one:x \exp_not:c { __regex_prop_.: } } % Otherwise: compile the __regex_prop_.: test.
- }
-\cs_new_protected:cpn { __regex_prop_.: } % Test used for ".": succeeds when the current character code is greater than -2.
- {
- \if_int_compare:w \l__regex_current_char_int > - 2 \exp_stop_f:
- \exp_after:wN \__regex_break_true:w % \exp_after:wN expands the \fi: before \__regex_break_true:w.
- \fi:
- }
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % Temporary helper: defines the compile functions for the escape pair /#1 and /#2.
- {
- \cs_new_protected:cpx { __regex_compile_/#1: } % /#1 compiles the __regex_prop_#1: test.
- { \__regex_compile_one:x \exp_not:c { __regex_prop_#1: } }
- \cs_new_protected:cpx { __regex_compile_/#2: } % /#2 compiles the same test wrapped in \__regex_item_reverse:n.
- {
- \__regex_compile_one:x
- { \__regex_item_reverse:n \exp_not:c { __regex_prop_#1: } }
- }
- }
-\__regex_tmp:w d D % Applied to the pairs d/D, h/H, s/S, v/V and w/W.
-\__regex_tmp:w h H
-\__regex_tmp:w s S
-\__regex_tmp:w v V
-\__regex_tmp:w w W
-\cs_new_protected:cpn { __regex_compile_/N: } % /N compiles the \__regex_prop_N: test; no reversed variant is defined here.
- { \__regex_compile_one:x \__regex_prop_N: }
-\cs_new_protected:Npn \__regex_compile_anchor:NF #1#2 % #1: integer variable holding a position; #2: fallback code.
- {
- \__regex_if_in_class_or_catcode:TF {#2} % In a class or catcode test: run the fallback #2.
- {
- \__tl_build_one:n % Otherwise append a positive assertion using \__regex_anchor:N #1.
- { \__regex_assertion:Nn \c_true_bool { \__regex_anchor:N #1 } }
- }
- }
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % Temporary helper: defines /#1 as an anchor on the position variable #2.
- {
- \cs_new_protected:cpn { __regex_compile_/#1: }
- { \__regex_compile_anchor:NF #2 { \__regex_compile_raw_error:N #1 } } % Fallback: bad-escape error, then raw #1.
- }
-\__regex_tmp:w A \l__regex_min_pos_int
-\__regex_tmp:w G \l__regex_start_pos_int
-\__regex_tmp:w Z \l__regex_max_pos_int % Z and z are both tied to \l__regex_max_pos_int.
-\__regex_tmp:w z \l__regex_max_pos_int
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % Temporary helper: defines the unescaped character #1 as an anchor on #2.
- {
- \cs_new_protected:cpn { __regex_compile_#1: }
- { \__regex_compile_anchor:NF #2 { \__regex_compile_raw:N #1 } } % Fallback: plain raw #1, without an error.
- }
-\exp_args:Nx \__regex_tmp:w { \iow_char:N \^ } \l__regex_min_pos_int % "^" is obtained through \iow_char:N.
-\exp_args:Nx \__regex_tmp:w { \iow_char:N \$ } \l__regex_max_pos_int % "$" is obtained through \iow_char:N.
-\cs_new_protected:cpn { __regex_compile_/b: } % /b: positive assertion on \__regex_b_test:.
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N b } % In a class or catcode test: bad-escape error, then raw "b".
- {
- \__tl_build_one:n
- { \__regex_assertion:Nn \c_true_bool { \__regex_b_test: } }
- }
- }
-\cs_new_protected:cpn { __regex_compile_/B: } % /B: negative assertion (\c_false_bool) on \__regex_b_test:.
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N B } % In a class or catcode test: bad-escape error, then raw "B".
- {
- \__tl_build_one:n
- { \__regex_assertion:Nn \c_false_bool { \__regex_b_test: } }
- }
- }
-\cs_new_protected:cpn { __regex_compile_]: } % Compile function for an unescaped "]".
- {
- \__regex_if_in_class:TF
- {
- \if_int_compare:w \l__regex_mode_int > \c__regex_catcode_in_class_mode_int
- \__tl_build_one:n { \if_false: { \fi: } } % Appends a lone closing brace (the \if_false: trick keeps this definition balanced).
- \fi:
- \tex_advance:D \l__regex_mode_int - 15 \exp_stop_f: % mode := (mode - 15) / 13, using TeX's integer division.
- \tex_divide:D \l__regex_mode_int 13 \exp_stop_f:
- \if_int_odd:w \l__regex_mode_int \else: % Only when the new mode is even: look for a quantifier.
- \exp_after:wN \__regex_compile_quantifier:w
- \fi:
- }
- { \__regex_compile_raw:N ] } % Outside a class: raw "]".
- }
-\cs_new_protected:cpn { __regex_compile_[: } % Compile function for an unescaped "[".
- {
- \__regex_if_in_class:TF
- { \__regex_compile_class_posix_test:w } % Already in a class: check for a POSIX-style "[:" construct.
- {
- \__regex_if_within_catcode:TF
- {
- \exp_after:wN \__regex_compile_class_catcode:w % The value of catcodes_int is expanded first and passed, delimited by ";".
- \int_use:N \l__regex_catcodes_int ;
- }
- { \__regex_compile_class_normal:w }
- }
- }
-\cs_new_protected:Npn \__regex_compile_class_normal:w % Starts a class; the two arguments are the openers used by \__regex_compile_class:TFNN.
- {
- \__regex_compile_class:TFNN
- { \__regex_class:NnnnN \c_true_bool } % Used when no special "^" follows.
- { \__regex_class:NnnnN \c_false_bool } % Used when a special "^" follows.
- }
-\cs_new_protected:Npn \__regex_compile_class_catcode:w #1; % #1: catcode bitmap value, delimited by ";".
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_mode_int
- \__tl_build_one:n
- { \__regex_class:NnnnN \c_true_bool { \if_false: } \fi: } % Appends "\__regex_class:NnnnN \c_true_bool {" with the brace left open.
- \fi:
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int % Reset the catcodes to the default.
- \__regex_compile_class:TFNN
- { \__regex_item_catcode:nT {#1} } % Used when no special "^" follows.
- { \__regex_item_catcode_reverse:nT {#1} } % Used when a special "^" follows.
- }
-\cs_new_protected:Npn \__regex_compile_class:TFNN #1#2#3#4 % #1/#2: opener for a normal/negated class; #3#4: next token pair.
- {
- \l__regex_mode_int = \__int_value:w \l__regex_mode_int 3 \exp_stop_f: % Appends the digit 3 to the decimal value of the mode.
- \str_if_eq:nnTF { #3 #4 } { \__regex_compile_special:N ^ }
- {
- \__tl_build_one:n { #2 { \if_false: } \fi: } % Negated: append "#2 {" (brace left open); "^" is consumed.
- \__regex_compile_class:NN
- }
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % Normal: append "#1 {" (brace left open); #3 #4 is handed on.
- \__regex_compile_class:NN #3 #4
- }
- }
-\cs_new_protected:Npn \__regex_compile_class:NN #1#2 % Looks at the first token pair inside a class.
- {
- \token_if_eq_charcode:NNTF #2 ] % A leading "]" is compiled as a raw character, whatever #1 is.
- { \__regex_compile_raw:N #2 }
- { #1 #2 } % Anything else is put back unchanged.
- }
-\cs_new_protected:Npn \__regex_compile_class_posix_test:w #1#2 % Called for "[" inside a class; #1#2 is the pair following the "[".
- {
- \token_if_eq_meaning:NNT \__regex_compile_special:N #1
- {
- \str_case:nn { #2 }
- {
- : { \__regex_compile_class_posix:NNNNw } % Its first four arguments are the four tokens "\__regex_compile_raw:N [ #1 #2" left below.
- = { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { = } }
- . { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { . } }
- }
- }
- \__regex_compile_raw:N [ #1 #2 % In all other cases: "[" is compiled as a raw character and #1 #2 is put back.
- }
-\cs_new_protected:Npn \__regex_compile_class_posix:NNNNw #1#2#3#4#5#6 % #1-#4 are not used in the body; #5#6 is the pair tested for "^".
- {
- \str_if_eq:nnTF { #5 #6 } { \__regex_compile_special:N ^ }
- {
- \bool_set_false:N \l__regex_internal_bool % Internal bool false records the "^".
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi: % Opens an x-type assignment whose brace is closed later, in ..._posix_loop:w.
- \__regex_compile_class_posix_loop:w
- }
- {
- \bool_set_true:N \l__regex_internal_bool
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi:
- \__regex_compile_class_posix_loop:w #5 #6 % No "^": #5 #6 is processed by the loop.
- }
- }
-\cs_new:Npn \__regex_compile_class_posix_loop:w #1#2 % Expandable loop over token pairs.
- {
- \token_if_eq_meaning:NNTF \__regex_compile_raw:N #1
- { #2 \__regex_compile_class_posix_loop:w } % Raw character: leave #2 in the result and continue.
- { \if_false: { \fi: } \__regex_compile_class_posix_end:w #1 #2 } % Otherwise: insert the closing brace and hand #1 #2 to ..._posix_end:w.
- }
-\cs_new_protected:Npn \__regex_compile_class_posix_end:w #1#2#3#4 % #1#2 and #3#4 are the two pairs following the collected name.
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \__regex_compile_special:N : \__regex_compile_special:N ] } % Expected terminator: special ":" then special "]".
- {
- \cs_if_exist:cTF { __regex_posix_ \l__regex_internal_a_tl : } % Is there a __regex_posix_<name>: function?
- {
- \__regex_compile_one:x
- {
- \bool_if:NF \l__regex_internal_bool \__regex_item_reverse:n % Wrapped in a reversal when the internal bool is false.
- \exp_not:c { __regex_posix_ \l__regex_internal_a_tl : }
- }
- }
- { % Unknown name: warn and pass the text "[:" [ "^" ] <name> ":]" to \__regex_compile_abort_tokens:x.
- \__msg_kernel_warning:nnx { regex } { posix-unknown }
- { \l__regex_internal_a_tl }
- \__regex_compile_abort_tokens:x
- {
- [: \bool_if:NF \l__regex_internal_bool { ^ }
- \l__regex_internal_a_tl :]
- }
- }
- }
- { % Terminator missing: error, pass "[:<name>" to the abort function, put all four tokens back.
- \__msg_kernel_error:nnxx { regex } { posix-missing-close }
- { [: \l__regex_internal_a_tl } { #2 #4 }
- \__regex_compile_abort_tokens:x { [: \l__regex_internal_a_tl }
- #1 #2 #3 #4
- }
- }
-\cs_new_protected:Npn \__regex_compile_group_begin:N #1 % #1: the group function to emit (callers pass e.g. \__regex_group:nnnN).
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % Appends "#1 {" with the brace left open.
- \__regex_mode_quit_c:
- \__tl_build:Nw \l__regex_internal_regex % Starts a new build into \l__regex_internal_regex.
- \int_set_eq:NN \l__regex_default_catcodes_int \l__regex_catcodes_int % Current catcodes become the default.
- \int_incr:N \l__regex_group_level_int
- \__tl_build_one:n { \__regex_branch:n { \if_false: } \fi: } % Opens the first branch, brace left open.
- }
-\cs_new_protected:Npn \__regex_compile_group_end: % Closes a group, or treats ")" as raw when no group is open.
- {
- \if_int_compare:w \l__regex_group_level_int > 0 \exp_stop_f:
- \__tl_build_one:n { \if_false: { \fi: } } % Appends a lone closing brace.
- \__tl_build_end:
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
- \__tl_build_one:o \l__regex_internal_regex % Appends the contents of \l__regex_internal_regex (expanded once).
- \exp_after:wN \__regex_compile_quantifier:w % \exp_after:wN expands \else: so the skipped branch is removed before the call.
- \else:
- \__msg_kernel_warning:nn { regex } { extra-rparen }
- \exp_after:wN \__regex_compile_raw:N \exp_after:wN ) % Both \exp_after:wN step over tokens to expand the \fi: first.
- \fi:
- }
-\cs_new_protected:cpn { __regex_compile_(: } % Compile function for an unescaped "(".
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N ( } % In a class: raw "(".
- { \__regex_compile_lparen:w } % Otherwise: inspect what follows.
- }
-\cs_new_protected:Npn \__regex_compile_lparen:w #1#2#3#4 % #1#2 and #3#4 are the two token pairs after "(".
- {
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ? } % "(?" introduces a special group selected by the character #4.
- {
- \cs_if_exist_use:cF
- { __regex_compile_special_group_\token_to_str:N #4 :w } % Use the handler named after #4 if it exists (all four tokens are consumed).
- {
- \__msg_kernel_warning:nnx { regex } { special-group-unknown }
- { (? #4 }
- \__regex_compile_group_begin:N \__regex_group:nnnN % No handler: warn, open a normal group, compile "?" as raw, put #3 #4 back.
- \__regex_compile_raw:N ? #3 #4
- }
- }
- {
- \__regex_compile_group_begin:N \__regex_group:nnnN % Plain "(": open a normal group and put all four tokens back.
- #1 #2 #3 #4
- }
- }
-\cs_new_protected:cpn { __regex_compile_|: } % Compile function for an unescaped "|".
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N | } % In a class: raw "|".
- {
- \__tl_build_one:n % Otherwise append "} \__regex_branch:n {": closes one brace, opens a new branch.
- { \if_false: { \fi: } \__regex_branch:n { \if_false: } \fi: }
- }
- }
-\cs_new_protected:cpn { __regex_compile_): } % Compile function for an unescaped ")".
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N ) } % In a class: raw ")".
- { \__regex_compile_group_end: }
- }
-\cs_new_protected:cpn { __regex_compile_special_group_::w } % Handler selected by \__regex_compile_lparen:w for "(?:": opens a no-capture group.
- { \__regex_compile_group_begin:N \__regex_group_no_capture:nnnN }
-\cs_new_protected:cpn { __regex_compile_special_group_|:w } % Handler selected by \__regex_compile_lparen:w for "(?|": opens a resetting group.
- { \__regex_compile_group_begin:N \__regex_group_resetting:nnnN }
-\cs_new_protected:Npn \__regex_compile_special_group_i:w #1#2 % Handler for "(?i"; #1#2 is the pair that follows.
- {
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ) }
- { % "(?i)": from here on, equality and range items map to the caseless variants.
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseless_equal:n }
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseless_range:nn }
- }
- { % Anything else: warn, compile "(", "?", "i" as raw characters, put #1 #2 back.
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?i #2 }
- \__regex_compile_raw:N (
- \__regex_compile_raw:N ?
- \__regex_compile_raw:N i
- #1 #2
- }
- }
-\cs_new_protected:cpn { __regex_compile_special_group_-:w } #1#2#3#4 % Handler for "(?-"; #1#2 and #3#4 are the next two pairs.
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \__regex_compile_raw:N i \__regex_compile_special:N ) } % Only "(?-i)" is recognised.
- { % Equality and range items map to the caseful variants again.
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseful_equal:n }
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseful_range:nn }
- }
- { % Anything else: warn, compile "(", "?", "-" as raw characters, put the four tokens back.
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?-#2#4 }
- \__regex_compile_raw:N (
- \__regex_compile_raw:N ?
- \__regex_compile_raw:N -
- #1 #2 #3 #4
- }
- }
-\cs_new_protected:cpn { __regex_compile_/c: } % /c: runs \__regex_compile_c_test:NN as the T branch of \__regex_chk_c_allowed:T.
- { \__regex_chk_c_allowed:T { \__regex_compile_c_test:NN } }
-\cs_new_protected:Npn \__regex_compile_c_test:NN #1#2 % #1#2: the token pair following /c.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- {
- \int_if_exist:cTF { c__regex_catcode_#2_int } % Raw #2: is there a catcode constant named after it? (F branch is the shared group below.)
- {
- \int_set_eq:Nc \l__regex_catcodes_int { c__regex_catcode_#2_int }
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % Mode 0 becomes catcode mode; any other mode becomes catcode-in-class mode.
- \c__regex_catcode_mode_int
- \else:
- \c__regex_catcode_in_class_mode_int
- \fi:
- }
- }
- { \cs_if_exist_use:cF { __regex_compile_c_#2:w } } % Not raw: use the handler __regex_compile_c_#2:w if it exists.
- { % Shared F branch: the braced group below is the missing F argument of whichever inner conditional ran.
- \__msg_kernel_error:nnx { regex } { c-missing-category } {#2}
- #1 #2
- }
- }
-\cs_new_protected:cpn { __regex_compile_c_[:w } #1#2 % Handler for /c followed by "["; #1#2 is the next pair.
- {
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % Mode 0 becomes catcode mode; any other mode becomes catcode-in-class mode.
- \c__regex_catcode_mode_int
- \else:
- \c__regex_catcode_in_class_mode_int
- \fi:
- \int_zero:N \l__regex_catcodes_int % Start from an empty set of catcodes.
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ^ }
- {
- \bool_set_false:N \l__regex_catcodes_bool % Leading "^": bool false, "^" consumed.
- \__regex_compile_c_lbrack_loop:NN
- }
- {
- \bool_set_true:N \l__regex_catcodes_bool
- \__regex_compile_c_lbrack_loop:NN
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_loop:NN #1#2 % Loops over token pairs, collecting catcode letters.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- {
- \int_if_exist:cTF { c__regex_catcode_#2_int } % Raw #2 naming a known catcode constant: add it and continue.
- {
- \exp_args:Nc \__regex_compile_c_lbrack_add:N
- { c__regex_catcode_#2_int }
- \__regex_compile_c_lbrack_loop:NN
- }
- }
- {
- \token_if_eq_charcode:NNTF #2 ] % Not raw: a "]" ends the list.
- { \__regex_compile_c_lbrack_end: }
- }
- { % Shared F branch of whichever inner conditional ran: error, finish the list, put #1 #2 back.
- \__msg_kernel_error:nnx { regex } { c-missing-rbrack } {#2}
- \__regex_compile_c_lbrack_end:
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_add:N #1 % #1: a catcode constant to add to \l__regex_catcodes_int.
- {
- \if_int_odd:w \__int_eval:w \l__regex_catcodes_int / #1 \__int_eval_end: % Odd quotient: nothing is added.
- \else:
- \int_add:Nn \l__regex_catcodes_int {#1}
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_end: % Finishes a catcode list.
- {
- \if_meaning:w \c_false_bool \l__regex_catcodes_bool % Bool false: replace the value by (all catcodes) - (value).
- \int_set:Nn \l__regex_catcodes_int
- { \c__regex_all_catcodes_int - \l__regex_catcodes_int }
- \fi:
- }
-\cs_new_protected:cpn { __regex_compile_c_ \c_left_brace_str :w } % Handler for /c followed by a left brace.
- {
- \__regex_compile:w % Starts a nested compilation.
- \__regex_disable_submatches:
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % Mode 0 becomes cs mode; any other mode becomes cs-in-class mode.
- \c__regex_cs_mode_int
- \else:
- \c__regex_cs_in_class_mode_int
- \fi:
- }
-\flag_new:n { __regex_cs } % Flag cleared and tested in \__regex_compile_end_cs:, raised by the \__regex_compile_cs_aux functions.
-\cs_new_protected:cpn { __regex_compile_ \c_right_brace_str : } % Compile function for an unescaped right brace.
- {
- \__regex_if_in_cs:TF
- { \__regex_compile_end_cs: }
- { \exp_after:wN \__regex_compile_raw:N \c_right_brace_str } % Otherwise raw; \c_right_brace_str is expanded first.
- }
-\cs_new_protected:Npn \__regex_compile_end_cs: % Ends the nested compilation started by the /c left-brace handler.
- {
- \__regex_compile_end:
- \flag_clear:n { __regex_cs }
- \tl_set:Nx \l__regex_internal_a_tl % Expandably walks the compiled regex; the walk may raise the flag.
- {
- \exp_after:wN \__regex_compile_cs_aux:Nn \l__regex_internal_regex
- \q_nil \q_nil \q_recursion_stop
- }
- \exp_args:Nx \__regex_compile_one:x
- {
- \flag_if_raised:nTF { __regex_cs }
- { \__regex_item_cs:n { \exp_not:o \l__regex_internal_regex } } % Flag raised: keep the full compiled regex.
- { \__regex_item_exact_cs:n { \tl_tail:N \l__regex_internal_a_tl } } % Not raised: use the collected text without its first item.
- }
- }
-\cs_new:Npn \__regex_compile_cs_aux:Nn #1#2 % Expandable loop over "<function> {<argument>}" pairs.
- {
- \cs_if_eq:NNTF #1 \__regex_branch:n
- {
- \scan_stop: % Each branch contributes a \scan_stop: followed by the characters found in it.
- \__regex_compile_cs_aux:NNnnnN #2
- \q_nil \q_nil \q_nil \q_nil \q_nil \q_nil \q_recursion_stop
- \__regex_compile_cs_aux:Nn
- }
- {
- \quark_if_nil:NF #1 { \flag_raise:n { __regex_cs } } % Anything other than a branch or the \q_nil end marker raises the flag.
- \use_none_delimit_by_q_recursion_stop:w
- }
- }
-\cs_new:Npn \__regex_compile_cs_aux:NNnnnN #1#2#3#4#5#6 % Expandable loop over six-token groups inside one branch.
- {
- \bool_lazy_all:nTF
- {
- { \cs_if_eq_p:NN #1 \__regex_class:NnnnN } % Must be a class node ...
- {#2} % ... whose boolean #2 is true ...
- { \tl_if_head_eq_meaning_p:nN {#3} \__regex_item_caseful_equal:n } % ... starting with a caseful equality item ...
- { \int_compare_p:nNn { \tl_count:n {#3} } = { 2 } } % ... with exactly two items in #3 ...
- { \int_compare_p:nNn {#5} = { 0 } } % ... and #5 equal to 0.
- }
- {
- \prg_replicate:nn {#4} % Emit #4 copies of the character (second item of #3) with category code 12.
- { \char_generate:nn { \use_ii:nn #3 } {12} }
- \__regex_compile_cs_aux:NNnnnN
- }
- {
- \quark_if_nil:NF #1 % Not the \q_nil end marker: raise the flag and keep the next item, dropping the rest up to \q_recursion_stop.
- {
- \flag_raise:n { __regex_cs }
- \use_i_delimit_by_q_recursion_stop:nw
- }
- \use_none_delimit_by_q_recursion_stop:w
- }
- }
-\cs_new_protected:cpn { __regex_compile_/u: } #1#2 % /u; #1#2 is the pair that follows.
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N u #1 #2 } % In a class or catcode test: bad-escape error, then raw "u".
- {
- \str_if_eq_x:nnTF {#1#2} { \__regex_compile_special:N \c_left_brace_str } % A special left brace must follow.
- {
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi: % Opens an x-type assignment; \__regex_compile_u_loop:NN inserts the closing brace.
- \__regex_compile_u_loop:NN
- }
- {
- \__msg_kernel_error:nn { regex } { u-missing-lbrace }
- \__regex_compile_raw:N u #1 #2
- }
- }
- }
-\cs_new:Npn \__regex_compile_u_loop:NN #1#2 % Expandable loop over token pairs, collecting characters.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- { #2 \__regex_compile_u_loop:NN } % Raw character: keep #2 and continue.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_special:N
- {
- \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2 % Special right brace: insert the closing brace and finish.
- { \if_false: { \fi: } \__regex_compile_u_end: }
- { #2 \__regex_compile_u_loop:NN } % Any other special character is kept as well.
- }
- { % Neither marker: insert the closing brace, report the error, finish, put #1 #2 back.
- \if_false: { \fi: }
- \__msg_kernel_error:nnx { regex } { u-missing-rbrace } {#2}
- \__regex_compile_u_end:
- #1 #2
- }
- }
- }
-\cs_new_protected:Npn \__regex_compile_u_end: % Finishes /u once the collected name is in \l__regex_internal_a_tl.
- {
- \tl_set:Nv \l__regex_internal_a_tl { \l__regex_internal_a_tl } % Replaces the name by the value of the variable with that name.
- \if_int_compare:w \l__regex_mode_int = \c__regex_outer_mode_int
- \__regex_compile_u_not_cs:
- \else:
- \__regex_compile_u_in_cs:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_u_in_cs: % /u when the mode is not the outer mode.
- {
- \tl_gset:Nx \g__regex_internal_tl
- { \exp_args:No \__str_to_other_fast:n { \l__regex_internal_a_tl } } % Stores the result of \__str_to_other_fast:n applied to the value.
- \__tl_build_one:x
- {
- \tl_map_function:NN \g__regex_internal_tl % One class node per item of the stored token list.
- \__regex_compile_u_in_cs_aux:n
- }
- }
-\cs_new:Npn \__regex_compile_u_in_cs_aux:n #1 % Produces the class node for one character #1.
- {
- \__regex_class:NnnnN \c_true_bool
- { \__regex_item_caseful_equal:n { \__int_value:w `#1 } } % Caseful equality with the character code of #1.
- { 1 } { 0 } \c_false_bool
- }
-\cs_new_protected:Npn \__regex_compile_u_not_cs: % /u in outer mode: one class node per token of the value.
- {
- \exp_args:No \__tl_analysis_map_inline:nn { \l__regex_internal_a_tl }
- {
- \__tl_build_one:n % Appended unexpanded: the conditional below is stored in the node, not evaluated here.
- {
- \__regex_class:NnnnN \c_true_bool
- {
- \if_int_compare:w "##2 = 0 \exp_stop_f: % ##2 is read as a hexadecimal number; 0 selects the control-sequence item.
- \__regex_item_exact_cs:n { \exp_after:wN \cs_to_str:N ##1 }
- \else:
- \__regex_item_exact:nn { \__int_value:w "##2 } { ##3 }
- \fi:
- }
- { 1 } { 0 } \c_false_bool
- }
- }
- }
-\cs_new_protected:cpn { __regex_compile_/K: } % /K is accepted only in the outer mode.
- {
- \int_compare:nNnTF \l__regex_mode_int = \c__regex_outer_mode_int
- { \__tl_build_one:n { \__regex_command_K: } }
- { \__regex_compile_raw_error:N K } % Elsewhere: bad-escape error, then raw "K".
- }
-\cs_new_protected:Npn \__regex_show:Nn #1#2 % #1: variable holding a compiled regex; #2: text inserted after "Compiled regex" in the output.
- {
- \__tl_build:Nw \l__regex_internal_a_tl % The description is built into \l__regex_internal_a_tl.
- \cs_set_protected:Npn \__regex_branch:n % From here on, every node function is redefined to describe itself instead of acting.
- {
- \seq_pop_right:NN \l__regex_show_prefix_seq \l__regex_internal_a_tl % The last prefix is removed while the branch line is shown, then restored.
- \__regex_show_one:n { +-branch }
- \seq_put_right:No \l__regex_show_prefix_seq \l__regex_internal_a_tl
- \use:n
- }
- \cs_set_protected:Npn \__regex_group:nnnN
- { \__regex_show_group_aux:nnnnN { } }
- \cs_set_protected:Npn \__regex_group_no_capture:nnnN
- { \__regex_show_group_aux:nnnnN { ~(no~capture) } }
- \cs_set_protected:Npn \__regex_group_resetting:nnnN
- { \__regex_show_group_aux:nnnnN { ~(resetting) } }
- \cs_set_eq:NN \__regex_class:NnnnN \__regex_show_class:NnnnN
- \cs_set_protected:Npn \__regex_command_K:
- { \__regex_show_one:n { reset~match~start~(\iow_char:N\\K) } }
- \cs_set_protected:Npn \__regex_assertion:Nn ##1##2
- { \__regex_show_one:n { \bool_if:NF ##1 { negative~ } assertion:~##2 } }
- \cs_set:Npn \__regex_b_test: { word~boundary }
- \cs_set_eq:NN \__regex_anchor:N \__regex_show_anchor_to_str:N
- \cs_set_protected:Npn \__regex_item_caseful_equal:n ##1
- { \__regex_show_one:n { char~code~\int_eval:n{##1} } }
- \cs_set_protected:Npn \__regex_item_caseful_range:nn ##1##2
- { \__regex_show_one:n { range~[\int_eval:n{##1}, \int_eval:n{##2}] } }
- \cs_set_protected:Npn \__regex_item_caseless_equal:n ##1
- { \__regex_show_one:n { char~code~\int_eval:n{##1}~(caseless) } }
- \cs_set_protected:Npn \__regex_item_caseless_range:nn ##1##2
- {
- \__regex_show_one:n
- { Range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) }
- }
- \cs_set_protected:Npn \__regex_item_catcode:nT
- { \__regex_show_item_catcode:NnT \c_true_bool }
- \cs_set_protected:Npn \__regex_item_catcode_reverse:nT
- { \__regex_show_item_catcode:NnT \c_false_bool }
- \cs_set_protected:Npn \__regex_item_reverse:n
- { \__regex_show_scope:nn { Reversed~match } }
- \cs_set_protected:Npn \__regex_item_exact:nn ##1##2
- { \__regex_show_one:n { char~##2,~catcode~##1 } }
- \cs_set_eq:NN \__regex_item_exact_cs:n \__regex_show_item_exact_cs:n
- \cs_set_protected:Npn \__regex_item_cs:n
- { \__regex_show_scope:nn { control~sequence } }
- \cs_set:cpn { __regex_prop_.: } { \__regex_show_one:n { any~token } }
- \seq_clear:N \l__regex_show_prefix_seq % Start with an empty prefix stack, then push one level.
- \__regex_show_push:n { ~ }
- \cs_if_exist_use:N #1 % Run the compiled regex (if #1 exists) with the redefinitions above.
- \__tl_build_end:
- \__msg_show_variable:NNNnn #1 \cs_if_exist:NTF ? { }
- { >~Compiled~regex~#2: \l__regex_internal_a_tl }
- }
-\cs_new_protected:Npn \__regex_show_one:n #1 % Appends one line with text #1 to the description being built.
- {
- \int_incr:N \l__regex_show_lines_int % Counts the lines produced.
- \__tl_build_one:x
- {
- \exp_not:N \\ % A literal \\ token, then all current prefixes, then #1 (x-expanded).
- \seq_map_function:NN \l__regex_show_prefix_seq \use:n
- #1
- }
- }
-\cs_new_protected:Npn \__regex_show_push:n #1 % Pushes the prefix #1 followed by a space onto \l__regex_show_prefix_seq (x-expanded).
- { \seq_put_right:Nx \l__regex_show_prefix_seq { #1 ~ } }
-\cs_new_protected:Npn \__regex_show_pop: % Removes the last prefix; the popped value goes to \l__regex_internal_a_tl.
- { \seq_pop_right:NN \l__regex_show_prefix_seq \l__regex_internal_a_tl }
-\cs_new_protected:Npn \__regex_show_scope:nn #1#2 % Shows the heading #1, then runs #2 one prefix level deeper.
- {
- \__regex_show_one:n {#1}
- \__regex_show_push:n { ~ }
- #2
- \__regex_show_pop:
- }
-\cs_new_protected:Npn \__regex_show_group_aux:nnnnN #1#2#3#4#5 % #1: suffix for the heading; #2: group contents; #3 #4 #5: repetition data.
- {
- \__regex_show_one:n { ,-group~begin #1 }
- \__regex_show_push:n { | }
- \use_ii:nn #2 % Drops the first item of #2 and runs the second (presumably the first \__regex_branch:n and its body - confirm against the compiler).
- \__regex_show_pop:
- \__regex_show_one:n
- { `-group~end \__regex_msg_repeated:nnN {#3} {#4} #5 }
- }
-\cs_set:Npn \__regex_show_class:NnnnN #1#2#3#4#5 % Show-mode replacement for \__regex_class:NnnnN (see \__regex_show:Nn).
- {
- \__tl_build:Nw \l__regex_internal_a_tl % Dry run of #2 into a nested build, counting the lines it produces.
- \int_zero:N \l__regex_show_lines_int
- \__regex_show_push:n {~}
- #2
- \exp_last_unbraced:Nf
- \int_case:nnF { \l__regex_show_lines_int }
- {
- {0} % No line produced: "Fail" when #1 is true, "Pass" otherwise.
- {
- \__tl_build_end:
- \__regex_show_one:n { \bool_if:NTF #1 { Fail } { Pass } }
- }
- {1} % Exactly one line produced.
- {
- \__tl_build_end:
- \bool_if:NTF #1
- {
- #2 % #1 true: run #2 again after ending the nested build and append the repetition text.
- \__tl_build_one:n { \__regex_msg_repeated:nnN {#3} {#4} #5 }
- }
- {
- \__regex_show_one:n
- { Don't~match~\__regex_msg_repeated:nnN {#3} {#4} #5 }
- \__tl_build_one:o \l__regex_internal_a_tl
- }
- }
- }
- { % Any other count: a "Match"/"Don't match" heading followed by the lines collected in the dry run.
- \__tl_build_end:
- \__regex_show_one:n
- {
- \bool_if:NTF #1 { M } { Don't~m } atch
- \__regex_msg_repeated:nnN {#3} {#4} #5
- }
- \__tl_build_one:o \l__regex_internal_a_tl
- }
- }
-\cs_new:Npn \__regex_show_anchor_to_str:N #1 % Expands to a description of the anchor on position variable #1.
- {
- anchor~at~
- \str_case:nnF { #1 }
- {
- { \l__regex_min_pos_int } { start~(\iow_char:N\\A) }
- { \l__regex_start_pos_int } { start~of~match~(\iow_char:N\\G) }
- { \l__regex_max_pos_int } { end~(\iow_char:N\\Z) }
- }
- { <error:~'#1'~not~recognized> } % Fallback for any other argument.
- }
-\cs_new_protected:Npn \__regex_show_item_catcode:NnT #1#2 % #1: boolean (false selects "negative"); #2: catcode bitmap; the T code is taken as the second argument of \__regex_show_scope:nn.
- {
- \seq_set_split:Nnn \l__regex_internal_seq { } { CBEMTPUDSLOA } % Empty delimiter: one sequence item per letter.
- \seq_set_filter:NNn \l__regex_internal_seq \l__regex_internal_seq
- { \int_if_odd_p:n { #2 / \int_use:c { c__regex_catcode_##1_int } } } % Keep a letter when #2 divided by its constant is odd.
- \__regex_show_scope:nn
- {
- categories~
- \seq_map_function:NN \l__regex_internal_seq \use:n
- , ~
- \bool_if:NF #1 { negative~ } class
- }
- }
-\cs_new_protected:Npn \__regex_show_item_exact_cs:n #1 % #1: control sequence names separated by \scan_stop:.
- {
- \seq_set_split:Nnn \l__regex_internal_seq { \scan_stop: } {#1}
- \seq_set_map:NNn \l__regex_internal_seq
- \l__regex_internal_seq { \iow_char:N\\##1 } % Prefix each name with a backslash character.
- \__regex_show_one:n
- { control~sequence~ \seq_use:Nn \l__regex_internal_seq { ~or~ } } % Names are joined with " or ".
- }
-\int_new:N \l__regex_min_state_int % Variables used by the \__regex_build functions that follow.
-\int_set:Nn \l__regex_min_state_int { 1 } % The minimum state number is initialised to 1.
-\int_new:N \l__regex_max_state_int % Incremented by \__regex_build_new_state:.
-\int_new:N \l__regex_left_state_int
-\int_new:N \l__regex_right_state_int
-\seq_new:N \l__regex_left_state_seq % Stacks used by \__regex_push_lr_states: and \__regex_pop_lr_states:.
-\seq_new:N \l__regex_right_state_seq
-\int_new:N \l__regex_capturing_group_int % Zeroed in \__regex_build:N, incremented in \__regex_group:nnnN.
-\cs_new_protected:Npn \__regex_build:n #1 % Compiles #1, then builds from \l__regex_internal_regex.
- {
- \__regex_compile:n {#1}
- \__regex_build:N \l__regex_internal_regex
- }
-\cs_new_protected:Npn \__regex_build:N #1 % #1: variable holding a compiled regex.
- {
- \trace_push:nnn { regex } { 1 } { __regex_build }
- \__regex_standard_escapechar:
- \int_zero:N \l__regex_capturing_group_int
- \int_set_eq:NN \l__regex_max_state_int \l__regex_min_state_int % State numbering restarts at the minimum.
- \__regex_build_new_state: % Two calls, so that both the left and right state are fresh.
- \__regex_build_new_state:
- \__regex_toks_put_right:Nn \l__regex_left_state_int
- { \__regex_action_start_wildcard: }
- \__regex_group:nnnN {#1} { 1 } { 0 } \c_false_bool % The whole regex is wrapped in one group with arguments {1} {0} \c_false_bool.
- \__regex_toks_put_right:Nn \l__regex_right_state_int
- { \__regex_action_success: } % The final right state carries the success action.
- \__regex_trace_states:n { 2 }
- \trace_pop:nnn { regex } { 1 } { __regex_build }
- }
-\cs_new_protected:Npn \__regex_build_for_cs:n #1 % #1: compiled regex code, run directly between two fresh states.
- {
- \trace_push:nnn { regex } { 1 } { __regex_build_for_cs }
- \int_set_eq:NN \l__regex_max_state_int \l__regex_min_state_int
- \__regex_build_new_state:
- \__regex_build_new_state:
- \__regex_push_lr_states:
- #1
- \__regex_pop_lr_states:
- \__regex_toks_put_right:Nn \l__regex_right_state_int
- {
- \if_int_compare:w \l__regex_current_pos_int = \l__regex_max_pos_int % Success only when the current position equals the maximum position.
- \exp_after:wN \__regex_action_success:
- \fi:
- }
- \__regex_trace_states:n { 2 }
- \trace_pop:nnn { regex } { 1 } { __regex_build_for_cs }
- }
-\cs_new_protected:Npn \__regex_push_lr_states: % Saves the current left and right state numbers on their stacks.
- {
- \seq_push:No \l__regex_left_state_seq
- { \int_use:N \l__regex_left_state_int } % One expansion turns \int_use:N <int> into the number itself.
- \seq_push:No \l__regex_right_state_seq
- { \int_use:N \l__regex_right_state_int }
- }
-\cs_new_protected:Npn \__regex_pop_lr_states: % Restores the left and right state numbers from their stacks; uses \l__regex_internal_a_tl as scratch.
- {
- \seq_pop:NN \l__regex_left_state_seq \l__regex_internal_a_tl
- \int_set:Nn \l__regex_left_state_int \l__regex_internal_a_tl
- \seq_pop:NN \l__regex_right_state_seq \l__regex_internal_a_tl
- \int_set:Nn \l__regex_right_state_int \l__regex_internal_a_tl
- }
-\cs_new_protected:Npn \__regex_build_transition_left:NNN #1#2#3 % Adds "#1 {#3 - #2}" on the left of the toks of state #2: the target is stored relative to #2.
- { \__regex_toks_put_left:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
-\cs_new_protected:Npn \__regex_build_transition_right:nNn #1#2#3 % Adds "#1 {#3 - #2}" on the right of the toks of state #2: the target is stored relative to #2.
- { \__regex_toks_put_right:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
-\cs_new_protected:Npn \__regex_build_new_state: % Allocates one new state and shifts the left/right pair onto it.
- {
- \trace:nnx { regex } { 2 }
- {
- regex~new~state~
- L=\int_use:N \l__regex_left_state_int ~ -> ~
- R=\int_use:N \l__regex_right_state_int ~ -> ~
- M=\int_use:N \l__regex_max_state_int ~ -> ~
- \int_eval:n { \l__regex_max_state_int + 1 }
- }
- \__regex_toks_clear:N \l__regex_max_state_int % The toks of the new state is cleared.
- \int_set_eq:NN \l__regex_left_state_int \l__regex_right_state_int % left := old right.
- \int_set_eq:NN \l__regex_right_state_int \l__regex_max_state_int % right := new state.
- \int_incr:N \l__regex_max_state_int % max now points past the new state.
- }
-\cs_new_protected:Npn \__regex_build_transitions_lazyness:NNNNN #1#2#3#4#5 % #1: boolean; #2/#3 and #4/#5: two (action, target state) pairs.
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int % Both transitions are stored in the left state, relative to it.
- {
- \if_meaning:w \c_true_bool #1 % #1 true: the #2/#3 transition comes first.
- #2 { \int_eval:n { #3 - \l__regex_left_state_int } }
- #4 { \int_eval:n { #5 - \l__regex_left_state_int } }
- \else: % #1 false: the #4/#5 transition comes first.
- #4 { \int_eval:n { #5 - \l__regex_left_state_int } }
- #2 { \int_eval:n { #3 - \l__regex_left_state_int } }
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_class:NnnnN #1#2#3#4#5 % #1: boolean; #2: tests; #3 and #4: repetition counts; #5: boolean passed on to the repeat functions.
- {
- \cs_set:Npx \__regex_tests_action_cost:n ##1 % Defines the transition action: the tests #2 followed by a break point.
- {
- \exp_not:n { \exp_not:n {#2} } % Doubly protected, so #2 also survives the later x-expansion when the action is stored.
- \bool_if:NTF #1
- { \__regex_break_point:TF { \__regex_action_cost:n {##1} } { } } % #1 true: the cost action is in the T branch.
- { \__regex_break_point:TF { } { \__regex_action_cost:n {##1} } } % #1 false: the cost action is in the F branch.
- }
- \if_case:w - #4 \exp_stop_f: % Dispatch on -#4: 0 selects the first line, 1 (#4 = -1) the second, anything else the third.
- \__regex_class_repeat:n {#3}
- \or: \__regex_class_repeat:nN {#3} #5
- \else: \__regex_class_repeat:nnN {#3} {#4} #5
- \fi:
- }
-\cs_new:Npn \__regex_tests_action_cost:n { \__regex_action_cost:n } % Initial definition; \__regex_class:NnnnN redefines it for each class.
-\cs_new_protected:Npn \__regex_class_repeat:n #1 % Builds #1 states in a row, each reached from the previous one by the tests-and-cost action.
- {
- \prg_replicate:nn {#1}
- {
- \__regex_build_new_state:
- \__regex_build_transition_right:nNn \__regex_tests_action_cost:n
- \l__regex_left_state_int \l__regex_right_state_int
- }
- }
-\cs_new_protected:Npn \__regex_class_repeat:nN #1#2 % Selected by \__regex_class:NnnnN when its #4 is -1; #1: count, #2: boolean.
- {
- \if_int_compare:w #1 = 0 \exp_stop_f:
- \__regex_build_transitions_lazyness:NNNNN #2 % Count 0: a free transition to the right state and a tests-and-cost loop on the left state.
- \__regex_action_free:n \l__regex_right_state_int
- \__regex_tests_action_cost:n \l__regex_left_state_int
- \else:
- \__regex_class_repeat:n {#1} % Otherwise #1 copies first ...
- \int_set_eq:NN \l__regex_internal_a_int \l__regex_left_state_int % ... remembering the left state of the last copy,
- \__regex_build_transitions_lazyness:NNNNN #2 % then free transitions forward and back to that remembered state.
- \__regex_action_free:n \l__regex_right_state_int
- \__regex_action_free:n \l__regex_internal_a_int
- \fi:
- }
-\cs_new_protected:Npn \__regex_class_repeat:nnN #1#2#3 % #1: first count, #2: second count, #3: boolean.
- {
- \__regex_class_repeat:n {#1} % #1 copies first.
- \int_set:Nn \l__regex_internal_a_int
- { \l__regex_max_state_int + #2 - 1 } % The state that will be the right state after the #2 steps below.
- \prg_replicate:nn { #2 } % Each step: a free transition to that state, or the tests-and-cost action to the next state.
- {
- \__regex_build_transitions_lazyness:NNNNN #3
- \__regex_action_free:n \l__regex_internal_a_int
- \__regex_tests_action_cost:n \l__regex_right_state_int
- }
- }
-\cs_new_protected:Npn \__regex_group_aux:nnnnN #1#2#3#4#5 % #1: group number (-1 from the non-capturing callers); #2: contents; #3 and #4: counts; #5: boolean.
- {
- \trace_push:nnn { regex } { 1 } { __regex_group }
- \if_int_compare:w #3 = 0 \exp_stop_f: % When #3 is 0, an extra state and a \__regex_action_free_group:n transition come first.
- \__regex_build_new_state:
- \__regex_build_transition_right:nNn \__regex_action_free_group:n
- \l__regex_left_state_int \l__regex_right_state_int
- \fi:
- \__regex_build_new_state:
- \__regex_push_lr_states: % The contents #2 run with the left/right pair saved, then restored.
- #2
- \__regex_pop_lr_states:
- \if_case:w - #4 \exp_stop_f: % Dispatch on -#4: 0 selects the first line, 1 (#4 = -1) the second, anything else the third.
- \__regex_group_repeat:nn {#1} {#3}
- \or: \__regex_group_repeat:nnN {#1} {#3} #5
- \else: \__regex_group_repeat:nnnN {#1} {#3} {#4} #5
- \fi:
- \trace_pop:nnn { regex } { 1 } { __regex_group }
- }
-\cs_new_protected:Npn \__regex_group:nnnN #1 % Capturing group; the remaining three arguments are left for \__regex_group_aux:nnnnN.
- {
- \exp_args:No \__regex_group_aux:nnnnN
- { \int_use:N \l__regex_capturing_group_int } % The current counter value becomes the group number.
- {
- \int_incr:N \l__regex_capturing_group_int % The counter is incremented only when the contents are run.
- #1
- }
- }
-\cs_new_protected:Npn \__regex_group_no_capture:nnnN % Non-capturing group: group number -1, so \__regex_group_submatches:nNN records nothing.
- { \__regex_group_aux:nnnnN { -1 } }
-\cs_new_protected:Npn \__regex_group_resetting:nnnN #1 % Resetting group: itself non-capturing (-1); #1 holds the branches.
- {
- \__regex_group_aux:nnnnN { -1 }
- {
- \exp_args:Noo \__regex_group_resetting_loop:nnNn
- { \int_use:N \l__regex_capturing_group_int } % Running maximum, initially the current counter value.
- { \int_use:N \l__regex_capturing_group_int } % Value the counter is reset to before each branch.
- #1
- { ?? \__prg_break:n } { } % End marker consumed by the loop as its #3 and #4.
- \__prg_break_point:
- }
- }
-\cs_new_protected:Npn \__regex_group_resetting_loop:nnNn #1#2#3#4 % #1: running maximum; #2: reset value; #3 {#4}: one branch.
- {
- \use_none:nn #3 { \int_set:Nn \l__regex_capturing_group_int {#1} } % Single-token #3: no effect. End marker "?? \__prg_break:n": sets the counter to #1 and breaks out.
- \int_set:Nn \l__regex_capturing_group_int {#2} % Reset the counter, then run the branch.
- #3 {#4}
- \exp_args:Nf \__regex_group_resetting_loop:nnNn
- { \int_max:nn {#1} { \l__regex_capturing_group_int } } % The new running maximum is evaluated before looping.
- {#2}
- }
-\cs_new_protected:Npn \__regex_branch:n #1 % Builds one alternative #1 of the enclosing group.
- {
- \trace_push:nnn { regex } { 1 } { __regex_branch }
- \__regex_build_new_state:
- \seq_get:NN \l__regex_left_state_seq \l__regex_internal_a_tl % The left state becomes the one on top of the left stack (read, not popped).
- \int_set:Nn \l__regex_left_state_int \l__regex_internal_a_tl
- \__regex_build_transition_right:nNn \__regex_action_free:n % Free transition from there into the branch.
- \l__regex_left_state_int \l__regex_right_state_int
- #1
- \seq_get:NN \l__regex_right_state_seq \l__regex_internal_a_tl % Free transition from the end of the branch to the state on top of the right stack.
- \__regex_build_transition_right:nNn \__regex_action_free:n
- \l__regex_right_state_int \l__regex_internal_a_tl
- \trace_pop:nnn { regex } { 1 } { __regex_branch }
- }
-\cs_new_protected:Npn \__regex_group_repeat:nn #1#2 % #1: group number; #2: count. Selected when #4 of \__regex_group_aux:nnnnN is 0.
- {
- \if_int_compare:w #2 = 0 \exp_stop_f:
- \int_set:Nn \l__regex_max_state_int % Count 0: max is moved back to just before the left state, then a new state is made.
- { \l__regex_left_state_int - 1 }
- \__regex_build_new_state:
- \else:
- \__regex_group_repeat_aux:n {#2} % Otherwise: make the copies, add submatch actions, then a new state.
- \__regex_group_submatches:nNN {#1}
- \l__regex_internal_a_int \l__regex_right_state_int
- \__regex_build_new_state:
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_submatches:nNN #1#2#3 % #1: group number; #2 and #3: state variables.
- {
- \if_int_compare:w #1 > - 1 \exp_stop_f: % Nothing is done for a group number of -1 or less.
- \__regex_toks_put_left:Nx #2 { \__regex_action_submatch:n { #1 < } } % "<" action on the left of state #2.
- \__regex_toks_put_left:Nx #3 { \__regex_action_submatch:n { #1 > } } % ">" action on the left of state #3.
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat_aux:n #1 % #1: number of copies of the group wanted (integer expression).
- {
- \__regex_build_transition_right:nNn \__regex_action_free:n % Free transition from the right state to the max state.
- \l__regex_right_state_int \l__regex_max_state_int
- \int_set_eq:NN \l__regex_internal_a_int \l__regex_left_state_int % a := left state, b := max state.
- \int_set_eq:NN \l__regex_internal_b_int \l__regex_max_state_int
- \if_int_compare:w \__int_eval:w #1 > 1 \exp_stop_f:
- \int_set:Nn \l__regex_internal_c_int % c := (#1 - 1) * (b - a).
- {
- ( #1 - 1 )
- * ( \l__regex_internal_b_int - \l__regex_internal_a_int )
- }
- \int_add:Nn \l__regex_right_state_int { \l__regex_internal_c_int } % Right and max both move forward by c.
- \int_add:Nn \l__regex_max_state_int { \l__regex_internal_c_int }
- \__regex_toks_memcpy:NNn % Called with b, a and c, in that order.
- \l__regex_internal_b_int
- \l__regex_internal_a_int
- \l__regex_internal_c_int
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat:nnN #1#2#3 % #1: group number; #2: count; #3: boolean. Selected when #4 of \__regex_group_aux:nnnnN is -1.
- {
- \if_int_compare:w #2 = 0 \exp_stop_f:
- \__regex_group_submatches:nNN {#1}
- \l__regex_left_state_int \l__regex_right_state_int
- \int_set:Nn \l__regex_internal_a_int % a := the state just before the left state.
- { \l__regex_left_state_int - 1 }
- \__regex_build_transition_right:nNn \__regex_action_free:n % Free transition from the right state back to a.
- \l__regex_right_state_int \l__regex_internal_a_int
- \__regex_build_new_state:
- \if_meaning:w \c_true_bool #3 % The free transition from a to the new right state goes on the left of a's toks if #3 is true, on the right otherwise.
- \__regex_build_transition_left:NNN \__regex_action_free:n
- \l__regex_internal_a_int \l__regex_right_state_int
- \else:
- \__regex_build_transition_right:nNn \__regex_action_free:n
- \l__regex_internal_a_int \l__regex_right_state_int
- \fi:
- \else:
- \__regex_group_repeat_aux:n {#2} % Non-zero count: make the copies and add submatch actions.
- \__regex_group_submatches:nNN {#1}
- \l__regex_internal_a_int \l__regex_right_state_int
- \if_meaning:w \c_true_bool #3 % The \__regex_action_free_group:n transition from the right state to a goes on the right if #3 is true, on the left otherwise.
- \__regex_build_transition_right:nNn \__regex_action_free_group:n
- \l__regex_right_state_int \l__regex_internal_a_int
- \else:
- \__regex_build_transition_left:NNN \__regex_action_free_group:n
- \l__regex_right_state_int \l__regex_internal_a_int
- \fi:
- \__regex_build_new_state:
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat:nnnN #1#2#3#4 % #1: group number; #2 and #3: counts; #4: boolean.
- {
- \__regex_group_submatches:nNN {#1}
- \l__regex_left_state_int \l__regex_right_state_int
- \__regex_group_repeat_aux:n { #2 + #3 } % #2 + #3 copies in total; a and b are set by this call.
- \if_meaning:w \c_true_bool #4
- \int_set_eq:NN \l__regex_left_state_int \l__regex_max_state_int % #4 true: walk back from max in steps of (b - a), #3 times,
- \prg_replicate:nn { #3 }
- {
- \int_sub:Nn \l__regex_left_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \__regex_build_transition_left:NNN \__regex_action_free:n % adding on the left of each visited state a free transition to max.
- \l__regex_left_state_int \l__regex_max_state_int
- }
- \else:
- \prg_replicate:nn { #3 - 1 } % #4 false: walk the right state back #3 - 1 times,
- {
- \int_sub:Nn \l__regex_right_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \__regex_build_transition_right:nNn \__regex_action_free:n % adding on the right of each visited state a free transition to max.
- \l__regex_right_state_int \l__regex_max_state_int
- }
- \if_int_compare:w #2 = 0 \exp_stop_f: % The last step goes to the state before the left state when #2 is 0, one more (b - a) step back otherwise.
- \int_set:Nn \l__regex_right_state_int
- { \l__regex_left_state_int - 1 }
- \else:
- \int_sub:Nn \l__regex_right_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \fi:
- \__regex_build_transition_right:nNn \__regex_action_free:n
- \l__regex_right_state_int \l__regex_max_state_int
- \fi:
- \__regex_build_new_state:
- }
-\cs_new_protected:Npn \__regex_assertion:Nn #1#2 % Add a state that runs test #2 and takes a free transition to the right state depending on the test outcome and boolean #1.
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int % x-expanded now, so the \bool_if:N.. calls below decide the argument order at build time
- {
- \exp_not:n {#2} % the test itself is stored unexpanded
- \__regex_break_point:TF
- \bool_if:NF #1 { { } } % #1 false: the "true" branch is empty, so the transition ends up in the "false" branch
- {
- \__regex_action_free:n
- {
- \int_eval:n % offset from the left state to the right state
- { \l__regex_right_state_int - \l__regex_left_state_int }
- }
- }
- \bool_if:NT #1 { { } } % #1 true: the transition is the "true" branch and the "false" branch is empty
- }
- }
-\cs_new_protected:Npn \__regex_anchor:N #1 % Anchor test: #1 is an integer position compared with the current position.
- {
- \if_int_compare:w #1 = \l__regex_current_pos_int
- \exp_after:wN \__regex_break_true:w % close the conditional first, then call \__regex_break_true:w
- \fi:
- }
-\cs_new_protected:Npn \__regex_b_test: % Runs \__regex_prop_w: on the previous character, then on the current one, reversed or not according to the first result.
- { % NOTE(review): this looks like the word-boundary test (\b) -- confirm where it is installed.
- \group_begin: % the group keeps the change of current char local
- \int_set_eq:NN \l__regex_current_char_int \l__regex_last_char_int
- \__regex_prop_w:
- \__regex_break_point:TF
- { \group_end: \__regex_item_reverse:n \__regex_prop_w: } % first test succeeded: current char is tested with the reversed item
- { \group_end: \__regex_prop_w: } % first test failed: current char is tested with the plain item
- }
-\cs_new_protected:Npn \__regex_command_K: % Add a state that records submatch key "0<" at the current position, then moves on by a free transition.
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int
- {
- \__regex_action_submatch:n { 0< }
- \bool_set_true:N \l__regex_fresh_thread_bool % the fresh-thread flag is raised only around the free transition
- \__regex_action_free:n
- { \int_eval:n { \l__regex_right_state_int - \l__regex_left_state_int } }
- \bool_set_false:N \l__regex_fresh_thread_bool
- }
- }
-\int_new:N \l__regex_min_pos_int % set in \__regex_match:n after the leading sentinel entry has been stored
-\int_new:N \l__regex_max_pos_int % set in \__regex_match:n after all query tokens have been stored
-\int_new:N \l__regex_current_pos_int % position in the query currently being written or read
-\int_new:N \l__regex_start_pos_int % set to the last success position at the start of each \__regex_match_once:
-\int_new:N \l__regex_success_pos_int % position recorded by \__regex_action_success:
-\int_new:N \l__regex_current_char_int % filled by \__regex_query_get:
-\int_new:N \l__regex_current_catcode_int % filled by \__regex_query_get:
-\int_new:N \l__regex_last_char_int % previous value of current_char, saved by \__regex_match_loop:
-\int_new:N \l__regex_case_changed_char_int % reset to \c_max_int at every step of \__regex_match_loop:
-\int_new:N \l__regex_current_state_int % state being executed
-\prop_new:N \l__regex_current_submatches_prop % submatch data of the thread being executed
-\prop_new:N \l__regex_success_submatches_prop % copy taken by \__regex_action_success:
-\int_new:N \l__regex_step_int % increased by 2 at every \__regex_match_loop: iteration
-\int_new:N \l__regex_min_active_int % lower bound of the stored-thread indices
-\int_new:N \l__regex_max_active_int % one past the last stored thread, incremented by \__regex_store_state:n
-\__intarray_new:Nn \g__regex_state_active_intarray { 65536 } % per state: step marker written by \__regex_use_state:
-\__intarray_new:Nn \g__regex_thread_state_intarray { 65536 } % per thread: the state stored by \__regex_store_state:n
-\tl_new:N \l__regex_every_match_tl % code run at the end of \__regex_match_once:
-\bool_new:N \l__regex_fresh_thread_bool % copied into empty_success_bool when a success is recorded
-\bool_new:N \l__regex_empty_success_bool
-\cs_new_eq:NN \__regex_if_two_empty_matches:F \use:n % default: always run the argument; redefined in \__regex_match_once:
-\bool_new:N \g__regex_success_bool % global result read by \__regex_return:
-\bool_new:N \l__regex_saved_success_bool
-\bool_new:N \l__regex_match_success_bool % result of the current match attempt
-\cs_new_protected:Npn \__regex_match:n #1 % Store the token list #1 as the query, initialise the matching variables, then start the first match attempt.
- {
- \trace_push:nnx { regex } { 1 } { __regex_match }
- \trace:nnx { regex } { 1 } { analyzing~query~token~list }
- \int_zero:N \l__regex_balance_int
- \int_set:Nn \l__regex_current_pos_int { 2 * \l__regex_max_state_int } % query positions start at twice the max state number
- \__regex_query_set:nnn { } { -1 } { -2 } % sentinel entry before the query: empty tokens, catcode -1, charcode -2
- \int_set_eq:NN \l__regex_min_pos_int \l__regex_current_pos_int
- \__tl_analysis_map_inline:nn {#1}
- { \__regex_query_set:nnn {##1} {"##2} {##3} } % the leading " makes ##2 be read as a hexadecimal number
- \int_set_eq:NN \l__regex_max_pos_int \l__regex_current_pos_int
- \__regex_query_set:nnn { } { -1 } { -2 } % sentinel entry after the query
- \trace:nnx { regex } { 1 } { initializing }
- \bool_gset_false:N \g__regex_success_bool
- \int_step_inline:nnnn
- \l__regex_min_state_int { 1 } { \l__regex_max_state_int - 1 }
- { \__intarray_gset_fast:Nnn \g__regex_state_active_intarray {##1} { 1 } } % every state gets marker 1
- \int_set_eq:NN \l__regex_min_active_int \l__regex_max_state_int
- \int_zero:N \l__regex_step_int
- \int_set_eq:NN \l__regex_success_pos_int \l__regex_min_pos_int
- \int_set:Nn \l__regex_min_submatch_int
- { 2 * \l__regex_max_state_int }
- \int_set_eq:NN \l__regex_submatch_int \l__regex_min_submatch_int
- \bool_set_false:N \l__regex_empty_success_bool
- \__regex_match_once:
- \trace_pop:nnx { regex } { 1 } { __regex_match }
- }
-\cs_new_protected:Npn \__regex_match_once: % One match attempt starting from the last success position; ends by running \l__regex_every_match_tl.
- {
- \if_meaning:w \c_true_bool \l__regex_empty_success_bool % previous success was flagged empty: only accept a success away from the start position
- \cs_set:Npn \__regex_if_two_empty_matches:F
- { \int_compare:nNnF \l__regex_start_pos_int = \l__regex_current_pos_int }
- \else: % otherwise the filter simply runs its argument
- \cs_set_eq:NN \__regex_if_two_empty_matches:F \use:n
- \fi:
- \int_set_eq:NN \l__regex_start_pos_int \l__regex_success_pos_int
- \bool_set_false:N \l__regex_match_success_bool
- \prop_clear:N \l__regex_current_submatches_prop
- \int_set_eq:NN \l__regex_max_active_int \l__regex_min_active_int % empty the list of stored threads
- \__regex_store_state:n { \l__regex_min_state_int } % seed it with the initial state
- \int_set:Nn \l__regex_current_pos_int
- { \l__regex_start_pos_int - 1 } % one before the start: \__regex_match_loop: increments before reading
- \__regex_query_get:
- \__regex_match_loop:
- \l__regex_every_match_tl
- }
-\cs_new_protected:Npn \__regex_single_match: % Set up \l__regex_every_match_tl for a single attempt: it only copies the local result to the global flag.
- {
- \tl_set:Nn \l__regex_every_match_tl
- { \bool_gset_eq:NN \g__regex_success_bool \l__regex_match_success_bool }
- }
-\cs_new_protected:Npn \__regex_multi_match:n #1 % Set up \l__regex_every_match_tl for repeated matching: after each success run #1 and try again.
- {
- \tl_set:Nn \l__regex_every_match_tl
- {
- \if_meaning:w \c_true_bool \l__regex_match_success_bool
- \bool_gset_true:N \g__regex_success_bool
- #1
- \exp_after:wN \__regex_match_once: % expand \fi: first so the next attempt does not run inside the conditional
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_match_loop: % One step of the matcher: advance one position, run every stored thread, and loop while threads remain and input is left.
- {
- \int_add:Nn \l__regex_step_int { 2 } % \__regex_use_state: writes step and step + 1 as markers, hence the increment of 2
- \int_incr:N \l__regex_current_pos_int
- \int_set_eq:NN \l__regex_last_char_int \l__regex_current_char_int
- \int_set_eq:NN \l__regex_case_changed_char_int \c_max_int
- \__regex_query_get:
- \use:x % x-expansion builds the calls for the old threads before the protected \int_set_eq:NN below empties the list
- {
- \int_set_eq:NN \l__regex_max_active_int \l__regex_min_active_int
- \int_step_function:nnnN
- { \l__regex_min_active_int }
- { 1 }
- { \l__regex_max_active_int - 1 }
- \__regex_match_one_active:n
- }
- \__prg_break_point: % \__prg_break: in \__regex_action_success: jumps here
- \bool_set_false:N \l__regex_fresh_thread_bool %^^A was arg of break_point:n
- \if_int_compare:w \l__regex_max_active_int > \l__regex_min_active_int % at least one thread was stored
- \if_int_compare:w \l__regex_current_pos_int < \l__regex_max_pos_int % and the end of the query is not reached
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_match_loop: % close both conditionals before looping
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_match_one_active:n #1 % Expandable: for thread index #1, produce a call that runs its stored state with its stored submatches.
- {
- \__regex_use_state_and_submatches:nn
- { \__intarray_item_fast:Nn \g__regex_thread_state_intarray {#1} } % the state saved by \__regex_store_state:n
- { \__regex_toks_use:w #1 } % contents of toks #1, written by \__regex_store_submatches:
- }
-\cs_new_protected:Npn \__regex_query_set:nnn #1#2#3 % Store one query item at the current position: #1 tokens, #2 catcode, #3 charcode; then advance.
- {
- \__intarray_gset_fast:Nnn \g__regex_charcode_intarray
- { \l__regex_current_pos_int } {#3}
- \__intarray_gset_fast:Nnn \g__regex_catcode_intarray
- { \l__regex_current_pos_int } {#2}
- \__intarray_gset_fast:Nnn \g__regex_balance_intarray
- { \l__regex_current_pos_int } { \l__regex_balance_int } % balance as it stands before this item
- \__regex_toks_set:Nn \l__regex_current_pos_int {#1}
- \int_incr:N \l__regex_current_pos_int
- \if_case:w #2 \exp_stop_f: % case 0: nothing
- \or: \int_incr:N \l__regex_balance_int % case 1: balance goes up
- \or: \int_decr:N \l__regex_balance_int % case 2: balance goes down
- \fi:
- }
-\cs_new_protected:Npn \__regex_query_get: % Load the charcode and catcode stored at the current position into the current_char / current_catcode integers.
- {
- \l__regex_current_char_int
- = \__intarray_item_fast:Nn \g__regex_charcode_intarray
- { \l__regex_current_pos_int } \scan_stop: % \scan_stop: ends the primitive assignment
- \l__regex_current_catcode_int
- = \__intarray_item_fast:Nn \g__regex_catcode_intarray
- { \l__regex_current_pos_int } \scan_stop:
- }
-\cs_new_protected:Npn \__regex_use_state: % Execute the instructions of the current state, marking it with step while running and step + 1 afterwards.
- {
- \trace:nnx { regex } { 2 } { state~\int_use:N \l__regex_current_state_int }
- \__intarray_gset_fast:Nnn \g__regex_state_active_intarray
- { \l__regex_current_state_int } { \l__regex_step_int } % marker = step while the state's toks run
- \__regex_toks_use:w \l__regex_current_state_int
- \__intarray_gset_fast:Nnn \g__regex_state_active_intarray
- { \l__regex_current_state_int } { \l__regex_step_int + 1 } % marker = step + 1 once they have finished
- }
-\cs_new_protected:Npn \__regex_use_state_and_submatches:nn #1 #2 % Make #1 the current state; if its marker is below the current step, load submatches #2 and run it.
- {
- \int_set:Nn \l__regex_current_state_int {#1}
- \if_int_compare:w
- \__intarray_item_fast:Nn \g__regex_state_active_intarray
- { \l__regex_current_state_int }
- < \l__regex_step_int % marker below step: not yet run during this step
- \tl_set:Nn \l__regex_current_submatches_prop {#2}
- \exp_after:wN \__regex_use_state:
- \fi:
- \scan_stop:
- }
-\cs_new_protected:Npn \__regex_action_start_wildcard: % Free transition to the next state with the fresh-thread flag raised, then store the current state (offset 0) for the next step.
- {
- \bool_set_true:N \l__regex_fresh_thread_bool
- \__regex_action_free:n {1}
- \bool_set_false:N \l__regex_fresh_thread_bool
- \__regex_action_cost:n {0}
- }
-\cs_new_protected:Npn \__regex_action_free:n % Free transition by the offset given as the next argument; the target runs unless its marker is > step (the test is followed by \else:).
- { \__regex_action_free_aux:nn { > \l__regex_step_int \else: } }
-\cs_new_protected:Npn \__regex_action_free_group:n % Same, but the target runs only when its marker is < step.
- { \__regex_action_free_aux:nn { < \l__regex_step_int } }
-\cs_new_protected:Npn \__regex_action_free_aux:nn #1#2 % #1: comparison tail inserted after the marker lookup; #2: state offset. Runs the target state, then restores state and submatches.
- {
- \use:x % x-expansion snapshots the current state number and submatches so they can be restored afterwards
- {
- \int_add:Nn \l__regex_current_state_int {#2}
- \exp_not:n % the conditional is kept unexpanded and only evaluated after the \int_add:Nn
- {
- \if_int_compare:w
- \__intarray_item_fast:Nn \g__regex_state_active_intarray
- { \l__regex_current_state_int }
- #1
- \exp_after:wN \__regex_use_state:
- \fi:
- }
- \int_set:Nn \l__regex_current_state_int
- { \int_use:N \l__regex_current_state_int } % expanded now, so this is the state number from before the jump
- \tl_set:Nn \exp_not:N \l__regex_current_submatches_prop
- { \exp_not:o \l__regex_current_submatches_prop } % likewise the submatches from before the jump
- }
- }
-\cs_new_protected:Npn \__regex_action_cost:n #1 % Store the state at offset #1 from the current state as a thread.
- {
- \exp_args:No \__regex_store_state:n
- { \__int_value:w \__int_eval:w \l__regex_current_state_int + #1 } % a single expansion step evaluates the sum to a number
- }
-\cs_new_protected:Npn \__regex_store_state:n #1 % Append a thread: save the current submatches and state #1 at index max_active, then increment max_active.
- {
- \__regex_store_submatches:
- \__intarray_gset_fast:Nnn \g__regex_thread_state_intarray
- { \l__regex_max_active_int } {#1}
- \int_incr:N \l__regex_max_active_int
- }
-\cs_new_protected:Npn \__regex_store_submatches: % Save the current submatches prop (expanded once) into the toks numbered max_active.
- {
- \__regex_toks_set:No \l__regex_max_active_int
- { \l__regex_current_submatches_prop }
- }
-\cs_new_protected:Npn \__regex_disable_submatches: % Turn submatch bookkeeping into no-ops; a local redefinition, so it lasts until the enclosing group ends.
- {
- \cs_set_protected:Npn \__regex_store_submatches: { }
- \cs_set_protected:Npn \__regex_action_submatch:n ##1 { }
- }
-\cs_new_protected:Npn \__regex_action_submatch:n #1 % Record the current position under key #1 in the current submatches prop.
- {
- \prop_put:Nno \l__regex_current_submatches_prop {#1}
- { \int_use:N \l__regex_current_pos_int } % expanded once, so the number is stored rather than the variable
- }
-\cs_new_protected:Npn \__regex_action_success: % Record a successful match at the current position, unless the empty-match filter rejects it.
- {
- \__regex_if_two_empty_matches:F % either \use:n or a position comparison, as set by \__regex_match_once:
- {
- \bool_set_true:N \l__regex_match_success_bool
- \bool_set_eq:NN \l__regex_empty_success_bool
- \l__regex_fresh_thread_bool
- \int_set_eq:NN \l__regex_success_pos_int \l__regex_current_pos_int
- \prop_set_eq:NN \l__regex_success_submatches_prop
- \l__regex_current_submatches_prop
- \__prg_break: % skip the remaining threads of this step (break point is in \__regex_match_loop:)
- }
- }
-\int_new:N \l__regex_replacement_csnames_int % incremented by \__regex_replacement_cu_aux:Nw, decremented by \__regex_replacement_rbrace:N
-\tl_new:N \l__regex_replacement_category_tl % category letter currently in force; empty when none
-\seq_new:N \l__regex_replacement_category_seq % stack of saved category letters (pushed in ..._cat:NNN, popped in ..._normal:n)
-\tl_new:N \l__regex_balance_tl % balance terms contributed by submatches used in the replacement
-\cs_new:Npn \__regex_replacement_balance_one_match:n #1 % Initial definition: minus the balance of submatch #1; redefined globally by \__regex_replacement:n.
- { - \__regex_submatch_balance:n {#1} }
-\cs_new:Npn \__regex_replacement_do_one_match:n #1 % Initial definition: only the query tokens between the "prev" and "begin" positions of submatch #1; redefined by \__regex_replacement_aux:n.
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_prev_intarray {#1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- }
-\cs_new:Npn \__regex_replacement_exp_not:N #1 { \exp_not:n {#1} } % Wrap the single token #1 in \exp_not:n.
-\cs_new:Npn \__regex_query_range:nn #1#2 % Expandable: leave the contents of toks #1, #1 + 1, ..., #2 - 1; both bounds are integer expressions.
- {
- \exp_after:wN \__regex_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 \exp_after:wN ; % evaluate #1, and #2 through the \exp_after:wN, before the loop starts
- \__int_value:w \__int_eval:w #2 ;
- \__prg_break_point:
- }
-\cs_new:Npn \__regex_query_range_loop:ww #1 ; #2 ; % Loop body: use toks #1 and recurse with #1 + 1 while #1 < #2.
- {
- \if_int_compare:w #1 < #2 \exp_stop_f:
- \else:
- \exp_after:wN \__prg_break: % range exhausted: jump to the break point in \__regex_query_range:nn
- \fi:
- \__regex_toks_use:w #1 \exp_stop_f:
- \exp_after:wN \__regex_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 + 1 ; #2 ; % the first ; also terminates the integer expression
- }
-\cs_new:Npn \__regex_query_submatch:n #1 % Expandable: the query tokens between the begin and end positions recorded for submatch #1.
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} }
- }
-\cs_new_protected:Npn \__regex_submatch_balance:n #1 % Integer expression: stored balance at the end of submatch #1 minus stored balance at its begin; a position of 0 counts as balance 0.
- { % NOTE(review): defined protected although used inside integer expressions; it appears inside a \cs_gset:Npx in \__regex_replacement:n, presumably to stay unexpanded there -- confirm.
- \__int_eval:w
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} } = 0 % \__regex_extract: initialises positions to 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g__regex_balance_intarray
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} }
- }
- -
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} } = 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g__regex_balance_intarray
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- }
- \__int_eval_end:
- }
-\cs_new_protected:Npn \__regex_replacement:n #1 % Parse replacement text #1 and define the per-match replacement and balance functions from it.
- {
- \trace_push:nnn { regex } { 1 } { __regex_replacement:n }
- \__tl_build:Nw \l__regex_internal_a_tl % the \__tl_build_one:n/x calls below accumulate into this token list
- \int_zero:N \l__regex_balance_int
- \tl_clear:N \l__regex_balance_tl
- \__regex_escape_use:nnnn % three per-character handlers, then the text to scan
- {
- \if_charcode:w \c_right_brace_str ##1 % a right brace gets its own handler
- \__regex_replacement_rbrace:N
- \else:
- \__regex_replacement_normal:n
- \fi:
- ##1
- }
- { \__regex_replacement_escaped:N ##1 }
- { \__regex_replacement_normal:n ##1 }
- {#1}
- \prg_do_nothing: \prg_do_nothing: % end markers; \__regex_replacement_cat:NNN tests for \prg_do_nothing:
- \if_int_compare:w \l__regex_replacement_csnames_int > 0 \exp_stop_f: % some csname constructions are still open
- \__msg_kernel_error:nnx { regex } { replacement-missing-rbrace }
- { \int_use:N \l__regex_replacement_csnames_int }
- \__tl_build_one:x
- { \prg_replicate:nn \l__regex_replacement_csnames_int \cs_end: } % close them all
- \fi:
- \seq_if_empty:NF \l__regex_replacement_category_seq % some category parentheses are still open
- {
- \__msg_kernel_error:nnx { regex } { replacement-missing-rparen }
- { \seq_count:N \l__regex_replacement_category_seq }
- \seq_clear:N \l__regex_replacement_category_seq
- }
- \cs_gset:Npx \__regex_replacement_balance_one_match:n ##1 % balance contribution of one match: constant part, submatch terms, minus the match itself
- {
- + \int_use:N \l__regex_balance_int
- \l__regex_balance_tl
- - \__regex_submatch_balance:n {##1}
- }
- \__tl_build_end:
- \exp_args:No \__regex_replacement_aux:n \l__regex_internal_a_tl
- \trace_pop:nnn { regex } { 1 } { __regex_replacement:n }
- }
-\cs_new_protected:Npn \__regex_replacement_aux:n #1 % Redefine \__regex_replacement_do_one_match:n: the query tokens from "prev" to "begin" of the match, followed by the built replacement #1.
- {
- \cs_set:Npn \__regex_replacement_do_one_match:n ##1
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_prev_intarray {##1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {##1} }
- #1 % may contain ##1, inserted by \__regex_replacement_put_submatch:n
- }
- }
-\cs_new_protected:Npn \__regex_replacement_normal:n #1 % Handle an ordinary character #1 of the replacement text.
- {
- \tl_if_empty:NTF \l__regex_replacement_category_tl
- { \__tl_build_one:n {#1} } % no category in force: add the character as it is
- { % (
- \token_if_eq_charcode:NNTF #1 )
- { % a right parenthesis ends the category scope: restore the saved category
- \seq_pop:NN \l__regex_replacement_category_seq
- \l__regex_replacement_category_tl
- }
- { % otherwise hand the character to the handler for the current category
- \use:c { __regex_replacement_c_ \l__regex_replacement_category_tl :w }
- \__regex_replacement_normal:n {#1}
- }
- }
- }
-\cs_new_protected:Npn \__regex_replacement_escaped:N #1 % Handle an escaped character #1: dedicated handler if one exists, a digit as a submatch, anything else as a normal character.
- {
- \cs_if_exist_use:cF { __regex_replacement_#1:w }
- {
- \if_int_compare:w 1 < 1#1 \exp_stop_f: % true when "1#1" reads as a number above 1, as it does for a digit
- \__regex_replacement_put_submatch:n {#1}
- \else:
- \exp_args:No \__regex_replacement_normal:n
- { \token_to_str:N #1 }
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_replacement_put_submatch:n #1 % Insert submatch number #1 into the replacement; silently does nothing when #1 is not below the capturing-group count.
- {
- \if_int_compare:w #1 < \l__regex_capturing_group_int
- \__tl_build_one:n { \__regex_query_submatch:n { #1 + ##1 } } % ##1 becomes the argument of the per-match function defined in \__regex_replacement_aux:n
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f: % balance is only tracked outside csname constructions
- \tl_put_right:Nn \l__regex_balance_tl
- { + \__regex_submatch_balance:n { \__int_eval:w #1+##1 \__int_eval_end: } }
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_replacement_g:w #1#2 % Handler for the escape "g": requires a following normal left brace, then reads digits into \l__regex_internal_a_int.
- {
- \str_if_eq_x:nnTF { #1#2 } { \__regex_replacement_normal:n \c_left_brace_str }
- { \l__regex_internal_a_int = \__regex_replacement_g_digits:NN } % primitive assignment; the digits are produced by expansion
- { \__regex_replacement_error:NNN g #1 #2 }
- }
-\cs_new:Npn \__regex_replacement_g_digits:NN #1#2 % Expandable digit collector for "g": #1 is the handler token and #2 the character it applies to.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_replacement_normal:n
- {
- \if_int_compare:w 1 < 1#2 \exp_stop_f: % normal digit: emit it and continue with the next pair
- #2
- \exp_after:wN \use_i:nnn % keeps the recursive call and drops the trailing "#1 #2"
- \exp_after:wN \__regex_replacement_g_digits:NN
- \else: % normal non-digit: end the number and raise the "g" error
- \exp_stop_f:
- \exp_after:wN \__regex_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- {
- \exp_stop_f: % ends the number being assigned
- \if_meaning:w \__regex_replacement_rbrace:N #1 % closing brace: insert the submatch whose number was just read
- \exp_args:No \__regex_replacement_put_submatch:n
- { \int_use:N \l__regex_internal_a_int }
- \exp_after:wN \use_none:nn % drops the trailing "#1 #2"
- \else:
- \exp_after:wN \__regex_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- #1 #2
- }
-\cs_new_protected:Npn \__regex_replacement_c:w #1#2 % Handler for the escape "c": a left brace starts a csname construction, a known category letter selects a category, anything else is an error.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_replacement_normal:n
- {
- \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
- { \__regex_replacement_cu_aux:Nw \__regex_replacement_exp_not:N }
- {
- \cs_if_exist:cTF { __regex_replacement_c_#2:w } % a handler exists for category letter #2
- { \__regex_replacement_cat:NNN #2 }
- { \__regex_replacement_error:NNN c #1#2 }
- }
- }
- { \__regex_replacement_error:NNN c #1#2 }
- }
-\cs_new_protected:Npn \__regex_replacement_cu_aux:Nw #1 % Open a csname construction: at nesting level 0 the resulting control sequence is passed to #1, at deeper levels to \tl_to_str:V.
- {
- \if_case:w \l__regex_replacement_csnames_int % case 0: not yet inside a csname
- \__tl_build_one:n { \exp_not:n { \exp_after:wN #1 \cs:w } }
- \else:
- \__tl_build_one:n { \exp_not:n { \exp_after:wN \tl_to_str:V \cs:w } }
- \fi:
- \int_incr:N \l__regex_replacement_csnames_int
- }
-\cs_new_protected:Npn \__regex_replacement_u:w #1#2 % Handler for the escape "u": requires a following normal left brace, then opens a csname construction used through \exp_not:V.
- {
- \str_if_eq_x:nnTF { #1#2 } { \__regex_replacement_normal:n \c_left_brace_str }
- { \__regex_replacement_cu_aux:Nw \exp_not:V }
- { \__regex_replacement_error:NNN u #1#2 }
- }
-\cs_new_protected:Npn \__regex_replacement_rbrace:N #1 % Handle a right brace: close the innermost open csname construction, or treat it as a normal character when none is open.
- {
- \if_int_compare:w \l__regex_replacement_csnames_int > 0 \exp_stop_f:
- \__tl_build_one:n \cs_end:
- \int_decr:N \l__regex_replacement_csnames_int
- \else:
- \__regex_replacement_normal:n {#1}
- \fi:
- }
-\cs_new_protected:Npn \__regex_replacement_cat:NNN #1#2#3 % Apply category letter #1 to the item that follows: #2 is its handler token and #3 its character.
- {
- \token_if_eq_meaning:NNTF \prg_do_nothing: #3 % \prg_do_nothing: is the end marker placed by \__regex_replacement:n
- { \__msg_kernel_error:nn { regex } { replacement-catcode-end } }
- {
- \int_compare:nNnTF { \l__regex_replacement_csnames_int } > 0
- { % inside a csname construction: report the error, then process #2 #3 without the category
- \__msg_kernel_error:nnnn
- { regex } { replacement-catcode-in-cs } {#1} {#3}
- #2 #3
- }
- {
- \str_if_eq:nnTF { #2 #3 } { \__regex_replacement_normal:n ( } % )
- { % left parenthesis: save the current category and make #1 current until the matching right parenthesis
- \seq_push:NV \l__regex_replacement_category_seq
- \l__regex_replacement_category_tl
- \tl_set:Nn \l__regex_replacement_category_tl {#1}
- }
- { \use:c { __regex_replacement_c_#1:w } #2 #3 } % single item: call the handler for category #1
- }
- }
- }
-\group_begin: % The group keeps the catcode changes of ^^@ local; each handler below is defined while ^^@ has the category it must produce.
- \cs_new_protected:Npn \__regex_replacement_char:nNN #1#2#3 % Add #1 to the build with its ^^@ turned into character #3; #2 is absorbed and unused.
- {
- \tex_lccode:D 0 = `#3 \scan_stop: % \lowercase below maps character code 0 (^^@) to the code of #3
- \tex_lowercase:D { \__tl_build_one:n {#1} }
- }
- \char_set_catcode_active:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_A:w % category A: ^^@ is active here
- { \__regex_replacement_char:nNN { \exp_not:n { \exp_not:N ^^@ } } } % the active character is protected against expansion
- \char_set_catcode_group_begin:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_B:w % category B: ^^@ is a begin-group character here
- {
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f: % balance is only counted outside csname constructions
- \int_incr:N \l__regex_balance_int
- \fi:
- \__regex_replacement_char:nNN
- { \exp_not:n { \exp_after:wN ^^@ \if_false: } \fi: } } % \if_false: } \fi: keeps this definition brace-balanced
- }
- \cs_new_protected:Npn \__regex_replacement_c_C:w #1#2 % category C: build a control sequence from #2 (#1 is unused)
- { \__tl_build_one:n { \exp_not:N \exp_not:N \exp_not:c {#2} } }
- \char_set_catcode_math_subscript:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_D:w % category D: ^^@ is a math subscript character here
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_group_end:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_E:w % category E: ^^@ is an end-group character here
- {
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f:
- \int_decr:N \l__regex_balance_int
- \fi:
- \__regex_replacement_char:nNN
- { \exp_not:n { \if_false: { \fi: ^^@ } } % \if_false: { \fi: keeps this definition brace-balanced
- }
- \char_set_catcode_letter:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_L:w % category L: ^^@ is a letter here
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_math_toggle:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_M:w % category M: ^^@ is a math toggle here
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_other:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_O:w % category O: ^^@ is an "other" character here
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_parameter:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_P:w % category P: ^^@ is a parameter character here
- {
- \__regex_replacement_char:nNN
- { \exp_not:n { \exp_not:n { ^^@^^@^^@^^@ } } } % four in the source; presumably halved at each later definition/expansion stage -- TODO confirm
- }
- \cs_new_protected:Npn \__regex_replacement_c_S:w #1#2 % category S: a space token with the character code of #2 (#1 is unused)
- {
- \if_int_compare:w `#2 = 0 \exp_stop_f: % character code 0 is rejected with an error
- \__msg_kernel_error:nn { regex } { replacement-null-space }
- \fi:
- \tex_lccode:D `\ = `#2 \scan_stop: % here it is the space character whose lowercase code is set
- \tex_lowercase:D { \__tl_build_one:n {~} }
- }
- \char_set_catcode_alignment:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_T:w % category T: ^^@ is an alignment character here
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_math_superscript:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_U:w % category U: ^^@ is a math superscript character here
- { \__regex_replacement_char:nNN { ^^@ } }
-\group_end:
-\cs_new_protected:Npn \__regex_replacement_error:NNN #1#2#3 % Raise error "replacement-#1" with #3 as its argument, then process "#2 #3" as if the escape had not been there.
- {
- \__msg_kernel_error:nnx { regex } { replacement-#1 } {#3}
- #2 #3
- }
-\cs_new_protected:Npn \regex_new:N #1 % Create regex variable #1 as a copy of \c__regex_no_match_regex.
- { \cs_new_eq:NN #1 \c__regex_no_match_regex }
-\cs_new_protected:Npn \regex_set:Nn #1#2 % Compile regex #2 and store the result locally in #1.
- {
- \__regex_compile:n {#2}
- \tl_set_eq:NN #1 \l__regex_internal_regex % \l__regex_internal_regex is what is copied after \__regex_compile:n
- }
-\cs_new_protected:Npn \regex_gset:Nn #1#2 % Compile regex #2 and store the result globally in #1.
- {
- \__regex_compile:n {#2}
- \tl_gset_eq:NN #1 \l__regex_internal_regex
- }
-\cs_new_protected:Npn \regex_const:Nn #1#2 % Compile regex #2 and define constant #1 to hold the result.
- {
- \__regex_compile:n {#2}
- \tl_const:Nx #1 { \exp_not:o \l__regex_internal_regex } % x-type setter with \exp_not:o: exactly one expansion of the variable
- }
-\cs_new_protected:Npn \regex_show:n #1 % Compile regex #1 and show it, labelled with the original text as a string in braces.
- {
- \__regex_compile:n {#1}
- \__regex_show:Nn \l__regex_internal_regex
- { { \tl_to_str:n {#1} } }
- }
-\cs_new_protected:Npn \regex_show:N #1 % Show the already compiled regex variable #1, labelled "variable <name>".
- { \__regex_show:Nn #1 { variable~\token_to_str:N #1 } }
-\prg_new_protected_conditional:Npnn \regex_match:nn #1#2 { T , F , TF } % Test whether regex #1 (given as text) matches token list #2.
- {
- \__regex_if_match:nn { \__regex_build:n {#1} } {#2}
- \__regex_return: % turns \g__regex_success_bool into the true/false branch
- }
-\prg_new_protected_conditional:Npnn \regex_match:Nn #1#2 { T , F , TF } % Same test with regex variable #1.
- {
- \__regex_if_match:nn { \__regex_build:N #1 } {#2}
- \__regex_return:
- }
-\cs_new_protected:Npn \regex_count:nnN #1 % Count matches of regex #1 (given as text); the token list and result variable are picked up by \__regex_count:nnN.
- { \__regex_count:nnN { \__regex_build:n {#1} } }
-\cs_new_protected:Npn \regex_count:NnN #1 % Same with regex variable #1.
- { \__regex_count:nnN { \__regex_build:N #1 } }
-\cs_set_protected:Npn \__regex_tmp:w #1#2#3 % Temporary generator: from internal function #1, define user functions #2 (regex as text) and #3 (regex variable), each as a plain function and as T/F/TF conditionals.
- {
- \cs_new_protected:Npn #2 ##1 { #1 { \__regex_build:n {##1} } } % the remaining arguments are picked up by #1
- \cs_new_protected:Npn #3 ##1 { #1 { \__regex_build:N ##1 } }
- \prg_new_protected_conditional:Npnn #2 ##1##2##3 { T , F , TF }
- { #1 { \__regex_build:n {##1} } {##2} ##3 \__regex_return: }
- \prg_new_protected_conditional:Npnn #3 ##1##2##3 { T , F , TF }
- { #1 { \__regex_build:N ##1 } {##2} ##3 \__regex_return: }
- }
-\__regex_tmp:w \__regex_extract_once:nnN % Each call defines the nnN and NnN user functions, plus their T/F/TF variants, on top of one internal function.
- \regex_extract_once:nnN \regex_extract_once:NnN
-\__regex_tmp:w \__regex_extract_all:nnN
- \regex_extract_all:nnN \regex_extract_all:NnN
-\__regex_tmp:w \__regex_replace_once:nnN
- \regex_replace_once:nnN \regex_replace_once:NnN
-\__regex_tmp:w \__regex_replace_all:nnN
- \regex_replace_all:nnN \regex_replace_all:NnN
-\__regex_tmp:w \__regex_split:nnN \regex_split:nnN \regex_split:NnN
-\int_new:N \l__regex_match_count_int % number of matches, incremented in \__regex_count:nnN
-\flag_new:n { __regex_begin } % raised in \__regex_extract_seq_aux:ww when an item has positive balance
-\flag_new:n { __regex_end } % raised there when an item has negative balance
-\int_new:N \l__regex_min_submatch_int % first submatch index, set in \__regex_match:n
-\int_new:N \l__regex_submatch_int % next free submatch index
-\int_new:N \l__regex_zeroth_submatch_int % index of the first slot of the latest match, set by \__regex_extract:
-\__intarray_new:Nn \g__regex_submatch_prev_intarray { 65536 } % per submatch: start position of the attempt (written on the zeroth slot)
-\__intarray_new:Nn \g__regex_submatch_begin_intarray { 65536 } % per submatch: begin position
-\__intarray_new:Nn \g__regex_submatch_end_intarray { 65536 } % per submatch: end position
-\cs_new_protected:Npn \__regex_return: % Conditional epilogue: return true or false according to \g__regex_success_bool.
- {
- \if_meaning:w \c_true_bool \g__regex_success_bool
- \prg_return_true:
- \else:
- \prg_return_false:
- \fi:
- }
-\cs_new_protected:Npn \__regex_if_match:nn #1#2 % Run build code #1 and match once against #2; the result is left in the global \g__regex_success_bool.
- {
- \group_begin: % everything local, including the disabled submatch tracking, is undone at \group_end:
- \__regex_disable_submatches:
- \__regex_single_match:
- #1
- \__regex_match:n {#2}
- \group_end:
- }
-\cs_new_protected:Npn \__regex_count:nnN #1#2#3 % Run build code #1, count the matches in #2 and store the count locally in integer #3.
- {
- \group_begin:
- \__regex_disable_submatches:
- \int_zero:N \l__regex_match_count_int
- \__regex_multi_match:n { \int_incr:N \l__regex_match_count_int }
- #1
- \__regex_match:n {#2}
- \exp_args:NNNo % expands the count before \group_end: runs, so the value survives the group
- \group_end:
- \int_set:Nn #3 { \int_use:N \l__regex_match_count_int }
- }
-\cs_new_protected:Npn \__regex_extract_once:nnN #1#2#3 % Run build code #1, match once against #2 and store the submatches of that match in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_single_match:
- #1
- \__regex_match:n {#2}
- \__regex_extract:
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_extract_all:nnN #1#2#3 % Run build code #1, match repeatedly against #2 and store the submatches of every match in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_multi_match:n { \__regex_extract: } % extraction runs after each successful match
- #1
- \__regex_match:n {#2}
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_split:nnN #1#2#3 % Run build code #1 and split #2 at each match; the pieces, and any captured groups, are stored in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_multi_match:n
- {
- \if_int_compare:w \l__regex_start_pos_int < \l__regex_success_pos_int % only handle matches that ended past the start position
- \__regex_extract:
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_zeroth_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray % the zeroth slot is rewritten to describe the text before the match:
- { \l__regex_zeroth_submatch_int } % its end becomes the begin of the match
- {
- \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray
- { \l__regex_zeroth_submatch_int }
- }
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray % and its begin becomes the start position of the attempt
- { \l__regex_zeroth_submatch_int }
- { \l__regex_start_pos_int }
- \fi:
- }
- #1
- \__regex_match:n {#2}
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray % final piece: from the last start position to the end of the query
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray
- { \l__regex_submatch_int }
- { \l__regex_max_pos_int }
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray
- { \l__regex_submatch_int }
- { \l__regex_start_pos_int }
- \int_incr:N \l__regex_submatch_int
- \if_meaning:w \c_true_bool \l__regex_empty_success_bool % drop that final piece again when the last success was flagged empty
- \if_int_compare:w \l__regex_start_pos_int = \l__regex_max_pos_int % and happened at the very end of the query
- \int_decr:N \l__regex_submatch_int
- \fi:
- \fi:
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_group_end_extract_seq:N #1 % Build a sequence from all recorded submatches, report unbalanced items, close the group and store the sequence in #1.
- {
- \cs_set_eq:NN \__seq_item:n \scan_stop: % makes \__seq_item:n unexpandable so it survives the x-expansions below
- \flag_clear:n { __regex_begin }
- \flag_clear:n { __regex_end }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \s__seq
- \int_step_function:nnnN % one item per submatch index from min_submatch to submatch - 1
- { \l__regex_min_submatch_int }
- { 1 }
- { \l__regex_submatch_int - 1 }
- \__regex_extract_seq_aux:n
- }
- \int_compare:nNnF
- { \flag_height:n { __regex_begin } + \flag_height:n { __regex_end } }
- = 0
- { % a flag was raised: at least one item had to be padded with braces
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { splitting~or~extracting~submatches }
- { \flag_height:n { __regex_end } }
- { \flag_height:n { __regex_begin } }
- }
- \use:x
- {
- \group_end: % closes the group opened by the calling function
- \tl_set:Nn \exp_not:N #1 { \l__regex_internal_a_tl } % expanded here, before \group_end: is executed
- }
- }
-\cs_new:Npn \__regex_extract_seq_aux:n #1 % Expandable: one sequence item for submatch index #1.
- {
- \__seq_item:n
- {
- \exp_after:wN \__regex_extract_seq_aux:ww
- \__int_value:w \__regex_submatch_balance:n {#1} ; #1; % the balance is evaluated first and passed as the first ;-delimited argument
- }
- }
-\cs_new:Npn \__regex_extract_seq_aux:ww #1; #2; % Expandable: tokens of submatch #2, padded with braces according to its balance #1.
- {
- \if_int_compare:w #1 < 0 \exp_stop_f: % negative balance: -#1 opening braces are added in front
- \flag_raise:n { __regex_end }
- \prg_replicate:nn {-#1} { \exp_not:n { { \if_false: } \fi: } }
- \fi:
- \__regex_query_submatch:n {#2}
- \if_int_compare:w #1 > 0 \exp_stop_f: % positive balance: #1 closing braces are added at the end
- \flag_raise:n { __regex_begin }
- \prg_replicate:nn {#1} { \exp_not:n { \if_false: { \fi: } } }
- \fi:
- }
-\cs_new_protected:Npn \__regex_extract: % After a successful match, copy its submatch positions from the success prop into the submatch arrays.
- {
- \if_meaning:w \c_true_bool \g__regex_success_bool
- \int_set_eq:NN \l__regex_zeroth_submatch_int \l__regex_submatch_int
- \prg_replicate:nn \l__regex_capturing_group_int % reserve one zeroed slot per capturing group
- {
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_submatch_int } { 0 }
- \int_incr:N \l__regex_submatch_int
- }
- \prop_map_inline:Nn \l__regex_success_submatches_prop
- { % keys look like "<n><" or "<n>>" (cf. "0<" in \__regex_command_K:), so "##1 - 1" completes a comparison
- \if_int_compare:w ##1 - 1 \exp_stop_f:
- \exp_after:wN \__regex_extract_e:wn \__int_value:w % true case, an end entry according to the helper's ">" delimiter
- \else:
- \exp_after:wN \__regex_extract_b:wn \__int_value:w % false case, a begin entry according to the helper's "<" delimiter
- \fi:
- \__int_eval:w \l__regex_zeroth_submatch_int + ##1 {##2} % slot index = zeroth + group number; the < or > in ##1 ends the expression
- }
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_zeroth_submatch_int } { \l__regex_start_pos_int }
- \fi:
- }
-\cs_new_protected:Npn \__regex_extract_b:wn #1 < #2 % Store #2 as the begin position of slot #1; "<" is the argument delimiter.
- { \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray {#1} {#2} }
-\cs_new_protected:Npn \__regex_extract_e:wn #1 > #2 % Store #2 as the end position of slot #1; ">" is the argument delimiter.
- { \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray {#1} {#2} }
-\cs_new_protected:Npn \__regex_replace_once:nnN #1#2#3 % Run build code #1 and replace the first match in token list variable #3 by replacement #2; #3 is left alone when nothing matches.
- {
- \group_begin:
- \__regex_single_match:
- #1
- \__regex_replacement:n {#2}
- \exp_args:No \__regex_match:n { #3 } % match against the contents of the variable
- \if_meaning:w \c_false_bool \g__regex_success_bool
- \group_end: % no match: just close the group
- \else:
- \__regex_extract:
- \int_set:Nn \l__regex_balance_int
- {
- \__regex_replacement_balance_one_match:n
- { \l__regex_zeroth_submatch_int }
- }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \__regex_replacement_do_one_match:n { \l__regex_zeroth_submatch_int } % text before the match, then the replacement
- \__regex_query_range:nn % then the rest of the query after the match
- {
- \__intarray_item_fast:Nn \g__regex_submatch_end_intarray
- { \l__regex_zeroth_submatch_int }
- }
- { \l__regex_max_pos_int }
- }
- \__regex_group_end_replace:N #3 % closes the group and assigns the result
- \fi:
- }
-\cs_new_protected:Npn \__regex_replace_all:nnN #1#2#3 % Run build code #1 and replace every match in token list variable #3 by replacement #2.
- {
- \group_begin: % closed inside \__regex_group_end_replace:N
- \__regex_multi_match:n { \__regex_extract: }
- #1
- \__regex_replacement:n {#2}
- \exp_args:No \__regex_match:n {#3}
- \int_set:Nn \l__regex_balance_int
- {
- 0 % leading 0 so the "+ ..." terms form a valid expression even when there is no match
- \int_step_function:nnnN
- { \l__regex_min_submatch_int }
- \l__regex_capturing_group_int % step = slots per match, so only the zeroth slot of each match is visited
- { \l__regex_submatch_int - 1 }
- \__regex_replacement_balance_one_match:n
- }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \int_step_function:nnnN
- { \l__regex_min_submatch_int }
- \l__regex_capturing_group_int
- { \l__regex_submatch_int - 1 }
- \__regex_replacement_do_one_match:n
- \__regex_query_range:nn % the rest of the query after the last match
- \l__regex_start_pos_int \l__regex_max_pos_int
- }
- \__regex_group_end_replace:N #3
- }
-\cs_new_protected:Npn \__regex_group_end_replace:N #1 % Report an unbalanced result, close the group and store the result in #1, padded with braces so it is balanced.
- {
- \if_int_compare:w \l__regex_balance_int = 0 \exp_stop_f:
- \else:
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { replacing }
- { \int_max:nn { - \l__regex_balance_int } { 0 } } % only one of these two counts is non-zero
- { \int_max:nn { \l__regex_balance_int } { 0 } }
- \fi:
- \use:x
- {
- \group_end: % closes the group opened by the calling function
- \tl_set:Nn \exp_not:N #1
- {
- \if_int_compare:w \l__regex_balance_int < 0 \exp_stop_f: % negative balance: add opening braces in front
- \prg_replicate:nn { - \l__regex_balance_int }
- { { \if_false: } \fi: }
- \fi:
- \l__regex_internal_a_tl
- \if_int_compare:w \l__regex_balance_int > 0 \exp_stop_f: % positive balance: add closing braces at the end
- \prg_replicate:nn { \l__regex_balance_int }
- { \if_false: { \fi: } }
- \fi:
- }
- }
- }
-\__msg_kernel_new:nnnn { regex } { trailing-backslash } % Each declaration takes: module, message name, short text, longer explanatory text. Spaces are written as ~ throughout.
- { Trailing~escape~character~'\iow_char:N\\'. }
- {
- A~regular~expression~or~its~replacement~text~ends~with~
- the~escape~character~'\iow_char:N\\'.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { x-missing-rbrace } % #1: what was read after the opening brace
- { Missing~closing~brace~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{...#1'.~
- The~closing~brace~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { x-overflow } % #1: the character code that is too large
- { Character~code~'#1'~too~large~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{\int_to_Hex:n{#1}\}'.~
- The~character~code~#1~is~larger~than~
- the~maximum~value~\int_use:N \c_max_char_int.
- }
-\__msg_kernel_new:nnnn { regex } { invalid-quantifier } % #1: the quantifier read so far, #2: the offending character
- { Braced~quantifier~'#1'~may~not~be~followed~by~'#2'. }
- {
- The~character~'#2'~is~invalid~in~the~braced~quantifier~'#1'.~
- The~only~valid~quantifiers~are~'*',~'?',~'+',~'{<int>}',~
- '{<min>,}'~and~'{<min>,<max>}',~optionally~followed~by~'?'.
- }
-\__msg_kernel_new:nnnn { regex } { missing-rbrack } % no arguments
- { Missing~right~bracket~inserted~in~regular~expression. }
- {
- LaTeX~was~given~a~regular~expression~where~a~character~class~
- was~started~with~'[',~but~the~matching~']'~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { missing-rparen } % #1: number of missing right parentheses
- {
- Missing~right~
- \int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } ~
- inserted~in~regular~expression.
- }
- {
- LaTeX~was~given~a~regular~expression~with~\int_eval:n {#1} ~
- more~left~parentheses~than~right~parentheses.
- }
-\__msg_kernel_new:nnnn { regex } { extra-rparen } % no arguments
- { Extra~right~parenthesis~ignored~in~regular~expression. }
- {
- LaTeX~came~across~a~closing~parenthesis~when~no~submatch~group~
- was~open.~The~parenthesis~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { bad-escape } % #1: the escaped character; the wording depends on the parsing context
- {
- Invalid~escape~'\iow_char:N\\#1'~
- \__regex_if_in_cs:TF { within~a~control~sequence. }
- {
- \__regex_if_in_class:TF
- { in~a~character~class. }
- { following~a~category~test. }
- }
- }
- {
- The~escape~sequence~'\iow_char:N\\#1'~may~not~appear~
- \__regex_if_in_cs:TF
- {
- within~a~control~sequence~test~introduced~by~
- '\iow_char:N\\c\iow_char:N\{'.
- }
- {
- \__regex_if_in_class:TF
- { within~a~character~class~ }
- { following~a~category~test~such~as~'\iow_char:N\\cL'~ }
- because~it~does~not~match~exactly~one~character.
- }
- }
-\__msg_kernel_new:nnnn { regex } { range-missing-end } % #1, #2: the two end-points of the range
- { Invalid~end-point~for~range~'#1-#2'~in~character~class. }
- {
- The~end-point~'#2'~of~the~range~'#1-#2'~may~not~serve~as~an~
- end-point~for~a~range:~alphanumeric~characters~should~not~be~
- escaped,~and~non-alphanumeric~characters~should~be~escaped.
- }
-\__msg_kernel_new:nnnn { regex } { range-backwards } % #1, #2: the two end-points of the range
- { Range~'[#1-#2]'~out~of~order~in~character~class. }
- {
- In~ranges~of~characters~'[x-y]'~appearing~in~character~classes,~
- the~first~character~code~must~not~be~larger~than~the~second.~
- Here,~'#1'~has~character~code~\int_eval:n {`#1},~while~
- '#2'~has~character~code~\int_eval:n {`#2}.
- }
-\__msg_kernel_new:nnnn { regex } { c-bad-mode } % no arguments
- { Invalid~nested~'\iow_char:N\\c'~escape~in~regular~expression. }
- {
- The~'\iow_char:N\\c'~escape~cannot~be~used~within~
- a~control~sequence~test~'\iow_char:N\\c{...}'.~
- To~combine~several~category~tests,~use~'\iow_char:N\\c[...]'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrace } % no arguments
- { Missing~right~brace~inserted~for~'\iow_char:N\\c'~escape. }
- {
- LaTeX~was~given~a~regular~expression~where~a~
- '\iow_char:N\\c\iow_char:N\{...'~construction~was~not~ended~
- with~a~closing~brace~'\iow_char:N\}'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrack } % no arguments
- { Missing~right~bracket~inserted~for~'\iow_char:N\\c'~escape. }
- {
- A~construction~'\iow_char:N\\c[...'~appears~in~a~
- regular~expression,~but~the~closing~']'~is~not~present.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-category } % #1: the character found after the escape
- { Invalid~character~'#1'~following~'\iow_char:N\\c'~escape. }
- {
- In~regular~expressions,~the~'\iow_char:N\\c'~escape~sequence~
- may~only~be~followed~by~a~left~brace,~a~left~bracket,~or~a~
- capital~letter~representing~a~character~category,~namely~
- one~of~'ABCDELMOPSTU'.
- }
-\__msg_kernel_new:nnnn { regex } { c-trailing } % no arguments
- { Trailing~category~code~escape~'\iow_char:N\\c'... }
- {
- A~regular~expression~ends~with~'\iow_char:N\\c'~followed~
- by~a~letter.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-lbrace } % no arguments
- { Missing~left~brace~following~'\iow_char:N\\u'~escape. }
- {
- The~'\iow_char:N\\u'~escape~sequence~must~be~followed~by~
- a~brace~group~with~the~name~of~the~variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-rbrace } % message for a \u escape whose brace group is not closed
- { Missing~right~brace~inserted~for~'\iow_char:N\\u'~escape. }
- {
- LaTeX~
- \str_if_eq_x:nnTF { } {#2} % an empty second message argument selects the end-of-string wording
- { reached~the~end~of~the~string~ }
- { encountered~an~escaped~alphanumeric~character~'\iow_char:N\\#2'~ } % explicit ~ keeps the quoted escape apart from the preceding word
- when~parsing~the~argument~of~an~'\iow_char:N\\u\iow_char:N\{...\}'~escape.
- }
-\__msg_kernel_new:nnnn { regex } { posix-unsupported }
- { POSIX~collating~element~'[#1 ~ #1]'~not~supported. }
- {
- The~'[.foo.]'~and~'[=bar=]'~syntaxes~have~a~special~meaning~
- in~POSIX~regular~expressions.~This~is~not~supported~by~LaTeX.~
- Maybe~you~forgot~to~escape~a~left~bracket~in~a~character~class?
- }
-\__msg_kernel_new:nnnn { regex } { posix-unknown }
- { POSIX~class~'[:#1:]'~unknown. }
- {
- '[:#1:]'~is~not~among~the~known~POSIX~classes~
- '[:alnum:]',~'[:alpha:]',~'[:ascii:]',~'[:blank:]',~
- '[:cntrl:]',~'[:digit:]',~'[:graph:]',~'[:lower:]',~
- '[:print:]',~'[:punct:]',~'[:space:]',~'[:upper:]',~
- '[:word:]',~and~'[:xdigit:]'.
- }
-\__msg_kernel_new:nnnn { regex } { posix-missing-close }
- { Missing~closing~':]'~for~POSIX~class. }
- { The~POSIX~syntax~'#1'~must~be~followed~by~':]',~not~'#2'. }
-\__msg_kernel_new:nnnn { regex } { result-unbalanced }
- { Missing~brace~inserted~when~#1. }
- {
- LaTeX~was~asked~to~do~some~regular~expression~operation,~
- and~the~resulting~token~list~would~not~have~the~same~number~
- of~begin-group~and~end-group~tokens.~Braces~were~inserted:~
- #2~left,~#3~right.
- }
-\__msg_kernel_new:nnnn { regex } { unknown-option }
- { Unknown~option~'#1'~for~regular~expressions. }
- {
- The~only~available~option~is~'case-insensitive',~toggled~by~
- '(?i)'~and~'(?-i)'.
- }
-\__msg_kernel_new:nnnn { regex } { special-group-unknown }
- { Unknown~special~group~'#1~...'~in~a~regular~expression. }
- {
- The~only~valid~constructions~starting~with~'(?'~are~
- '(?:~...~)',~'(?|~...~)',~'(?i)',~and~'(?-i)'.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-c }
- { Misused~'\iow_char:N\\c'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~
- or~a~brace~group,~not~by~'#1'.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-u } % message for a \u escape misused in a replacement text
- { Misused~'\iow_char:N\\u'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\u'~escape~sequence~
- must~be~followed~by~a~brace~group~holding~the~name~of~the~
- variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-g } % message for a \g escape lacking its brace in a replacement text
- {
- Missing~brace~for~the~'\iow_char:N\\g'~construction~
- in~a~replacement~text.
- }
- {
- In~the~replacement~text~for~a~regular~expression~search,~
- submatches~are~represented~either~as~'\iow_char:N \\g{dd..d}',~
- or~'\iow_char:N\\d',~where~'d'~are~single~digits.~Here,~a~brace~is~missing. % \iow_char:N prints the backslash; a bare \\ would be a line break in message text
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-end }
- {
- Missing~character~for~the~'\iow_char:N\\c<category><character>'~
- construction~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~representing~
- the~character~category.~Then,~a~character~must~follow.~LaTeX~
- reached~the~end~of~the~replacement~when~looking~for~that.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-in-cs }
- {
- Category~code~'\iow_char:N\\c#1#3'~ignored~inside~
- '\iow_char:N\\c\{...\}'~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~category~codes~of~the~argument~of~
- '\iow_char:N\\c\{...\}'~are~ignored~when~building~the~control~
- sequence~name.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-null-space }
- { TeX~cannot~build~a~space~token~with~character~code~0. }
- {
- You~asked~for~a~character~token~with~category~space,~
- and~character~code~0,~for~instance~through~
- '\iow_char:N\\cS\iow_char:N\\x00'.~
- This~specific~case~is~impossible~and~will~be~replaced~
- by~a~normal~space.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rbrace }
- { Missing~right~brace~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { brace } { braces } .
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rparen }
- { Missing~right~parenthesis~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } .
- }
-\cs_new:Npn \__regex_msg_repeated:nnN #1#2#3 % expands to a ", repeated ..." phrase built from the numbers #1, #2 and the boolean #3
- {
- \str_if_eq_x:nnF { #1 #2 } { 1 0 } % nothing is produced when #1#2 compares equal to 10 (#1 = 1 with #2 = 0)
- {
- , ~ repeated ~
- \int_case:nnF {#2}
- {
- { -1 } { #1~or~more~times,~\bool_if:NTF #3 { lazy } { greedy } } % #2 = -1 is worded as "#1 or more times"
- { 0 } { #1~times } % #2 = 0 is worded as exactly "#1 times", with no lazy/greedy qualifier
- }
- {
- between~#1~and~\int_eval:n {#1+#2}~times,~ % any other #2 is added to #1 to give the upper figure
- \bool_if:NTF #3 { lazy } { greedy }
- }
- }
- }
-\cs_new_protected:Npn \__regex_trace_states:n #1 % issue one \trace:nnx call per state's toks register; #1 is forwarded as its second argument
- {
- \int_step_inline:nnnn % ##1 runs from the minimum state to the maximum state minus one, in steps of 1
- \l__regex_min_state_int
- { 1 }
- { \l__regex_max_state_int - 1 }
- {
- \trace:nnx { regex } { #1 }
- { \iow_char:N \\toks ##1 = { \__regex_toks_use:w ##1 } } % text has the form "\toks<n> = {<contents of that register>}"
- }
- }
-%%
-%%
-%% End of file `l3regex-trace.sty'.
Deleted: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3regex.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,3005 +0,0 @@
-%%
-%% This is file `l3regex.sty',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% l3regex.dtx (with options: `package')
-%%
-%% Copyright (C) 2011-2017 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of
-%% the LaTeX Project Public License (LPPL), either version 1.3c of
-%% this license or (at your option) any later version. The latest
-%% version of this license is in the file:
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3experimental bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% File: l3regex.dtx Copyright (C) 2011-2017 The LaTeX3 Project
-\RequirePackage{expl3}[2017/05/13]
-\@ifpackagelater{expl3}{2017/05/13}
- {}
- {%
- \PackageError{l3regex}{Support package l3kernel too old}
- {%
- Please install an up to date version of l3kernel\MessageBreak
- using your TeX package manager or from CTAN.\MessageBreak
- \MessageBreak
- Loading l3regex will abort!%
- }%
- \endinput
- }
-\ProvidesExplPackage{l3regex}{2017/05/13}{}
- {L3 Experimental regular expressions}
-\RequirePackage{l3tl-build, l3tl-analysis, l3intarray}
-\cs_generate_variant:Nn \tl_to_str:n { V }
-\cs_new_protected:Npn \__regex_standard_escapechar: % set TeX's escape character to the backslash
- { \int_set:Nn \tex_escapechar:D { `\\ } }
-\cs_new:Npn \__regex_toks_use:w { \tex_the:D \tex_toks:D } % expands to the contents of the toks register whose number follows
-\cs_new_protected:Npn \__regex_toks_clear:N #1 % empty the toks register numbered #1
- { \tex_toks:D #1 { } }
-\cs_new_eq:NN \__regex_toks_set:Nn \tex_toks:D % plain alias of the primitive: a register number and a braced value follow
-\cs_new_protected:Npn \__regex_toks_set:No #1 % as the :Nn form, but the first token inside the following braces is expanded once
- { \__regex_toks_set:Nn #1 \exp_after:wN }
-\cs_new_protected:Npn \__regex_toks_memcpy:NNn #1#2#3 % copy #3 consecutive toks registers: source index held in #2, destination index held in #1
- {
- \prg_replicate:nn {#3}
- {
- \tex_toks:D #1 = \tex_toks:D #2
- \int_incr:N #1 % both index variables are advanced in place, so the caller sees them changed by #3
- \int_incr:N #2
- }
- }
-\cs_new_protected:Npn \__regex_toks_put_left:Nx #1#2 % prepend the full expansion of #2 to toks register #1
- {
- \cs_set:Npx \__regex_tmp:w { #2 } % x-expand #2 once and hold the result in a scratch macro
- \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN % the old contents are expanded first, then the scratch macro in front of them
- { \exp_after:wN \__regex_tmp:w \tex_the:D \tex_toks:D #1 }
- }
-\cs_new_protected:Npn \__regex_toks_put_right:Nx #1#2 % append the full expansion of #2 to toks register #1
- {
- \cs_set:Npx \__regex_tmp:w {#2} % x-expand #2 once and hold the result in a scratch macro
- \tex_toks:D #1 \exp_after:wN
- { \tex_the:D \tex_toks:D \exp_after:wN #1 \__regex_tmp:w } % the inner \exp_after:wN expands the scratch macro before #1 is read as the register number
- }
-\cs_new_protected:Npn \__regex_toks_put_right:Nn #1#2 % append #2, without expanding it, to toks register #1
- { \tex_toks:D #1 \exp_after:wN { \tex_the:D \tex_toks:D #1 #2 } }
-\cs_new:Npn \__regex_current_cs_to_str: % apply \cs_to_str:N to what the toks register numbered \l__regex_current_pos_int contains
- {
- \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N % the register contents are produced before \cs_to_str:N runs
- \tex_the:D \tex_toks:D \l__regex_current_pos_int
- }
-\cs_new:Npn \__regex_tmp:w { }
-\tl_new:N \l__regex_internal_a_tl
-\tl_new:N \l__regex_internal_b_tl
-\int_new:N \l__regex_internal_a_int
-\int_new:N \l__regex_internal_b_int
-\int_new:N \l__regex_internal_c_int
-\bool_new:N \l__regex_internal_bool
-\seq_new:N \l__regex_internal_seq
-\tl_new:N \g__regex_internal_tl
-\tl_const:Nn \c__regex_no_match_regex % constant compiled regex: one branch holding one class whose item list is empty
- {
- \__regex_branch:n
- { \__regex_class:NnnnN \c_true_bool { } { 1 } { 0 } \c_true_bool } % NOTE(review): presumably this can never match, as the name says - confirm against \__regex_class:NnnnN
- }
-\__intarray_new:Nn \g__regex_charcode_intarray { 65536 }
-\__intarray_new:Nn \g__regex_catcode_intarray { 65536 }
-\__intarray_new:Nn \g__regex_balance_intarray { 65536 }
-\int_new:N \l__regex_balance_int
-\tl_new:N \l__regex_cs_name_tl
-\int_const:Nn \c__regex_ascii_min_int { 0 } % lower bound used by the posix ascii and cntrl ranges
-\int_const:Nn \c__regex_ascii_max_control_int { 31 } % upper bound of the posix cntrl range
-\int_const:Nn \c__regex_ascii_max_int { 127 } % upper bound of the posix ascii range; also matched on its own by posix cntrl
-\int_const:Nn \c__regex_ascii_lower_int { `a - `A } % difference between the codes of "a" and "A", used for case changing
-\cs_new_protected:Npn \__regex_break_true:w % discard everything up to the next \__regex_break_point:TF and run its first (true) argument
- #1 \__regex_break_point:TF #2 #3 {#2}
-\cs_new_protected:Npn \__regex_break_point:TF #1 #2 { #2 } % reached without a break: run the second (false) argument
-\cs_new_protected:Npn \__regex_item_reverse:n #1 % invert the test #1: break to the enclosing break point only if #1 does not break
- {
- #1
- \__regex_break_point:TF { } \__regex_break_true:w % a break inside #1 lands here and does nothing; otherwise the false argument triggers a break
- }
-\cs_new_protected:Npn \__regex_item_caseful_equal:n #1 % break (match) when the current character code equals #1
- {
- \if_int_compare:w #1 = \l__regex_current_char_int
- \exp_after:wN \__regex_break_true:w % the conditional is closed before the break takes effect
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseful_range:nn #1 #2 % break (match) when #1 <= current character code <= #2
- {
- \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int % not (#1 > char)
- \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int % not (#2 < char)
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w % both conditionals are closed before the break takes effect
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseless_equal:n #1 % break (match) when #1 equals the current character code or its case-changed counterpart
- {
- \if_int_compare:w #1 = \l__regex_current_char_int
- \exp_after:wN \__regex_break_true:w
- \fi:
- \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int % \c_max_int is treated as "not computed yet"
- \__regex_compute_case_changed_char:
- \fi:
- \if_int_compare:w #1 = \l__regex_case_changed_char_int
- \exp_after:wN \__regex_break_true:w
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_caseless_range:nn #1 #2 % break (match) when the current character code or its case-changed counterpart lies in [#1, #2]
- {
- \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int % first try the character as it is
- \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int % \c_max_int is treated as "not computed yet"
- \__regex_compute_case_changed_char:
- \fi:
- \reverse_if:N \if_int_compare:w #1 > \l__regex_case_changed_char_int % then try the case-changed character
- \reverse_if:N \if_int_compare:w #2 < \l__regex_case_changed_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compute_case_changed_char: % store in \l__regex_case_changed_char_int the current character with its ASCII case swapped
- {
- \int_set_eq:NN \l__regex_case_changed_char_int \l__regex_current_char_int % default: unchanged, kept for anything outside A-Z and a-z
- \if_int_compare:w \l__regex_current_char_int > `Z \exp_stop_f:
- \if_int_compare:w \l__regex_current_char_int > `z \exp_stop_f: \else:
- \if_int_compare:w \l__regex_current_char_int < `a \exp_stop_f: \else:
- \int_sub:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int } % a-z: move to the upper-case code
- \fi:
- \fi:
- \else:
- \if_int_compare:w \l__regex_current_char_int < `A \exp_stop_f: \else:
- \int_add:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int } % A-Z: move to the lower-case code
- \fi:
- \fi:
- }
-\cs_new_eq:NN \__regex_item_equal:n ?
-\cs_new_eq:NN \__regex_item_range:nn ?
-\cs_new_protected:Npn \__regex_item_catcode: % leaves a hexadecimal literal selected by \l__regex_current_catcode_int
- {
- " % hexadecimal prefix for the digits chosen below
- \if_case:w \l__regex_current_catcode_int
- 1 \or: 4 \or: 10 \or: 40 % categories 0 to 3
- \or: 100 \or: \or: 1000 \or: 4000 % categories 4 to 7; category 5 contributes no digits
- \or: 10000 \or: \or: 100000 \or: 400000 % categories 8 to 11; category 9 contributes no digits
- \or: 1000000 \or: 4000000 \else: 1*0 % categories 12 and 13; any other value leaves 1*0
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_catcode:nT #1 % run the following braced code only if the mask #1 selects the current category
- {
- \if_int_odd:w \__int_eval:w #1 / \__regex_item_catcode: \__int_eval_end: % selected when #1 divided by the category's hexadecimal weight is odd
- \exp_after:wN \use:n
- \else:
- \exp_after:wN \use_none:n
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_catcode_reverse:nT #1#2 % as \__regex_item_catcode:nT, with the test #2 wrapped in \__regex_item_reverse:n
- { \__regex_item_catcode:nT {#1} { \__regex_item_reverse:n {#2} } }
-\cs_new_protected:Npn \__regex_item_exact:nn #1#2 % break (match) when the current category code is #1 and the current character code is #2
- {
- \if_int_compare:w #1 = \l__regex_current_catcode_int
- \if_int_compare:w #2 = \l__regex_current_char_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w % both conditionals are closed before the break takes effect
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_item_exact_cs:n #1 % break (match) when the current control sequence name occurs in #1 between \scan_stop: tokens
- {
- \int_compare:nNnTF \l__regex_current_catcode_int = 0 % only tested when the current category is 0
- {
- \tl_set:Nx \l__regex_internal_a_tl
- { \scan_stop: \__regex_current_cs_to_str: \scan_stop: } % the name wrapped in delimiters so that only whole names are found
- \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l__regex_internal_a_tl
- { \__regex_break_true:w } { }
- }
- { }
- }
-\cs_new_protected:Npn \__regex_item_cs:n #1 % match the current control sequence's name against the compiled regex #1
- {
- \int_compare:nNnT \l__regex_current_catcode_int = 0 % only attempted when the current category is 0
- {
- \group_begin: % the nested match runs inside a group
- \tl_set:Nx \l__regex_cs_name_tl { \__regex_current_cs_to_str: }
- \__regex_single_match:
- \__regex_disable_submatches:
- \__regex_build_for_cs:n {#1}
- \bool_set_eq:NN \l__regex_saved_success_bool \g__regex_success_bool % save the global flag around the nested match
- \exp_args:NV \__regex_match:n \l__regex_cs_name_tl
- \if_meaning:w \c_true_bool \g__regex_success_bool
- \group_insert_after:N \__regex_break_true:w % the break is inserted after the group closes
- \fi:
- \bool_gset_eq:NN \g__regex_success_bool \l__regex_saved_success_bool % restore the saved flag
- \group_end:
- }
- }
-\cs_new_protected:Npn \__regex_prop_d: % digits 0-9
- { \__regex_item_caseful_range:nn { `0 } { `9 } }
-\cs_new_protected:Npn \__regex_prop_h: % space or character 9 (tab)
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_equal:n { `\^^I }
- }
-\cs_new_protected:Npn \__regex_prop_s: % space or one of characters 9, 10, 12, 13
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_equal:n { `\^^I }
- \__regex_item_caseful_equal:n { `\^^J }
- \__regex_item_caseful_equal:n { `\^^L }
- \__regex_item_caseful_equal:n { `\^^M }
- }
-\cs_new_protected:Npn \__regex_prop_v: % characters 10 to 13
- { \__regex_item_caseful_range:nn { `\^^J } { `\^^M } } % lf, vtab, ff, cr
-\cs_new_protected:Npn \__regex_prop_w: % letters a-z and A-Z, digits, underscore
- {
- \__regex_item_caseful_range:nn { `a } { `z }
- \__regex_item_caseful_range:nn { `A } { `Z }
- \__regex_item_caseful_range:nn { `0 } { `9 }
- \__regex_item_caseful_equal:n { `_ }
- }
-\cs_new_protected:Npn \__regex_prop_N: % anything except character 10
- {
- \__regex_item_reverse:n
- { \__regex_item_caseful_equal:n { `\^^J } }
- }
-\cs_new_protected:Npn \__regex_posix_alnum: % alpha or digit
- { \__regex_posix_alpha: \__regex_posix_digit: }
-\cs_new_protected:Npn \__regex_posix_alpha: % lower or upper
- { \__regex_posix_lower: \__regex_posix_upper: }
-\cs_new_protected:Npn \__regex_posix_ascii: % codes 0 to 127
- {
- \__regex_item_caseful_range:nn
- \c__regex_ascii_min_int
- \c__regex_ascii_max_int
- }
-\cs_new_eq:NN \__regex_posix_blank: \__regex_prop_h: % same test as \__regex_prop_h:
-\cs_new_protected:Npn \__regex_posix_cntrl: % codes 0 to 31, or 127
- {
- \__regex_item_caseful_range:nn
- \c__regex_ascii_min_int
- \c__regex_ascii_max_control_int
- \__regex_item_caseful_equal:n \c__regex_ascii_max_int
- }
-\cs_new_eq:NN \__regex_posix_digit: \__regex_prop_d: % same test as \__regex_prop_d:
-\cs_new_protected:Npn \__regex_posix_graph: % from the exclamation mark to the tilde
- { \__regex_item_caseful_range:nn { `! } { `\~ } }
-\cs_new_protected:Npn \__regex_posix_lower: % a-z
- { \__regex_item_caseful_range:nn { `a } { `z } }
-\cs_new_protected:Npn \__regex_posix_print: % from the space to the tilde
- { \__regex_item_caseful_range:nn { `\ } { `\~ } }
-\cs_new_protected:Npn \__regex_posix_punct: % the four runs of non-alphanumeric characters between the exclamation mark and the tilde
- {
- \__regex_item_caseful_range:nn { `! } { `/ }
- \__regex_item_caseful_range:nn { `: } { `@ }
- \__regex_item_caseful_range:nn { `[ } { `` }
- \__regex_item_caseful_range:nn { `\{ } { `\~ }
- }
-\cs_new_protected:Npn \__regex_posix_space: % space, or characters 9 to 13
- {
- \__regex_item_caseful_equal:n { `\ }
- \__regex_item_caseful_range:nn { `\^^I } { `\^^M }
- }
-\cs_new_protected:Npn \__regex_posix_upper: % A-Z
- { \__regex_item_caseful_range:nn { `A } { `Z } }
-\cs_new_eq:NN \__regex_posix_word: \__regex_prop_w: % same test as \__regex_prop_w:
-\cs_new_protected:Npn \__regex_posix_xdigit: % digits, A-F or a-f
- {
- \__regex_posix_digit:
- \__regex_item_caseful_range:nn { `A } { `F }
- \__regex_item_caseful_range:nn { `a } { `f }
- }
-\cs_new_protected:Npn \__regex_escape_use:nnnn #1#2#3#4 % process the string #4, using #1 for unescaped, #2 for escaped and #3 for raw characters, then run the result
- {
- \__tl_build:Nw \l__regex_internal_a_tl
- \cs_set:Npn \__regex_escape_unescaped:N ##1 { #1 } % in #1, #2 and #3 the character is available as ##1
- \cs_set:Npn \__regex_escape_escaped:N ##1 { #2 }
- \cs_set:Npn \__regex_escape_raw:N ##1 { #3 }
- \__regex_standard_escapechar:
- \tl_gset:Nx \g__regex_internal_tl { \__str_to_other_fast:n {#4} }
- \tl_set:Nx \l__regex_internal_b_tl
- {
- \exp_after:wN \__regex_escape_loop:N \g__regex_internal_tl
- { break } \__prg_break_point: % the braced word "break" marks the end of the input for the loop
- }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__tl_build_end:
- \l__regex_internal_a_tl % finally execute what was built
- }
-\cs_new:Npn \__regex_escape_loop:N #1 % handle one item #1 of the string, then continue with the next
- {
- \cs_if_exist_use:cF { __regex_escape_\token_to_str:N #1:w } % a dedicated handler named after #1 takes precedence
- { \__regex_escape_unescaped:N #1 } % otherwise #1 is an ordinary unescaped character
- \__regex_escape_loop:N
- }
-\cs_new:cpn { __regex_escape_ \c_backslash_str :w }
- \__regex_escape_loop:N #1
- {
- \cs_if_exist_use:cF { __regex_escape_/\token_to_str:N #1:w }
- { \__regex_escape_escaped:N #1 }
- \__regex_escape_loop:N
- }
-\cs_new_eq:NN \__regex_escape_unescaped:N ?
-\cs_new_eq:NN \__regex_escape_escaped:N ?
-\cs_new_eq:NN \__regex_escape_raw:N ?
-\cs_new_eq:NN \__regex_escape_break:w \__prg_break:
-\cs_new:cpn { __regex_escape_/break:w }
- {
- \if_false: { \fi: }
- \__msg_kernel_error:nn { regex } { trailing-backslash }
- \exp_after:wN \use_none:n \exp_after:wN { \if_false: } \fi:
- }
-\cs_new:cpn { __regex_escape_~:w } { }
-\cs_new:cpx { __regex_escape_/a:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^G }
-\cs_new:cpx { __regex_escape_/t:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^I }
-\cs_new:cpx { __regex_escape_/n:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^J }
-\cs_new:cpx { __regex_escape_/f:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^L }
-\cs_new:cpx { __regex_escape_/r:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^M }
-\cs_new:cpx { __regex_escape_/e:w }
- { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^[ }
-\cs_new:cpn { __regex_escape_/x:w } \__regex_escape_loop:N
- {
- \exp_after:wN \__regex_escape_x_end:w
- \__int_value:w "0 \__regex_escape_x_test:N
- }
-\cs_new:Npn \__regex_escape_x_end:w #1 ;
- {
- \int_compare:nNnTF {#1} > \c_max_char_int
- {
- \if_false: { \fi: }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
- \tl_set:Nx \l__regex_internal_b_tl
- { \if_false: } \fi:
- }
- {
- \exp_last_unbraced:Nf \__regex_escape_raw:N
- { \char_generate:nn {#1} { 12 } }
- }
- }
-\cs_new:Npn \__regex_escape_x_test:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; }
- {
- \if_charcode:w \c_space_token #1
- \exp_after:wN \__regex_escape_x_test:N
- \else:
- \exp_after:wN \__regex_escape_x_testii:N
- \exp_after:wN #1
- \fi:
- }
- }
-\cs_new:Npn \__regex_escape_x_testii:N #1
- {
- \if_charcode:w \c_left_brace_str #1
- \exp_after:wN \__regex_escape_x_loop:N
- \else:
- \__regex_hexadecimal_use:NTF #1
- { \exp_after:wN \__regex_escape_x:N }
- { ; \exp_after:wN \__regex_escape_loop:N \exp_after:wN #1 }
- \fi:
- }
-\cs_new:Npn \__regex_escape_x:N #1
- {
- \str_if_eq_x:nnTF {#1} { break } { ; }
- {
- \__regex_hexadecimal_use:NTF #1
- { ; \__regex_escape_loop:N }
- { ; \__regex_escape_loop:N #1 }
- }
- }
-\cs_new:Npn \__regex_escape_x_loop:N #1
- {
- \str_if_eq_x:nnTF {#1} { break }
- { ; \__regex_escape_x_loop_error:n { } {#1} }
- {
- \__regex_hexadecimal_use:NTF #1
- { \__regex_escape_x_loop:N }
- {
- \token_if_eq_charcode:NNTF \c_space_token #1
- { \__regex_escape_x_loop:N }
- {
- ;
- \exp_after:wN
- \token_if_eq_charcode:NNTF \c_right_brace_str #1
- { \__regex_escape_loop:N }
- { \__regex_escape_x_loop_error:n {#1} }
- }
- }
- }
- }
-\cs_new:Npn \__regex_escape_x_loop_error:n #1
- {
- \if_false: { \fi: }
- \__tl_build_one:o \l__regex_internal_b_tl
- \__msg_kernel_error:nnx { regex } { x-missing-rbrace } {#1}
- \tl_set:Nx \l__regex_internal_b_tl
- { \if_false: } \fi: \__regex_escape_loop:N #1
- }
-\prg_new_conditional:Npnn \__regex_hexadecimal_use:N #1 { TF } % true if #1 is a hexadecimal digit; the digit is also left in the input, a-f as A-F
- {
- \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f: % holds for 0-9 and A-F, where "1 followed by #1 is a number above 1
- #1 \prg_return_true:
- \else:
- \if_case:w \__int_eval:w
- \exp_after:wN ` \token_to_str:N #1 - `a % offset of #1 from the letter a: 0 to 5 for a-f
- \__int_eval_end:
- A
- \or: B
- \or: C
- \or: D
- \or: E
- \or: F
- \else:
- \prg_return_false:
- \exp_after:wN \use_none:n % removes the \prg_return_true: that follows the \fi:
- \fi:
- \prg_return_true:
- \fi:
- }
-\prg_new_conditional:Npnn \__regex_char_if_special:N #1 { TF } % true for codes from the space up to 126 that are neither letters a-z, A-Z nor digits
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \if_int_compare:w `#1 < \c__regex_ascii_max_int % above z: special only below 127
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f: % strictly between Z and a
- \prg_return_true: \else: \prg_return_false: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f: % strictly between 9 and A
- \prg_return_true: \else: \prg_return_false: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f:
- \if_int_compare:w `#1 < `\ \exp_stop_f: % below the space: not special
- \prg_return_false: \else: \prg_return_true: \fi:
- \else: \prg_return_false: \fi:
- \fi:
- \fi:
- }
-\prg_new_conditional:Npnn \__regex_char_if_alphanumeric:N #1 { TF } % true exactly for a-z, A-Z and 0-9
- {
- \if_int_compare:w `#1 > `Z \exp_stop_f:
- \if_int_compare:w `#1 > `z \exp_stop_f:
- \prg_return_false:
- \else:
- \if_int_compare:w `#1 < `a \exp_stop_f: % between Z and a: not alphanumeric
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \else:
- \if_int_compare:w `#1 > `9 \exp_stop_f:
- \if_int_compare:w `#1 < `A \exp_stop_f: % between 9 and A: not alphanumeric
- \prg_return_false: \else: \prg_return_true: \fi:
- \else:
- \if_int_compare:w `#1 < `0 \exp_stop_f: % below 0: not alphanumeric
- \prg_return_false: \else: \prg_return_true: \fi:
- \fi:
- \fi:
- }
-\int_new:N \l__regex_group_level_int
-\int_new:N \l__regex_mode_int
-\int_const:Nn \c__regex_cs_in_class_mode_int { -6 } % negative values are what \__regex_if_in_cs:TF treats as "in a control sequence"
-\int_const:Nn \c__regex_cs_mode_int { -2 }
-\int_const:Nn \c__regex_outer_mode_int { 0 } % reference value the mode tests compare against
-\int_const:Nn \c__regex_catcode_mode_int { 2 } % left for outer mode by \__regex_mode_quit_c:
-\int_const:Nn \c__regex_class_mode_int { 3 } % the only odd value here, which is what \__regex_if_in_class:TF tests
-\int_const:Nn \c__regex_catcode_in_class_mode_int { 6 } % left for class mode by \__regex_mode_quit_c:
-\int_new:N \l__regex_catcodes_int
-\int_new:N \l__regex_default_catcodes_int
-\bool_new:N \l__regex_catcodes_bool
-\int_const:Nn \c__regex_catcode_C_int { "1 } % same hexadecimal weights as produced by \__regex_item_catcode: (this one: category 0)
-\int_const:Nn \c__regex_catcode_B_int { "4 } % category 1
-\int_const:Nn \c__regex_catcode_E_int { "10 } % category 2
-\int_const:Nn \c__regex_catcode_M_int { "40 } % category 3
-\int_const:Nn \c__regex_catcode_T_int { "100 } % category 4
-\int_const:Nn \c__regex_catcode_P_int { "1000 } % category 6
-\int_const:Nn \c__regex_catcode_U_int { "4000 } % category 7
-\int_const:Nn \c__regex_catcode_D_int { "10000 } % category 8
-\int_const:Nn \c__regex_catcode_S_int { "100000 } % category 10
-\int_const:Nn \c__regex_catcode_L_int { "400000 } % category 11
-\int_const:Nn \c__regex_catcode_O_int { "1000000 } % category 12
-\int_const:Nn \c__regex_catcode_A_int { "4000000 } % category 13
-\int_const:Nn \c__regex_all_catcodes_int { "5515155 } % sum of the twelve weights above
-\cs_new_eq:NN \l__regex_internal_regex \c__regex_no_match_regex
-\seq_new:N \l__regex_show_prefix_seq
-\int_new:N \l__regex_show_lines_int
-\cs_new_protected:Npn \__regex_get_digits:NTFw #1#2#3#4#5 % read a run of raw digits from the input into the integer #1; then #2 if any were found, else #3
- {
- \__regex_if_raw_digit:NNTF #4 #5 % #4 #5 is the next (marker, character) pair of the input
- { #1 = #5 \__regex_get_digits_loop:nw {#2} } % start an assignment to #1; the loop supplies further digits
- { #3 #4 #5 } % no digit: the pair is put back after #3
- }
-\cs_new:Npn \__regex_get_digits_loop:nw #1#2#3 % keep feeding raw digits to the pending assignment; #1 is the code to run afterwards
- {
- \__regex_if_raw_digit:NNTF #2 #3
- { #3 \__regex_get_digits_loop:nw {#1} }
- { \scan_stop: #1 #2 #3 } % \scan_stop: ends the number, then #1 runs with the unread pair put back
- }
-\prg_new_conditional:Npnn \__regex_if_raw_digit:NN #1#2 { TF } % true when #1 is \__regex_compile_raw:N and #2 is a decimal digit
- {
- \if_meaning:w \__regex_compile_raw:N #1
- \if_int_compare:w 1 < 1 #2 \exp_stop_f: % with a digit #2, "1#2" is a number of at least 10
- \prg_return_true:
- \else:
- \prg_return_false:
- \fi:
- \else:
- \prg_return_false:
- \fi:
- }
-\cs_new:Npn \__regex_if_in_class:TF % true branch when \l__regex_mode_int is odd
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \use_ii:nn
- \fi:
- }
-\cs_new:Npn \__regex_if_in_cs:TF % true branch when \l__regex_mode_int is even and below the outer mode
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_ii:nn % odd modes are never "in cs"
- \else:
- \if_int_compare:w \l__regex_mode_int < \c__regex_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_if_in_class_or_catcode:TF % true branch when \l__regex_mode_int is odd or above the outer mode
- {
- \if_int_odd:w \l__regex_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_if_within_catcode:TF % true branch when \l__regex_mode_int is above the outer mode
- {
- \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
- \exp_after:wN \use_i:nn
- \else:
- \exp_after:wN \use_ii:nn
- \fi:
- }
-\cs_new_protected:Npn \__regex_chk_c_allowed:T % run the following braced code only in outer or class mode; otherwise raise c-bad-mode and drop it
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_outer_mode_int
- \exp_after:wN \use:n
- \else:
- \if_int_compare:w \l__regex_mode_int = \c__regex_class_mode_int
- \exp_after:wN \exp_after:wN \exp_after:wN \use:n
- \else:
- \__msg_kernel_error:nn { regex } { c-bad-mode }
- \exp_after:wN \exp_after:wN \exp_after:wN \use_none:n % discard the code that follows
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_mode_quit_c: % leave a catcode mode: catcode -> outer, catcode-in-class -> class; other modes are untouched
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_mode_int
- \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
- \else:
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_in_class_mode_int
- \int_set_eq:NN \l__regex_mode_int \c__regex_class_mode_int
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile:w % start building \l__regex_internal_regex and reset the compilation state
- {
- \__tl_build_x:Nw \l__regex_internal_regex
- \int_zero:N \l__regex_group_level_int
- \int_set_eq:NN \l__regex_default_catcodes_int \c__regex_all_catcodes_int % start with every category allowed
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseful_equal:n } % start with the caseful item tests
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseful_range:nn }
- \__tl_build_one:n { \__regex_branch:n { \if_false: } \fi: } % open the first branch; its closing brace is added by \__regex_compile_end:
- }
-\cs_new_protected:Npn \__regex_compile_end: % finish compilation, closing any class or groups left open and reporting them
- {
- \__regex_if_in_class:TF
- {
- \__msg_kernel_error:nn { regex } { missing-rbrack }
- \use:c { __regex_compile_]: } % act as if the closing bracket had been given
- \prg_do_nothing: \prg_do_nothing: % filler tokens for code that reads ahead
- }
- { }
- \if_int_compare:w \l__regex_group_level_int > 0 \exp_stop_f:
- \__msg_kernel_error:nnx { regex } { missing-rparen }
- { \int_use:N \l__regex_group_level_int }
- \prg_replicate:nn % once per group still open
- { \l__regex_group_level_int }
- {
- \__tl_build_one:n
- {
- \if_false: { \fi: }
- \if_false: { \fi: } { 1 } { 0 } \c_true_bool
- }
- \__tl_build_end:
- \__tl_build_one:o \l__regex_internal_regex
- }
- \fi:
- \__tl_build_one:n { \if_false: { \fi: } } % closing brace matching the one opened in \__regex_compile:w
- \__tl_build_end:
- }
-\cs_new_protected:Npn \__regex_compile:n #1 % compile the regular expression #1 into \l__regex_internal_regex
- {
- \__regex_compile:w
- \__regex_standard_escapechar:
- \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
- \__regex_escape_use:nnnn
- {
- \__regex_char_if_special:NTF ##1 % unescaped: special characters get \__regex_compile_special:N, others are raw
- \__regex_compile_special:N \__regex_compile_raw:N ##1
- }
- {
- \__regex_char_if_alphanumeric:NTF ##1 % escaped: alphanumerics get \__regex_compile_escaped:N, others are raw
- \__regex_compile_escaped:N \__regex_compile_raw:N ##1
- }
- { \__regex_compile_raw:N ##1 }
- { #1 }
- \prg_do_nothing: \prg_do_nothing: % filler tokens for code that reads ahead past the end of the pattern
- \prg_do_nothing: \prg_do_nothing:
- \int_compare:nNnT \l__regex_mode_int = \c__regex_catcode_mode_int
- { \__msg_kernel_error:nn { regex } { c-trailing } }
- \int_compare:nNnT \l__regex_mode_int < \c__regex_outer_mode_int % still in a negative mode at the end: report a missing brace
- {
- \__msg_kernel_error:nn { regex } { c-missing-rbrace }
- \__regex_compile_end_cs:
- \prg_do_nothing: \prg_do_nothing:
- \prg_do_nothing: \prg_do_nothing:
- }
- \__regex_compile_end:
- }
-\cs_new_protected:Npn \__regex_compile_special:N #1 % use the handler __regex_compile_<char>: if one exists, else treat #1 as a raw character
- {
- \cs_if_exist_use:cF { __regex_compile_#1: }
- { \__regex_compile_raw:N #1 }
- }
-\cs_new_protected:Npn \__regex_compile_escaped:N #1 % use the handler __regex_compile_/<char>: if one exists, else treat #1 as a raw character
- {
- \cs_if_exist_use:cF { __regex_compile_/#1: }
- { \__regex_compile_raw:N #1 }
- }
-\cs_new_protected:Npn \__regex_compile_one:x #1 % add the item test #1 to the regex being built, as its own class unless already in a class
- {
- \__regex_mode_quit_c:
- \__regex_if_in_class:TF { }
- {
- \__tl_build_one:n
- { \__regex_class:NnnnN \c_true_bool { \if_false: } \fi: } % outside a class: open a one-item class
- }
- \__tl_build_one:x
- {
- \if_int_compare:w \l__regex_catcodes_int < \c__regex_all_catcodes_int % a restricted category mask wraps the item in a catcode test
- \__regex_item_catcode:nT { \int_use:N \l__regex_catcodes_int }
- { \exp_not:N \exp_not:n {#1} }
- \else:
- \exp_not:N \exp_not:n {#1}
- \fi:
- }
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int % the mask applies to one item only
- \__regex_if_in_class:TF { } { \__regex_compile_quantifier:w } % outside a class: look for a quantifier next
- }
-\cs_new_protected:Npn \__regex_compile_abort_tokens:n #1
- {
- \use:x
- {
- \exp_args:No \tl_map_function:nN { \tl_to_str:n {#1} }
- \__regex_compile_raw:N
- }
- }
-\cs_generate_variant:Nn \__regex_compile_abort_tokens:n { x }
-\cs_new_protected:Npn \__regex_compile_quantifier:w #1#2 % inspect the next (marker, character) pair #1 #2 for a quantifier
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_special:N % only a special (unescaped) character can start a quantifier
- {
- \cs_if_exist_use:cF { __regex_compile_quantifier_#2:w }
- { \__regex_compile_quantifier_none: #1 #2 } % no handler for #2: no quantifier, the pair is put back
- }
- { \__regex_compile_quantifier_none: #1 #2 }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_none: % no quantifier: close the pending group with the arguments 1, 0 and false
- { \__tl_build_one:n { \if_false: { \fi: } { 1 } { 0 } \c_false_bool } }
-\cs_new_protected:Npn \__regex_compile_quantifier_abort:xNN #1#2#3
- {
- \__regex_compile_quantifier_none:
- \__msg_kernel_warning:nnxx { regex } { invalid-quantifier } {#1} {#3}
- \__regex_compile_abort_tokens:x {#1}
- #2 #3
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_lazyness:nnNN #1#2#3#4 % emit the quantifier arguments #1 and #2, with the final boolean true only if a special "?" follows
- {
- \str_if_eq:nnTF { #3 #4 } { \__regex_compile_special:N ? }
- { \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_true_bool } } % the "?" pair is consumed here
- {
- \__tl_build_one:n { \if_false: { \fi: } { #1 } { #2 } \c_false_bool }
- #3 #4 % anything else is put back into the input
- }
- }
-\cs_new_protected:cpn { __regex_compile_quantifier_?:w } % quantifier "?": arguments 0 and 1
- { \__regex_compile_quantifier_lazyness:nnNN { 0 } { 1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_*:w } % quantifier "*": arguments 0 and -1
- { \__regex_compile_quantifier_lazyness:nnNN { 0 } { -1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_+:w } % quantifier "+": arguments 1 and -1
- { \__regex_compile_quantifier_lazyness:nnNN { 1 } { -1 } }
-\cs_new_protected:cpn { __regex_compile_quantifier_ \c_left_brace_str :w }
- {
- \__regex_get_digits:NTFw \l__regex_internal_a_int
- { \__regex_compile_quantifier_braced_auxi:w }
- { \__regex_compile_quantifier_abort:xNN { \c_left_brace_str } }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxi:w #1#2
- {
- \str_case_x:nnF { #1 #2 }
- {
- { \__regex_compile_special:N \c_right_brace_str }
- {
- \exp_args:No \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int } { 0 }
- }
- { \__regex_compile_special:N , }
- {
- \__regex_get_digits:NTFw \l__regex_internal_b_int
- { \__regex_compile_quantifier_braced_auxiii:w }
- { \__regex_compile_quantifier_braced_auxii:w }
- }
- }
- {
- \__regex_compile_quantifier_abort:xNN
- { \c_left_brace_str \int_use:N \l__regex_internal_a_int }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxii:w #1#2
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \__regex_compile_special:N \c_right_brace_str }
- {
- \exp_args:No \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int } { -1 }
- }
- {
- \__regex_compile_quantifier_abort:xNN
- { \c_left_brace_str \int_use:N \l__regex_internal_a_int , }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_quantifier_braced_auxiii:w #1#2 % after two numbers have been read into the a and b integers: expect the closing brace
- {
- \str_if_eq_x:nnTF
- { #1 #2 } { \__regex_compile_special:N \c_right_brace_str }
- {
- \if_int_compare:w \l__regex_internal_a_int > \l__regex_internal_b_int
- \__msg_kernel_error:nnxx { regex } { backwards-quantifier }
- { \int_use:N \l__regex_internal_a_int }
- { \int_use:N \l__regex_internal_b_int }
- \int_zero:N \l__regex_internal_b_int % after the error the second argument becomes 0
- \else:
- \int_sub:Nn \l__regex_internal_b_int \l__regex_internal_a_int % the second argument is passed on as the difference b - a
- \fi:
- \exp_args:Noo \__regex_compile_quantifier_lazyness:nnNN
- { \int_use:N \l__regex_internal_a_int }
- { \int_use:N \l__regex_internal_b_int }
- }
- {
- \__regex_compile_quantifier_abort:xNN % not a closing brace: abort, passing on the characters read so far
- {
- \c_left_brace_str
- \int_use:N \l__regex_internal_a_int ,
- \int_use:N \l__regex_internal_b_int
- }
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_raw_error:N #1 % #1: character of an escape that is not valid here
- {
- \__msg_kernel_error:nnx { regex } { bad-escape } {#1} % report it ...
- \__regex_compile_raw:N #1 % ... then compile #1 as a raw character
- }
-\cs_new_protected:Npn \__regex_compile_raw:N #1#2#3 % #1: raw character; #2#3: the following pair, grabbed to look ahead
- {
- \__regex_if_in_class:TF
- {
- \str_if_eq:nnTF {#2#3} { \__regex_compile_special:N - } % in a class, a special `-' after #1 starts a range
- { \__regex_compile_range:Nw #1 } % #2#3 are dropped; the range code reads the end point itself
- {
- \__regex_compile_one:x
- { \__regex_item_equal:n { \__int_value:w `#1 ~ } } % test on the character code of #1
- #2 #3 % put the look-ahead pair back
- }
- }
- {
- \__regex_compile_one:x % outside a class: same single-character test
- { \__regex_item_equal:n { \__int_value:w `#1 ~ } }
- #2 #3
- }
- }
-\prg_new_protected_conditional:Npnn \__regex_if_end_range:NN #1#2 { TF } % #1#2: candidate end point of a range (marker, character)
- {
- \if_meaning:w \__regex_compile_raw:N #1 % a raw character is always accepted
- \prg_return_true:
- \else:
- \if_meaning:w \__regex_compile_special:N #1 % a special character is accepted ...
- \if_charcode:w ] #2 % ... unless it is `]'
- \prg_return_false:
- \else:
- \prg_return_true:
- \fi:
- \else:
- \prg_return_false: % any other marker is rejected
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_range:Nw #1#2#3 % #1: start character; #2#3: pair that should give the end character
- {
- \__regex_if_end_range:NNTF #2 #3
- {
- \if_int_compare:w `#1 > `#3 \exp_stop_f: % start code above end code: error, nothing is built
- \__msg_kernel_error:nnxx { regex } { range-backwards } {#1} {#3}
- \else:
- \__tl_build_one:x
- {
- \if_int_compare:w `#1 = `#3 \exp_stop_f: % equal end points: a single-character test on #3
- \__regex_item_equal:n
- \else:
- \__regex_item_range:nn { \__int_value:w `#1 ~ } % otherwise a range test; its second argument is the shared group below
- \fi:
- { \__int_value:w `#3 ~ }
- }
- \fi:
- }
- {
- \__msg_kernel_warning:nnxx { regex } { range-missing-end } % no usable end point
- {#1} { \c_backslash_str #3 }
- \__tl_build_one:x % build tests for #1 and for a literal `-' instead
- {
- \__regex_item_equal:n { \__int_value:w `#1 ~ }
- \__regex_item_equal:n { \__int_value:w `- ~ }
- }
- #2#3 % and let the rejected pair be processed normally
- }
- }
-\cs_new_protected:cpx { __regex_compile_.: } % x-type definition so that \exp_not:c can build the name `__regex_prop_.:' now
- {
- \exp_not:N \__regex_if_in_class:TF
- { \__regex_compile_raw:N . } % in a class the dot is compiled as a raw character
- { \__regex_compile_one:x \exp_not:c { __regex_prop_.: } } % elsewhere it compiles the `__regex_prop_.:' test
- }
-\cs_new_protected:cpn { __regex_prop_.: } % test compiled for the dot
- {
- \if_int_compare:w \l__regex_current_char_int > - 2 \exp_stop_f: % succeeds for any current character code above -2
- \exp_after:wN \__regex_break_true:w % NOTE(review): the meaning of codes -2 and below is not visible here
- \fi:
- }
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % scratch macro: #1 and #2 are the two letters of a property pair
- {
- \cs_new_protected:cpx { __regex_compile_/#1: } % `/#1' compiles the test `__regex_prop_#1:'
- { \__regex_compile_one:x \exp_not:c { __regex_prop_#1: } }
- \cs_new_protected:cpx { __regex_compile_/#2: } % `/#2' compiles the same test wrapped in \__regex_item_reverse:n
- {
- \__regex_compile_one:x
- { \__regex_item_reverse:n \exp_not:c { __regex_prop_#1: } }
- }
- }
-\__regex_tmp:w d D % one call per pair: lower-case letter, then the reversed upper-case one
-\__regex_tmp:w h H
-\__regex_tmp:w s S
-\__regex_tmp:w v V
-\__regex_tmp:w w W
-\cs_new_protected:cpn { __regex_compile_/N: } % `/N' compiles the test \__regex_prop_N: (no lower-case counterpart is defined in this span)
- { \__regex_compile_one:x \__regex_prop_N: }
-\cs_new_protected:Npn \__regex_compile_anchor:NF #1#2 % #1: integer variable holding a position; #2: fallback code
- {
- \__regex_if_in_class_or_catcode:TF {#2} % inside a class or catcode test, run the fallback
- {
- \__tl_build_one:n % otherwise append a positive assertion testing \__regex_anchor:N #1
- { \__regex_assertion:Nn \c_true_bool { \__regex_anchor:N #1 } }
- }
- }
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % scratch macro: #1 escape letter, #2 position variable for the anchor
- {
- \cs_new_protected:cpn { __regex_compile_/#1: }
- { \__regex_compile_anchor:NF #2 { \__regex_compile_raw_error:N #1 } } % fallback: bad-escape error, then raw #1
- }
-\__regex_tmp:w A \l__regex_min_pos_int
-\__regex_tmp:w G \l__regex_start_pos_int
-\__regex_tmp:w Z \l__regex_max_pos_int % Z and z are given the same position variable
-\__regex_tmp:w z \l__regex_max_pos_int
-\cs_set_protected:Npn \__regex_tmp:w #1#2 % scratch macro: #1 a character, #2 position variable for the anchor
- {
- \cs_new_protected:cpn { __regex_compile_#1: }
- { \__regex_compile_anchor:NF #2 { \__regex_compile_raw:N #1 } } % fallback: #1 is compiled as a raw character, without error
- }
-\exp_args:Nx \__regex_tmp:w { \iow_char:N \^ } \l__regex_min_pos_int % the character is produced via \iow_char:N before the call
-\exp_args:Nx \__regex_tmp:w { \iow_char:N \$ } \l__regex_max_pos_int
-\cs_new_protected:cpn { __regex_compile_/b: } % `/b': positive assertion on \__regex_b_test:
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N b } % not allowed in a class or catcode test: error, then raw `b'
- {
- \__tl_build_one:n
- { \__regex_assertion:Nn \c_true_bool { \__regex_b_test: } }
- }
- }
-\cs_new_protected:cpn { __regex_compile_/B: } % `/B': same test as `/b' but as a negative assertion
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N B } % not allowed in a class or catcode test: error, then raw `B'
- {
- \__tl_build_one:n
- { \__regex_assertion:Nn \c_false_bool { \__regex_b_test: } }
- }
- }
-\cs_new_protected:cpn { __regex_compile_]: } % special `]'
- {
- \__regex_if_in_class:TF
- {
- \if_int_compare:w \l__regex_mode_int > \c__regex_catcode_in_class_mode_int
- \__tl_build_one:n { \if_false: { \fi: } } % append a lone closing brace (the \if_false: trick keeps this definition balanced)
- \fi:
- \tex_advance:D \l__regex_mode_int - 15 \exp_stop_f: % mode := (mode - 15) / 13 using TeX integer division;
- \tex_divide:D \l__regex_mode_int 13 \exp_stop_f: % presumably undoes the digit appended when the class was opened - TODO confirm
- \if_int_odd:w \l__regex_mode_int \else: % only when the resulting mode is even ...
- \exp_after:wN \__regex_compile_quantifier:w % ... look for a quantifier after the class
- \fi:
- }
- { \__regex_compile_raw:N ] } % outside a class `]' is a raw character
- }
-\cs_new_protected:cpn { __regex_compile_[: } % special `['
- {
- \__regex_if_in_class:TF
- { \__regex_compile_class_posix_test:w } % already in a class: check for `[:', `[=' or `[.'
- {
- \__regex_if_within_catcode:TF
- {
- \exp_after:wN \__regex_compile_class_catcode:w % within a catcode test: pass the current catcodes value,
- \int_use:N \l__regex_catcodes_int ; % expanded first and delimited by `;'
- }
- { \__regex_compile_class_normal:w } % otherwise open an ordinary class
- }
- }
-\cs_new_protected:Npn \__regex_compile_class_normal:w % opens an ordinary class; the next two tokens are read by compile_class:TFNN
- {
- \__regex_compile_class:TFNN
- { \__regex_class:NnnnN \c_true_bool } % opener used when the class does not start with `^'
- { \__regex_class:NnnnN \c_false_bool } % opener used when it does
- }
-\cs_new_protected:Npn \__regex_compile_class_catcode:w #1; % #1: catcodes value, delimited by `;'
- {
- \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_mode_int
- \__tl_build_one:n % in catcode mode, first open a positive class (opening brace left unmatched on purpose)
- { \__regex_class:NnnnN \c_true_bool { \if_false: } \fi: }
- \fi:
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int % catcodes go back to the default; #1 keeps the old value
- \__regex_compile_class:TFNN
- { \__regex_item_catcode:nT {#1} } % opener when the class does not start with `^'
- { \__regex_item_catcode_reverse:nT {#1} } % opener when it does
- }
-\cs_new_protected:Npn \__regex_compile_class:TFNN #1#2#3#4 % #1/#2: openers for a plain / negated class; #3#4: first pair in the class
- {
- \l__regex_mode_int = \__int_value:w \l__regex_mode_int 3 \exp_stop_f: % appends the digit 3 to the decimal value of the mode
- \str_if_eq:nnTF { #3 #4 } { \__regex_compile_special:N ^ }
- {
- \__tl_build_one:n { #2 { \if_false: } \fi: } % negated: append #2 and an unmatched opening brace; the `^' pair is consumed
- \__regex_compile_class:NN
- }
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % plain: append #1 and an unmatched opening brace
- \__regex_compile_class:NN #3 #4 % and hand the first pair on
- }
- }
-\cs_new_protected:Npn \__regex_compile_class:NN #1#2 % #1#2: first pair inside a class (after an optional `^')
- {
- \token_if_eq_charcode:NNTF #2 ] % a `]' in first position is compiled as a raw character, whatever its marker #1
- { \__regex_compile_raw:N #2 }
- { #1 #2 } % anything else is processed normally
- }
-\cs_new_protected:Npn \__regex_compile_class_posix_test:w #1#2 % #1#2: pair following a `[' met inside a class
- {
- \token_if_eq_meaning:NNT \__regex_compile_special:N #1
- {
- \str_case:nn { #2 }
- {
- : { \__regex_compile_class_posix:NNNNw } % `[:' : this function then absorbs the four tokens of the line marked below
- = { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { = } } % `[=' : warning only
- . { \__msg_kernel_warning:nnx { regex } { posix-unsupported } { . } } % `[.' : warning only
- }
- }
- \__regex_compile_raw:N [ #1 #2 % default: raw `[' then the pair (these four tokens are what the `:' case absorbs)
- }
-\cs_new_protected:Npn \__regex_compile_class_posix:NNNNw #1#2#3#4#5#6 % #1-#4 are absorbed and unused; #5#6: pair after `[:'
- {
- \str_if_eq:nnTF { #5 #6 } { \__regex_compile_special:N ^ }
- {
- \bool_set_false:N \l__regex_internal_bool % `^' seen: the boolean is false and the pair is consumed
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi: % start an x-type assignment whose brace is closed later by the loop
- \__regex_compile_class_posix_loop:w
- }
- {
- \bool_set_true:N \l__regex_internal_bool
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi:
- \__regex_compile_class_posix_loop:w #5 #6 % no `^': the pair is part of the name
- }
- }
-\cs_new:Npn \__regex_compile_class_posix_loop:w #1#2 % expandable: runs inside the open \tl_set:Nx started by the caller
- {
- \token_if_eq_meaning:NNTF \__regex_compile_raw:N #1
- { #2 \__regex_compile_class_posix_loop:w } % raw character: keep it and continue
- { \if_false: { \fi: } \__regex_compile_class_posix_end:w #1 #2 } % otherwise close the assignment brace and finish
- }
-\cs_new_protected:Npn \__regex_compile_class_posix_end:w #1#2#3#4 % #1#2 and #3#4: the two pairs after the collected name
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \__regex_compile_special:N : \__regex_compile_special:N ] } % they must be special `:' then special `]'
- {
- \cs_if_exist:cTF { __regex_posix_ \l__regex_internal_a_tl : } % is there a function for that name?
- {
- \__regex_compile_one:x
- {
- \bool_if:NF \l__regex_internal_bool \__regex_item_reverse:n % reversed when the boolean is false (set on `^' by the caller)
- \exp_not:c { __regex_posix_ \l__regex_internal_a_tl : }
- }
- }
- {
- \__msg_kernel_warning:nnx { regex } { posix-unknown } % unknown name: warn and pass the text read to the abort function
- { \l__regex_internal_a_tl }
- \__regex_compile_abort_tokens:x
- {
- [: \bool_if:NF \l__regex_internal_bool { ^ }
- \l__regex_internal_a_tl :]
- }
- }
- }
- {
- \__msg_kernel_error:nnxx { regex } { posix-missing-close } % missing `:]': error, abort with the text read, reinsert the pairs
- { [: \l__regex_internal_a_tl } { #2 #4 }
- \__regex_compile_abort_tokens:x { [: \l__regex_internal_a_tl }
- #1 #2 #3 #4
- }
- }
-\cs_new_protected:Npn \__regex_compile_group_begin:N #1 % #1: the group function to emit (e.g. \__regex_group:nnnN)
- {
- \__tl_build_one:n { #1 { \if_false: } \fi: } % append #1 followed by an unmatched opening brace
- \__regex_mode_quit_c:
- \__tl_build:Nw \l__regex_internal_regex % start a nested build (ended in \__regex_compile_group_end:)
- \int_set_eq:NN \l__regex_default_catcodes_int \l__regex_catcodes_int % current catcodes become the default
- \int_incr:N \l__regex_group_level_int
- \__tl_build_one:n { \__regex_branch:n { \if_false: } \fi: } % open the first branch, again with an unmatched brace
- }
-\cs_new_protected:Npn \__regex_compile_group_end: % handles a closing parenthesis outside a class
- {
- \if_int_compare:w \l__regex_group_level_int > 0 \exp_stop_f:
- \__tl_build_one:n { \if_false: { \fi: } } % append a lone closing brace
- \__tl_build_end: % end the nested build
- \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
- \__tl_build_one:o \l__regex_internal_regex % append the contents of \l__regex_internal_regex to the current build
- \exp_after:wN \__regex_compile_quantifier:w % then look for a quantifier (the \fi: is expanded first)
- \else:
- \__msg_kernel_warning:nn { regex } { extra-rparen } % group level is 0: warn and compile `)' as a raw character
- \exp_after:wN \__regex_compile_raw:N \exp_after:wN )
- \fi:
- }
-\cs_new_protected:cpn { __regex_compile_(: } % special `('
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N ( } % raw character inside a class
- { \__regex_compile_lparen:w } % otherwise inspect what follows to pick the group type
- }
-\cs_new_protected:Npn \__regex_compile_lparen:w #1#2#3#4 % #1#2 and #3#4: the two pairs following `('
- {
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ? } % `(?' introduces a special group selected by #4
- {
- \cs_if_exist_use:cF
- { __regex_compile_special_group_\token_to_str:N #4 :w } % use the handler for #4 if one is defined; #3 is dropped
- {
- \__msg_kernel_warning:nnx { regex } { special-group-unknown } % no handler: warn, ...
- { (? #4 }
- \__regex_compile_group_begin:N \__regex_group:nnnN % ... open an ordinary group, ...
- \__regex_compile_raw:N ? #3 #4 % ... and compile `?' raw, followed by the second pair
- }
- }
- {
- \__regex_compile_group_begin:N \__regex_group:nnnN % ordinary group; both pairs are put back
- #1 #2 #3 #4
- }
- }
-\cs_new_protected:cpn { __regex_compile_|: } % special `|'
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N | } % raw character inside a class
- {
- \__tl_build_one:n % otherwise append a closing brace, \__regex_branch:n and a new unmatched opening brace
- { \if_false: { \fi: } \__regex_branch:n { \if_false: } \fi: }
- }
- }
-\cs_new_protected:cpn { __regex_compile_): } % special `)'
- {
- \__regex_if_in_class:TF { \__regex_compile_raw:N ) } % raw character inside a class
- { \__regex_compile_group_end: } % otherwise close the current group
- }
-\cs_new_protected:cpn { __regex_compile_special_group_::w } % handler for `(?:' : opens a group using \__regex_group_no_capture:nnnN
- { \__regex_compile_group_begin:N \__regex_group_no_capture:nnnN }
-\cs_new_protected:cpn { __regex_compile_special_group_|:w } % handler for `(?|' : opens a group using \__regex_group_resetting:nnnN
- { \__regex_compile_group_begin:N \__regex_group_resetting:nnnN }
-\cs_new_protected:Npn \__regex_compile_special_group_i:w #1#2 % handler for `(?i'; #1#2: the pair that follows
- {
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ) } % `(?i)' complete
- {
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseless_equal:n } % from here on the item functions point to the caseless versions
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseless_range:nn }
- }
- {
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?i #2 } % otherwise warn and compile `(', `?', `i' as raw characters
- \__regex_compile_raw:N (
- \__regex_compile_raw:N ?
- \__regex_compile_raw:N i
- #1 #2 % followed by the pair that was read
- }
- }
-\cs_new_protected:cpn { __regex_compile_special_group_-:w } #1#2#3#4 % handler for `(?-'; #1#2 and #3#4: the two pairs that follow
- {
- \str_if_eq:nnTF { #1 #2 #3 #4 }
- { \__regex_compile_raw:N i \__regex_compile_special:N ) } % only `(?-i)' is recognised
- {
- \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseful_equal:n } % item functions point back to the caseful versions
- \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseful_range:nn }
- }
- {
- \__msg_kernel_warning:nnx { regex } { unknown-option } { (?-#2#4 } % otherwise warn and compile `(', `?', `-' as raw characters
- \__regex_compile_raw:N (
- \__regex_compile_raw:N ?
- \__regex_compile_raw:N -
- #1 #2 #3 #4 % followed by the pairs that were read
- }
- }
-\cs_new_protected:cpn { __regex_compile_/c: } % `/c' escape: continue with compile_c_test:NN only if \__regex_chk_c_allowed:T lets it through
- { \__regex_chk_c_allowed:T { \__regex_compile_c_test:NN } }
-\cs_new_protected:Npn \__regex_compile_c_test:NN #1#2 % #1#2: the pair following `/c'
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- {
- \int_if_exist:cTF { c__regex_catcode_#2_int } % raw character: #2 must name a category constant
- { % (the F branch of this test is the last group of the definition)
- \int_set_eq:Nc \l__regex_catcodes_int { c__regex_catcode_#2_int }
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % mode 0 becomes catcode mode,
- \c__regex_catcode_mode_int
- \else:
- \c__regex_catcode_in_class_mode_int % any other mode becomes catcode-in-class mode
- \fi:
- }
- }
- { \cs_if_exist_use:cF { __regex_compile_c_#2:w } } % not raw: use a dedicated handler for #2 if defined (F branch: same last group)
- {
- \__msg_kernel_error:nnx { regex } { c-missing-category } {#2} % shared failure branch: error, then put the pair back
- #1 #2
- }
- }
-\cs_new_protected:cpn { __regex_compile_c_[:w } #1#2 % handler for `/c['; #1#2: the pair that follows
- {
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % mode 0 becomes catcode mode, any other mode catcode-in-class mode
- \c__regex_catcode_mode_int
- \else:
- \c__regex_catcode_in_class_mode_int
- \fi:
- \int_zero:N \l__regex_catcodes_int % start from an empty set of categories
- \str_if_eq:nnTF { #1 #2 } { \__regex_compile_special:N ^ }
- {
- \bool_set_false:N \l__regex_catcodes_bool % `^' seen: boolean false, pair consumed
- \__regex_compile_c_lbrack_loop:NN
- }
- {
- \bool_set_true:N \l__regex_catcodes_bool
- \__regex_compile_c_lbrack_loop:NN
- #1 #2 % no `^': the pair is the first item of the list
- }
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_loop:NN #1#2 % #1#2: next pair inside `/c[...]'
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- {
- \int_if_exist:cTF { c__regex_catcode_#2_int } % raw character naming a category constant
- { % (the F branch of this test is the last group of the definition)
- \exp_args:Nc \__regex_compile_c_lbrack_add:N
- { c__regex_catcode_#2_int } % add it to the set and continue with the next pair
- \__regex_compile_c_lbrack_loop:NN
- }
- }
- {
- \token_if_eq_charcode:NNTF #2 ] % not raw: a `]' ends the list (F branch: same last group)
- { \__regex_compile_c_lbrack_end: }
- }
- {
- \__msg_kernel_error:nnx { regex } { c-missing-rbrack } {#2} % shared failure branch: error, finish the list, put the pair back
- \__regex_compile_c_lbrack_end:
- #1 #2
- }
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_add:N #1 % #1: a category constant to add to \l__regex_catcodes_int
- {
- \if_int_odd:w \__int_eval:w \l__regex_catcodes_int / #1 \__int_eval_end: % odd quotient: nothing is added (presumably #1 is already in the set)
- \else:
- \int_add:Nn \l__regex_catcodes_int {#1} % even quotient: add #1
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_c_lbrack_end: % finishes a `/c[...]' list
- {
- \if_meaning:w \c_false_bool \l__regex_catcodes_bool % boolean false (set when the list started with `^')
- \int_set:Nn \l__regex_catcodes_int
- { \c__regex_all_catcodes_int - \l__regex_catcodes_int } % replace the value by all_catcodes minus the value
- \fi:
- }
-\cs_new_protected:cpn { __regex_compile_c_ \c_left_brace_str :w } % handler for `/c' followed by a left brace
- {
- \__regex_compile:w % start a nested compilation (ended by \__regex_compile_end_cs:)
- \__regex_disable_submatches:
- \l__regex_mode_int
- = \if_case:w \l__regex_mode_int % mode 0 becomes cs mode, any other mode cs-in-class mode
- \c__regex_cs_mode_int
- \else:
- \c__regex_cs_in_class_mode_int
- \fi:
- }
-\flag_new:n { __regex_cs } % flag cleared, raised and tested by the compile_end_cs / compile_cs_aux functions
-\cs_new_protected:cpn { __regex_compile_ \c_right_brace_str : } % special right brace
- {
- \__regex_if_in_cs:TF
- { \__regex_compile_end_cs: } % inside a cs test: finish it
- { \exp_after:wN \__regex_compile_raw:N \c_right_brace_str } % otherwise a raw right brace (the string is expanded first)
- }
-\cs_new_protected:Npn \__regex_compile_end_cs: % finishes a nested cs regex and emits one test for it
- {
- \__regex_compile_end:
- \flag_clear:n { __regex_cs }
- \tl_set:Nx \l__regex_internal_a_tl % expand the aux function over the contents of \l__regex_internal_regex
- {
- \exp_after:wN \__regex_compile_cs_aux:Nn \l__regex_internal_regex
- \q_nil \q_nil \q_recursion_stop % end markers for the aux loop
- }
- \exp_args:Nx \__regex_compile_one:x
- {
- \flag_if_raised:nTF { __regex_cs } % flag raised by the aux functions: keep the regex as is
- { \__regex_item_cs:n { \exp_not:o \l__regex_internal_regex } }
- { \__regex_item_exact_cs:n { \tl_tail:N \l__regex_internal_a_tl } } % otherwise use the collected text minus its first item
- }
- }
-\cs_new:Npn \__regex_compile_cs_aux:Nn #1#2 % expandable loop over pairs: #1 a function, #2 its braced argument
- {
- \cs_if_eq:NNTF #1 \__regex_branch:n
- {
- \scan_stop: % each branch contributes \scan_stop: followed by what the inner loop leaves
- \__regex_compile_cs_aux:NNnnnN #2
- \q_nil \q_nil \q_nil \q_nil \q_nil \q_nil \q_recursion_stop % end markers for the inner loop
- \__regex_compile_cs_aux:Nn % continue with the next pair
- }
- {
- \quark_if_nil:NF #1 { \flag_raise:n { __regex_cs } } % anything other than a branch or the end marker raises the flag
- \use_none_delimit_by_q_recursion_stop:w % presumably discards up to \q_recursion_stop, ending the loop
- }
- }
-\cs_new:Npn \__regex_compile_cs_aux:NNnnnN #1#2#3#4#5#6 % inner loop: six items at a time, shaped like \__regex_class:NnnnN and its arguments
- {
- \bool_lazy_all:nTF % all of the following must hold:
- {
- { \cs_if_eq_p:NN #1 \__regex_class:NnnnN } % the function is \__regex_class:NnnnN
- {#2} % its boolean argument is true
- { \tl_if_head_eq_meaning_p:nN {#3} \__regex_item_caseful_equal:n } % the tests start with a caseful equality
- { \int_compare_p:nNn { \tl_count:n {#3} } = { 2 } } % and consist of exactly two items
- { \int_compare_p:nNn {#5} = { 0 } } % and #5 is 0
- }
- {
- \prg_replicate:nn {#4} % leave #4 copies of the character ...
- { \char_generate:nn { \use_ii:nn #3 } {12} } % ... whose code is the second item of #3, with category 12
- \__regex_compile_cs_aux:NNnnnN
- }
- {
- \quark_if_nil:NF #1
- {
- \flag_raise:n { __regex_cs } % not the end marker: raise the flag
- \use_i_delimit_by_q_recursion_stop:nw % presumably keeps the token after this group and skips to the stop marker - TODO confirm
- }
- \use_none_delimit_by_q_recursion_stop:w % presumably discards up to \q_recursion_stop
- }
- }
-\cs_new_protected:cpn { __regex_compile_/u: } #1#2 % `/u' escape; #1#2: the pair that follows
- {
- \__regex_if_in_class_or_catcode:TF
- { \__regex_compile_raw_error:N u #1 #2 } % not allowed in a class or catcode test: error, raw `u', pair put back
- {
- \str_if_eq_x:nnTF {#1#2} { \__regex_compile_special:N \c_left_brace_str } % a special left brace must follow
- {
- \tl_set:Nx \l__regex_internal_a_tl { \if_false: } \fi: % start an x-type assignment whose brace is closed later by the loop
- \__regex_compile_u_loop:NN
- }
- {
- \__msg_kernel_error:nn { regex } { u-missing-lbrace } % otherwise error, raw `u', pair put back
- \__regex_compile_raw:N u #1 #2
- }
- }
- }
-\cs_new:Npn \__regex_compile_u_loop:NN #1#2 % expandable: runs inside the open \tl_set:Nx started by `/u'
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_raw:N
- { #2 \__regex_compile_u_loop:NN } % raw character: keep it and continue
- {
- \token_if_eq_meaning:NNTF #1 \__regex_compile_special:N
- {
- \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2 % special character: is it the right brace?
- { \if_false: { \fi: } \__regex_compile_u_end: } % yes: close the assignment brace and finish
- { #2 \__regex_compile_u_loop:NN } % no: keep it and continue
- }
- {
- \if_false: { \fi: } % any other marker: close the assignment brace,
- \__msg_kernel_error:nnx { regex } { u-missing-rbrace } {#2} % report the missing brace,
- \__regex_compile_u_end: % finish with what was collected,
- #1 #2 % and put the pair back
- }
- }
- }
-\cs_new_protected:Npn \__regex_compile_u_end: % called once the name given to `/u' has been collected in \l__regex_internal_a_tl
- {
- \tl_set:Nv \l__regex_internal_a_tl { \l__regex_internal_a_tl } % replace the collected name by the value of the variable with that name
- \if_int_compare:w \l__regex_mode_int = \c__regex_outer_mode_int
- \__regex_compile_u_not_cs: % outer mode
- \else:
- \__regex_compile_u_in_cs: % any other mode
- \fi:
- }
-\cs_new_protected:Npn \__regex_compile_u_in_cs: % `/u' value used when the mode is not the outer mode
- {
- \tl_gset:Nx \g__regex_internal_tl
- { \exp_args:No \__str_to_other_fast:n { \l__regex_internal_a_tl } } % convert the value with \__str_to_other_fast:n
- \__tl_build_one:x
- {
- \tl_map_function:NN \g__regex_internal_tl % append one class per item of the converted value
- \__regex_compile_u_in_cs_aux:n
- }
- }
-\cs_new:Npn \__regex_compile_u_in_cs_aux:n #1 % #1: one character; leaves a class matching exactly that character code
- {
- \__regex_class:NnnnN \c_true_bool
- { \__regex_item_caseful_equal:n { \__int_value:w `#1 } }
- { 1 } { 0 } \c_false_bool % same {1} {0} false arguments as every other single-item class emitted in this file
- }
-\cs_new_protected:Npn \__regex_compile_u_not_cs: % `/u' value used in outer mode: one class per token of the value
- {
- \exp_args:No \__tl_analysis_map_inline:nn { \l__regex_internal_a_tl } % inline code receives ##1, ##2, ##3 for each token
- {
- \__tl_build_one:n
- {
- \__regex_class:NnnnN \c_true_bool
- {
- \if_int_compare:w "##2 = 0 \exp_stop_f: % ##2 read as a hexadecimal number; 0 selects the control-sequence test
- \__regex_item_exact_cs:n { \exp_after:wN \cs_to_str:N ##1 }
- \else:
- \__regex_item_exact:nn { \__int_value:w "##2 } { ##3 } % otherwise an exact test built from ##2 and ##3
- \fi:
- }
- { 1 } { 0 } \c_false_bool
- }
- }
- }
-\cs_new_protected:cpn { __regex_compile_/K: } % `/K' escape
- {
- \int_compare:nNnTF \l__regex_mode_int = \c__regex_outer_mode_int
- { \__tl_build_one:n { \__regex_command_K: } } % only in outer mode: append \__regex_command_K:
- { \__regex_compile_raw_error:N K } % elsewhere: bad-escape error, then raw `K'
- }
-\cs_new_protected:Npn \__regex_show:Nn #1#2 % #1: compiled regex variable; #2: text inserted in the message header
- {
- \__tl_build:Nw \l__regex_internal_a_tl % the description is built into \l__regex_internal_a_tl
- \cs_set_protected:Npn \__regex_branch:n % below, the functions found in a compiled regex are redefined to describe themselves
- {
- \seq_pop_right:NN \l__regex_show_prefix_seq \l__regex_internal_a_tl % drop the last prefix while printing the branch line ...
- \__regex_show_one:n { +-branch }
- \seq_put_right:No \l__regex_show_prefix_seq \l__regex_internal_a_tl % ... and restore it for the branch contents
- \use:n
- }
- \cs_set_protected:Npn \__regex_group:nnnN
- { \__regex_show_group_aux:nnnnN { } }
- \cs_set_protected:Npn \__regex_group_no_capture:nnnN
- { \__regex_show_group_aux:nnnnN { ~(no~capture) } }
- \cs_set_protected:Npn \__regex_group_resetting:nnnN
- { \__regex_show_group_aux:nnnnN { ~(resetting) } }
- \cs_set_eq:NN \__regex_class:NnnnN \__regex_show_class:NnnnN
- \cs_set_protected:Npn \__regex_command_K:
- { \__regex_show_one:n { reset~match~start~(\iow_char:N\\K) } }
- \cs_set_protected:Npn \__regex_assertion:Nn ##1##2
- { \__regex_show_one:n { \bool_if:NF ##1 { negative~ } assertion:~##2 } }
- \cs_set:Npn \__regex_b_test: { word~boundary }
- \cs_set_eq:NN \__regex_anchor:N \__regex_show_anchor_to_str:N
- \cs_set_protected:Npn \__regex_item_caseful_equal:n ##1
- { \__regex_show_one:n { char~code~\int_eval:n{##1} } }
- \cs_set_protected:Npn \__regex_item_caseful_range:nn ##1##2
- { \__regex_show_one:n { range~[\int_eval:n{##1}, \int_eval:n{##2}] } }
- \cs_set_protected:Npn \__regex_item_caseless_equal:n ##1
- { \__regex_show_one:n { char~code~\int_eval:n{##1}~(caseless) } }
- \cs_set_protected:Npn \__regex_item_caseless_range:nn ##1##2
- {
- \__regex_show_one:n
- { Range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) } % NOTE(review): capital `Range' here, lower-case `range' in the caseful message
- }
- \cs_set_protected:Npn \__regex_item_catcode:nT
- { \__regex_show_item_catcode:NnT \c_true_bool }
- \cs_set_protected:Npn \__regex_item_catcode_reverse:nT
- { \__regex_show_item_catcode:NnT \c_false_bool }
- \cs_set_protected:Npn \__regex_item_reverse:n
- { \__regex_show_scope:nn { Reversed~match } }
- \cs_set_protected:Npn \__regex_item_exact:nn ##1##2
- { \__regex_show_one:n { char~##2,~catcode~##1 } }
- \cs_set_eq:NN \__regex_item_exact_cs:n \__regex_show_item_exact_cs:n
- \cs_set_protected:Npn \__regex_item_cs:n
- { \__regex_show_scope:nn { control~sequence } }
- \cs_set:cpn { __regex_prop_.: } { \__regex_show_one:n { any~token } }
- \seq_clear:N \l__regex_show_prefix_seq % start with an empty prefix stack, then push one entry
- \__regex_show_push:n { ~ }
- \cs_if_exist_use:N #1 % run the compiled regex (if it exists) with the redefinitions above
- \__tl_build_end:
- \__msg_show_variable:NNNnn #1 \cs_if_exist:NTF ? { } % display the result
- { >~Compiled~regex~#2: \l__regex_internal_a_tl }
- }
-\cs_new_protected:Npn \__regex_show_one:n #1 % appends one line of description; #1: text of the line (x-expanded)
- {
- \int_incr:N \l__regex_show_lines_int % line counter, used by \__regex_show_class:NnnnN
- \__tl_build_one:x
- {
- \exp_not:N \\
- \seq_map_function:NN \l__regex_show_prefix_seq \use:n % all current prefixes, in order
- #1
- }
- }
-\cs_new_protected:Npn \__regex_show_push:n #1 % adds #1 followed by a space at the right end of the prefix sequence
- { \seq_put_right:Nx \l__regex_show_prefix_seq { #1 ~ } }
-\cs_new_protected:Npn \__regex_show_pop: % removes the right-most prefix (it is stored in \l__regex_internal_a_tl)
- { \seq_pop_right:NN \l__regex_show_prefix_seq \l__regex_internal_a_tl }
-\cs_new_protected:Npn \__regex_show_scope:nn #1#2 % #1: heading line; #2: code whose output is shown one prefix level deeper
- {
- \__regex_show_one:n {#1}
- \__regex_show_push:n { ~ }
- #2
- \__regex_show_pop:
- }
-\cs_new_protected:Npn \__regex_show_group_aux:nnnnN #1#2#3#4#5 % #1: label suffix; #2: group contents; #3 #4 #5: repetition data
- {
- \__regex_show_one:n { ,-group~begin #1 }
- \__regex_show_push:n { | } % contents are shown with a `|' prefix
- \use_ii:nn #2 % drops the first item of #2 and runs the second
- \__regex_show_pop:
- \__regex_show_one:n
- { `-group~end \__regex_msg_repeated:nnN {#3} {#4} #5 } % closing line, followed by the repetition text
- }
-\cs_set:Npn \__regex_show_class:NnnnN #1#2#3#4#5 % #1: boolean; #2: tests; #3 #4 #5: repetition data. NOTE(review): \cs_set:Npn, unlike the \cs_new_protected:Npn siblings
- {
- \__tl_build:Nw \l__regex_internal_a_tl % nested build: run the tests once to count the lines they produce
- \int_zero:N \l__regex_show_lines_int
- \__regex_show_push:n {~}
- #2
- \exp_last_unbraced:Nf % the line count is expanded before the nested build is ended in each case
- \int_case:nnF { \l__regex_show_lines_int }
- {
- {0} % no line produced
- {
- \__tl_build_end:
- \__regex_show_one:n { \bool_if:NTF #1 { Fail } { Pass } }
- }
- {1} % exactly one line produced
- {
- \__tl_build_end:
- \bool_if:NTF #1
- {
- #2 % run the tests again in the outer build, then append the repetition text
- \__tl_build_one:n { \__regex_msg_repeated:nnN {#3} {#4} #5 }
- }
- {
- \__regex_show_one:n
- { Don't~match~\__regex_msg_repeated:nnN {#3} {#4} #5 }
- \__tl_build_one:o \l__regex_internal_a_tl % followed by the line built in the nested build
- }
- }
- }
- { % several lines produced
- \__tl_build_end:
- \__regex_show_one:n
- {
- \bool_if:NTF #1 { M } { Don't~m } atch
- \__regex_msg_repeated:nnN {#3} {#4} #5
- }
- \__tl_build_one:o \l__regex_internal_a_tl % followed by the lines built in the nested build
- }
- }
-\cs_new:Npn \__regex_show_anchor_to_str:N #1 % #1: position variable of an anchor; expands to a description of it
- {
- anchor~at~
- \str_case:nnF { #1 }
- {
- { \l__regex_min_pos_int } { start~(\iow_char:N\\A) }
- { \l__regex_start_pos_int } { start~of~match~(\iow_char:N\\G) }
- { \l__regex_max_pos_int } { end~(\iow_char:N\\Z) }
- }
- { <error:~'#1'~not~recognized> } % any other variable
- }
-\cs_new_protected:Npn \__regex_show_item_catcode:NnT #1#2 % #1: boolean; #2: catcodes value; the T argument is read by \__regex_show_scope:nn
- {
- \seq_set_split:Nnn \l__regex_internal_seq { } { CBEMTPUDSLOA } % one item per category letter
- \seq_set_filter:NNn \l__regex_internal_seq \l__regex_internal_seq
- { \int_if_odd_p:n { #2 / \int_use:c { c__regex_catcode_##1_int } } } % keep the letters for which #2 divided by the letter's constant is odd
- \__regex_show_scope:nn
- {
- categories~
- \seq_map_function:NN \l__regex_internal_seq \use:n
- , ~
- \bool_if:NF #1 { negative~ } class
- }
- }
-\cs_new_protected:Npn \__regex_show_item_exact_cs:n #1 % #1: control sequence names separated by \scan_stop:
- {
- \seq_set_split:Nnn \l__regex_internal_seq { \scan_stop: } {#1}
- \seq_set_map:NNn \l__regex_internal_seq
- \l__regex_internal_seq { \iow_char:N\\##1 } % put a backslash in front of each name
- \__regex_show_one:n
- { control~sequence~ \seq_use:Nn \l__regex_internal_seq { ~or~ } }
- }
-\int_new:N \l__regex_min_state_int % first state number; set to 1 on the next line
-\int_set:Nn \l__regex_min_state_int { 1 }
-\int_new:N \l__regex_max_state_int % advanced by one each time \__regex_build_new_state: runs
-\int_new:N \l__regex_left_state_int % left and right ends of the piece being built
-\int_new:N \l__regex_right_state_int
-\seq_new:N \l__regex_left_state_seq % stacks used by \__regex_push_lr_states: and \__regex_pop_lr_states:
-\seq_new:N \l__regex_right_state_seq
-\int_new:N \l__regex_capturing_group_int % incremented by \__regex_group:nnnN
-\cs_new_protected:Npn \__regex_build:n #1 % #1: a regex given as text
- {
- \__regex_compile:n {#1} % compile it, ...
- \__regex_build:N \l__regex_internal_regex % ... then build from \l__regex_internal_regex
- }
-\cs_new_protected:Npn \__regex_build:N #1 % #1: variable holding a compiled regex
- {
- \__regex_standard_escapechar:
- \int_zero:N \l__regex_capturing_group_int
- \int_set_eq:NN \l__regex_max_state_int \l__regex_min_state_int % restart state numbering
- \__regex_build_new_state: % two calls give distinct left and right states
- \__regex_build_new_state:
- \__regex_toks_put_right:Nn \l__regex_left_state_int
- { \__regex_action_start_wildcard: }
- \__regex_group:nnnN {#1} { 1 } { 0 } \c_false_bool % the whole regex is built as a group with arguments {1} {0} false
- \__regex_toks_put_right:Nn \l__regex_right_state_int
- { \__regex_action_success: } % the final right state holds the success action
- }
-\cs_new_protected:Npn \__regex_build_for_cs:n #1 % #1: compiled regex code, run between push and pop of the left/right states
- {
- \int_set_eq:NN \l__regex_max_state_int \l__regex_min_state_int % restart state numbering
- \__regex_build_new_state:
- \__regex_build_new_state:
- \__regex_push_lr_states:
- #1
- \__regex_pop_lr_states:
- \__regex_toks_put_right:Nn \l__regex_right_state_int
- {
- \if_int_compare:w \l__regex_current_pos_int = \l__regex_max_pos_int % success only when the current position equals max_pos
- \exp_after:wN \__regex_action_success:
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_push_lr_states: % saves the current left and right state numbers on their stacks
- {
- \seq_push:No \l__regex_left_state_seq
- { \int_use:N \l__regex_left_state_int } % the number itself is stored, not the variable
- \seq_push:No \l__regex_right_state_seq
- { \int_use:N \l__regex_right_state_int }
- }
-\cs_new_protected:Npn \__regex_pop_lr_states: % restores the left and right state numbers from their stacks
- {
- \seq_pop:NN \l__regex_left_state_seq \l__regex_internal_a_tl % \l__regex_internal_a_tl is used as scratch and overwritten
- \int_set:Nn \l__regex_left_state_int \l__regex_internal_a_tl
- \seq_pop:NN \l__regex_right_state_seq \l__regex_internal_a_tl
- \int_set:Nn \l__regex_right_state_int \l__regex_internal_a_tl
- }
-\cs_new_protected:Npn \__regex_build_transition_left:NNN #1#2#3 % #1: action; #2: source state; #3: target state; added at the left of #2
- { \__regex_toks_put_left:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } } % the action receives the offset target - source
-\cs_new_protected:Npn \__regex_build_transition_right:nNn #1#2#3 % same, added at the right of #2
- { \__regex_toks_put_right:Nx #2 { #1 { \int_eval:n { #3 - #2 } } } }
-\cs_new_protected:Npn \__regex_build_new_state: % allocates a state and shifts the left/right pair onto it
- {
- \__regex_toks_clear:N \l__regex_max_state_int % clear the toks numbered max_state
- \int_set_eq:NN \l__regex_left_state_int \l__regex_right_state_int % old right state becomes the left state
- \int_set_eq:NN \l__regex_right_state_int \l__regex_max_state_int % the cleared state becomes the right state
- \int_incr:N \l__regex_max_state_int
- }
-\cs_new_protected:Npn \__regex_build_transitions_lazyness:NNNNN #1#2#3#4#5 % #1: boolean; #2,#3 and #4,#5: two (action, target state) pairs
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int % both transitions are added to the new left state
- {
- \if_meaning:w \c_true_bool #1 % #1 true: the #2 transition comes first
- #2 { \int_eval:n { #3 - \l__regex_left_state_int } }
- #4 { \int_eval:n { #5 - \l__regex_left_state_int } }
- \else: % #1 false: the #4 transition comes first
- #4 { \int_eval:n { #5 - \l__regex_left_state_int } }
- #2 { \int_eval:n { #3 - \l__regex_left_state_int } }
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_class:NnnnN #1#2#3#4#5 % #1: boolean; #2: tests; #3, #4: repetition counts; #5: boolean passed to the repeat functions
- {
- \cs_set:Npx \__regex_tests_action_cost:n ##1 % define the transition used by the repeat functions below
- {
- \exp_not:n { \exp_not:n {#2} } % doubly protected so the tests survive a later x-expansion
- \bool_if:NTF #1
- { \__regex_break_point:TF { \__regex_action_cost:n {##1} } { } } % #1 true: the cost action is in the T branch
- { \__regex_break_point:TF { } { \__regex_action_cost:n {##1} } } % #1 false: it is in the F branch
- }
- \if_case:w - #4 \exp_stop_f: % dispatch on the value of -#4
- \__regex_class_repeat:n {#3} % #4 = 0
- \or: \__regex_class_repeat:nN {#3} #5 % #4 = -1
- \else: \__regex_class_repeat:nnN {#3} {#4} #5 % any other #4
- \fi:
- }
-\cs_new:Npn \__regex_tests_action_cost:n { \__regex_action_cost:n } % initial definition; \__regex_class:NnnnN redefines it on each use
-\cs_new_protected:Npn \__regex_class_repeat:n #1 % #1: number of copies of the class to chain
- {
- \prg_replicate:nn {#1}
- {
- \__regex_build_new_state: % each copy: a new state and a tests transition from left to right
- \__regex_build_transition_right:nNn \__regex_tests_action_cost:n
- \l__regex_left_state_int \l__regex_right_state_int
- }
- }
-\cs_new_protected:Npn \__regex_class_repeat:nN #1#2 % #1: number of copies; #2: boolean passed to the lazyness builder
- {
- \if_int_compare:w #1 = 0 \exp_stop_f:
- \__regex_build_transitions_lazyness:NNNNN #2 % no copy: choice between a free move to the right state ...
- \__regex_action_free:n \l__regex_right_state_int
- \__regex_tests_action_cost:n \l__regex_left_state_int % ... and a tests transition back to the same (left) state
- \else:
- \__regex_class_repeat:n {#1} % #1 copies first
- \int_set_eq:NN \l__regex_internal_a_int \l__regex_left_state_int % remember the left state of the last copy
- \__regex_build_transitions_lazyness:NNNNN #2 % then a choice between a free move to the right state ...
- \__regex_action_free:n \l__regex_right_state_int
- \__regex_action_free:n \l__regex_internal_a_int % ... and a free move back to the remembered state
- \fi:
- }
-\cs_new_protected:Npn \__regex_class_repeat:nnN #1#2#3 % #1: number of copies; #2: number of extra choice states; #3: boolean for the lazyness builder
- {
- \__regex_class_repeat:n {#1}
- \int_set:Nn \l__regex_internal_a_int
- { \l__regex_max_state_int + #2 - 1 } % number of the right state reached after the #2 states created below
- \prg_replicate:nn { #2 }
- {
- \__regex_build_transitions_lazyness:NNNNN #3 % each state: free move to that final state, or tests transition to the next state
- \__regex_action_free:n \l__regex_internal_a_int
- \__regex_tests_action_cost:n \l__regex_right_state_int
- }
- }
-\cs_new_protected:Npn \__regex_group_aux:nnnnN #1#2#3#4#5 % #1: group number (-1 from the non-capturing callers); #2: contents; #3, #4: counts; #5: boolean
- {
- \if_int_compare:w #3 = 0 \exp_stop_f: % when #3 is 0, an extra state with a free_group transition is added first
- \__regex_build_new_state:
- \__regex_build_transition_right:nNn \__regex_action_free_group:n
- \l__regex_left_state_int \l__regex_right_state_int
- \fi:
- \__regex_build_new_state:
- \__regex_push_lr_states: % the contents are built between a push and a pop of the left/right states
- #2
- \__regex_pop_lr_states:
- \if_case:w - #4 \exp_stop_f: % dispatch on the value of -#4
- \__regex_group_repeat:nn {#1} {#3} % #4 = 0
- \or: \__regex_group_repeat:nnN {#1} {#3} #5 % #4 = -1
- \else: \__regex_group_repeat:nnnN {#1} {#3} {#4} #5 % any other #4
- \fi:
- }
-\cs_new_protected:Npn \__regex_group:nnnN #1 % #1: contents; the remaining nnN arguments are read by \__regex_group_aux:nnnnN
- {
- \exp_args:No \__regex_group_aux:nnnnN
- { \int_use:N \l__regex_capturing_group_int } % group number: the counter value at this point
- {
- \int_incr:N \l__regex_capturing_group_int % the counter is incremented when the contents are run
- #1
- }
- }
-\cs_new_protected:Npn \__regex_group_no_capture:nnnN % same as a group but with -1 as group number (so \__regex_group_submatches:nNN adds nothing)
- { \__regex_group_aux:nnnnN { -1 } }
-\cs_new_protected:Npn \__regex_group_resetting:nnnN #1 % #1: contents (a list of function/argument pairs); group number passed on is -1
- {
- \__regex_group_aux:nnnnN { -1 }
- {
- \exp_args:Noo \__regex_group_resetting_loop:nnNn
- { \int_use:N \l__regex_capturing_group_int } % first copy: updated by the loop as the running maximum
- { \int_use:N \l__regex_capturing_group_int } % second copy: value the counter is reset to before each pair
- #1
- { ?? \__prg_break:n } { } % end marker picked up by the loop as a final pair
- \__prg_break_point:
- }
- }
-\cs_new_protected:Npn \__regex_group_resetting_loop:nnNn #1#2#3#4 % #1: maximum so far; #2: reset value; #3 #4: function and argument
- {
- \use_none:nn #3 { \int_set:Nn \l__regex_capturing_group_int {#1} } % ordinary #3: both items dropped; end marker: presumably breaks and sets the counter to #1 - TODO confirm
- \int_set:Nn \l__regex_capturing_group_int {#2} % reset the counter before running the pair
- #3 {#4}
- \exp_args:Nf \__regex_group_resetting_loop:nnNn
- { \int_max:nn {#1} { \l__regex_capturing_group_int } } % new maximum after this pair
- {#2}
- }
-\cs_new_protected:Npn \__regex_branch:n #1 % #1: contents of one alternative
- {
- \__regex_build_new_state:
- \seq_get:NN \l__regex_left_state_seq \l__regex_internal_a_tl % left state := value on top of the left-state stack (stack unchanged)
- \int_set:Nn \l__regex_left_state_int \l__regex_internal_a_tl
- \__regex_build_transition_right:nNn \__regex_action_free:n % free move from there into the new state
- \l__regex_left_state_int \l__regex_right_state_int
- #1
- \seq_get:NN \l__regex_right_state_seq \l__regex_internal_a_tl % after the contents: free move to the state on top of the right-state stack
- \__regex_build_transition_right:nNn \__regex_action_free:n
- \l__regex_right_state_int \l__regex_internal_a_tl
- }
-\cs_new_protected:Npn \__regex_group_repeat:nn #1#2 % #1: group number; #2: number of copies
- {
- \if_int_compare:w #2 = 0 \exp_stop_f:
- \int_set:Nn \l__regex_max_state_int % zero copies: move max_state back to left - 1,
- { \l__regex_left_state_int - 1 }
- \__regex_build_new_state: % so this call clears and reuses that state
- \else:
- \__regex_group_repeat_aux:n {#2} % copy the group (this also sets internal_a to its left state)
- \__regex_group_submatches:nNN {#1}
- \l__regex_internal_a_int \l__regex_right_state_int
- \__regex_build_new_state:
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_submatches:nNN #1#2#3 % #1: group number; #2, #3: two state variables
- {
- \if_int_compare:w #1 > - 1 \exp_stop_f: % nothing is done for a negative group number
- \__regex_toks_put_left:Nx #2 { \__regex_action_submatch:n { #1 < } } % submatch action with `<' at the left of state #2
- \__regex_toks_put_left:Nx #3 { \__regex_action_submatch:n { #1 > } } % submatch action with `>' at the left of state #3
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat_aux:n #1 % #1: integer expression, total number of copies wanted
- {
- \__regex_build_transition_right:nNn \__regex_action_free:n % free move from the right state to state max_state
- \l__regex_right_state_int \l__regex_max_state_int
- \int_set_eq:NN \l__regex_internal_a_int \l__regex_left_state_int % internal_a := left state, internal_b := max_state
- \int_set_eq:NN \l__regex_internal_b_int \l__regex_max_state_int
- \if_int_compare:w \__int_eval:w #1 > 1 \exp_stop_f: % more than one copy wanted
- \int_set:Nn \l__regex_internal_c_int
- {
- ( #1 - 1 )
- * ( \l__regex_internal_b_int - \l__regex_internal_a_int ) % internal_c := (#1 - 1) times the span b - a
- }
- \int_add:Nn \l__regex_right_state_int { \l__regex_internal_c_int } % right state and max_state move up by that amount
- \int_add:Nn \l__regex_max_state_int { \l__regex_internal_c_int }
- \__regex_toks_memcpy:NNn % copy internal_c toks; argument order is b, a, c
- \l__regex_internal_b_int
- \l__regex_internal_a_int
- \l__regex_internal_c_int
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat:nnN #1#2#3 % #1: group number; #2: number of copies; #3: boolean choosing where transitions are added
- {
- \if_int_compare:w #2 = 0 \exp_stop_f: % case of zero copies
- \__regex_group_submatches:nNN {#1}
- \l__regex_left_state_int \l__regex_right_state_int
- \int_set:Nn \l__regex_internal_a_int
- { \l__regex_left_state_int - 1 } % internal_a := state just before the left state
- \__regex_build_transition_right:nNn \__regex_action_free:n % free move from the right state back to internal_a
- \l__regex_right_state_int \l__regex_internal_a_int
- \__regex_build_new_state:
- \if_meaning:w \c_true_bool #3 % free move from internal_a to the new right state:
- \__regex_build_transition_left:NNN \__regex_action_free:n % #3 true: added at the left of internal_a
- \l__regex_internal_a_int \l__regex_right_state_int
- \else:
- \__regex_build_transition_right:nNn \__regex_action_free:n % #3 false: added at the right
- \l__regex_internal_a_int \l__regex_right_state_int
- \fi:
- \else: % case of at least one copy
- \__regex_group_repeat_aux:n {#2}
- \__regex_group_submatches:nNN {#1}
- \l__regex_internal_a_int \l__regex_right_state_int
- \if_meaning:w \c_true_bool #3 % free_group move from the right state back to internal_a:
- \__regex_build_transition_right:nNn \__regex_action_free_group:n % #3 true: added at the right of the right state
- \l__regex_right_state_int \l__regex_internal_a_int
- \else:
- \__regex_build_transition_left:NNN \__regex_action_free_group:n % #3 false: added at the left
- \l__regex_right_state_int \l__regex_internal_a_int
- \fi:
- \__regex_build_new_state:
- \fi:
- }
-\cs_new_protected:Npn \__regex_group_repeat:nnnN #1#2#3#4 % #1: group number; #2, #3: the two counts; #4: boolean choosing the construction
- {
- \__regex_group_submatches:nNN {#1}
- \l__regex_left_state_int \l__regex_right_state_int
- \__regex_group_repeat_aux:n { #2 + #3 } % #2 + #3 copies in total; sets internal_a and internal_b
- \if_meaning:w \c_true_bool #4
- \int_set_eq:NN \l__regex_left_state_int \l__regex_max_state_int % #4 true: walk back from max_state, one copy span at a time
- \prg_replicate:nn { #3 }
- {
- \int_sub:Nn \l__regex_left_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \__regex_build_transition_left:NNN \__regex_action_free:n % free move to max_state, added at the left of each such state
- \l__regex_left_state_int \l__regex_max_state_int
- }
- \else:
- \prg_replicate:nn { #3 - 1 } % #4 false: walk the right state back, #3 - 1 times
- {
- \int_sub:Nn \l__regex_right_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \__regex_build_transition_right:nNn \__regex_action_free:n % free move to max_state, added at the right
- \l__regex_right_state_int \l__regex_max_state_int
- }
- \if_int_compare:w #2 = 0 \exp_stop_f: % last step: when #2 is 0 it starts from the state before the left state,
- \int_set:Nn \l__regex_right_state_int
- { \l__regex_left_state_int - 1 }
- \else: % otherwise from one more copy span back
- \int_sub:Nn \l__regex_right_state_int
- { \l__regex_internal_b_int - \l__regex_internal_a_int }
- \fi:
- \__regex_build_transition_right:nNn \__regex_action_free:n
- \l__regex_right_state_int \l__regex_max_state_int
- \fi:
- \__regex_build_new_state:
- }
-\cs_new_protected:Npn \__regex_assertion:Nn #1#2 % #1: boolean (true for a positive assertion); #2: test code
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int
- {
- \exp_not:n {#2} % the test is stored unexpanded
- \__regex_break_point:TF
- \bool_if:NF #1 { { } } % #1 false: an empty T branch comes first, so the move below is the F branch
- {
- \__regex_action_free:n % free move from the left state to the right state
- {
- \int_eval:n
- { \l__regex_right_state_int - \l__regex_left_state_int }
- }
- }
- \bool_if:NT #1 { { } } % #1 true: the move above is the T branch and the F branch is empty
- }
- }
-\cs_new_protected:Npn \__regex_anchor:N #1 % Anchor test: calls \__regex_break_true:w when the integer #1 equals the current position; otherwise does nothing.
- {
- \if_int_compare:w #1 = \l__regex_current_pos_int
- \exp_after:wN \__regex_break_true:w % \exp_after:wN removes \fi: before \__regex_break_true:w runs
- \fi:
- }
-\cs_new_protected:Npn \__regex_b_test: % Runs \__regex_prop_w: on the previous character, then on the current one, reversing the second test when the first one succeeded.
- {
- \group_begin: % the group keeps the change of \l__regex_current_char_int local
- \int_set_eq:NN \l__regex_current_char_int \l__regex_last_char_int
- \__regex_prop_w:
- \__regex_break_point:TF
- { \group_end: \__regex_item_reverse:n \__regex_prop_w: } % previous character passed the test: apply the reversed test to the current one
- { \group_end: \__regex_prop_w: } % previous character failed the test: apply the plain test to the current one
- }
-\cs_new_protected:Npn \__regex_command_K: % Adds a state that re-records the "0<" submatch key at the current position and then moves freely to the next state.
- {
- \__regex_build_new_state:
- \__regex_toks_put_right:Nx \l__regex_left_state_int
- {
- \__regex_action_submatch:n { 0< }
- \bool_set_true:N \l__regex_fresh_thread_bool % the free transition is taken with the fresh-thread flag set
- \__regex_action_free:n
- { \int_eval:n { \l__regex_right_state_int - \l__regex_left_state_int } }
- \bool_set_false:N \l__regex_fresh_thread_bool
- }
- }
-\int_new:N \l__regex_min_pos_int % position of the first query token (set in \__regex_match:n)
-\int_new:N \l__regex_max_pos_int % position just after the last query token (set in \__regex_match:n)
-\int_new:N \l__regex_current_pos_int % position currently being read
-\int_new:N \l__regex_start_pos_int % position at which the current match attempt started
-\int_new:N \l__regex_success_pos_int % position recorded by \__regex_action_success:
-\int_new:N \l__regex_current_char_int % character code at the current position
-\int_new:N \l__regex_current_catcode_int % category code at the current position
-\int_new:N \l__regex_last_char_int % character code of the previous position
-\int_new:N \l__regex_case_changed_char_int % reset to \c_max_int at each step of the match loop
-\int_new:N \l__regex_current_state_int % state being executed
-\prop_new:N \l__regex_current_submatches_prop % submatch data of the running thread (also assigned with \tl_set:Nn)
-\prop_new:N \l__regex_success_submatches_prop % copy of the above taken on success
-\int_new:N \l__regex_step_int % incremented by 2 at each step of the match loop
-\int_new:N \l__regex_min_active_int % first index of the stored threads
-\int_new:N \l__regex_max_active_int % one past the last stored thread
-\__intarray_new:Nn \g__regex_state_active_intarray { 65536 } % per-state marker compared with \l__regex_step_int
-\__intarray_new:Nn \g__regex_thread_state_intarray { 65536 } % state number of each stored thread
-\tl_new:N \l__regex_every_match_tl % code run at the end of \__regex_match_once:
-\bool_new:N \l__regex_fresh_thread_bool
-\bool_new:N \l__regex_empty_success_bool
-\cs_new_eq:NN \__regex_if_two_empty_matches:F \use:n % default: always run the argument; redefined in \__regex_match_once:
-\bool_new:N \g__regex_success_bool % overall result read by \__regex_return:
-\bool_new:N \l__regex_saved_success_bool
-\bool_new:N \l__regex_match_success_bool % result of the current match attempt
-\cs_new_protected:Npn \__regex_match:n #1 % Stores the token list #1 as the query, initialises the matching variables and starts the first match attempt.
- {
- \int_zero:N \l__regex_balance_int
- \int_set:Nn \l__regex_current_pos_int { 2 * \l__regex_max_state_int } % query positions start at twice the maximum state number
- \__regex_query_set:nnn { } { -1 } { -2 } % sentinel entry before the query
- \int_set_eq:NN \l__regex_min_pos_int \l__regex_current_pos_int
- \__tl_analysis_map_inline:nn {#1}
- { \__regex_query_set:nnn {##1} {"##2} {##3} } % ##2 is read as a hexadecimal number because of the " prefix
- \int_set_eq:NN \l__regex_max_pos_int \l__regex_current_pos_int
- \__regex_query_set:nnn { } { -1 } { -2 } % sentinel entry after the query
- \bool_gset_false:N \g__regex_success_bool
- \int_step_inline:nnnn
- \l__regex_min_state_int { 1 } { \l__regex_max_state_int - 1 }
- { \__intarray_gset_fast:Nnn \g__regex_state_active_intarray {##1} { 1 } } % set every state's marker to 1
- \int_set_eq:NN \l__regex_min_active_int \l__regex_max_state_int
- \int_zero:N \l__regex_step_int
- \int_set_eq:NN \l__regex_success_pos_int \l__regex_min_pos_int
- \int_set:Nn \l__regex_min_submatch_int
- { 2 * \l__regex_max_state_int }
- \int_set_eq:NN \l__regex_submatch_int \l__regex_min_submatch_int
- \bool_set_false:N \l__regex_empty_success_bool
- \__regex_match_once:
- }
-\cs_new_protected:Npn \__regex_match_once: % Performs one match attempt starting at the last success position, then runs \l__regex_every_match_tl.
- {
- \if_meaning:w \c_true_bool \l__regex_empty_success_bool
- \cs_set:Npn \__regex_if_two_empty_matches:F
- { \int_compare:nNnF \l__regex_start_pos_int = \l__regex_current_pos_int } % after an empty match, skip the argument when the current position is still the start position
- \else:
- \cs_set_eq:NN \__regex_if_two_empty_matches:F \use:n
- \fi:
- \int_set_eq:NN \l__regex_start_pos_int \l__regex_success_pos_int
- \bool_set_false:N \l__regex_match_success_bool
- \prop_clear:N \l__regex_current_submatches_prop
- \int_set_eq:NN \l__regex_max_active_int \l__regex_min_active_int % empty the list of stored threads
- \__regex_store_state:n { \l__regex_min_state_int } % a single thread at the first state
- \int_set:Nn \l__regex_current_pos_int
- { \l__regex_start_pos_int - 1 } % the loop increments the position before reading it
- \__regex_query_get:
- \__regex_match_loop:
- \l__regex_every_match_tl
- }
-\cs_new_protected:Npn \__regex_single_match: % Sets the every-match hook so that a match attempt only copies its result into \g__regex_success_bool.
- {
- \tl_set:Nn \l__regex_every_match_tl
- { \bool_gset_eq:NN \g__regex_success_bool \l__regex_match_success_bool }
- }
-\cs_new_protected:Npn \__regex_multi_match:n #1 % Sets the every-match hook so that after each successful attempt #1 is run and another attempt is started.
- {
- \tl_set:Nn \l__regex_every_match_tl
- {
- \if_meaning:w \c_true_bool \l__regex_match_success_bool
- \bool_gset_true:N \g__regex_success_bool
- #1
- \exp_after:wN \__regex_match_once: % \fi: is removed before the next attempt starts
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_match_loop: % One step of matching: advance one position, run every stored thread, and loop while threads remain and the end of the query is not reached.
- {
- \int_add:Nn \l__regex_step_int { 2 }
- \int_incr:N \l__regex_current_pos_int
- \int_set_eq:NN \l__regex_last_char_int \l__regex_current_char_int
- \int_set_eq:NN \l__regex_case_changed_char_int \c_max_int
- \__regex_query_get:
- \use:x % expanding first fixes the list of threads to run before the list is emptied
- {
- \int_set_eq:NN \l__regex_max_active_int \l__regex_min_active_int
- \int_step_function:nnnN
- { \l__regex_min_active_int }
- { 1 }
- { \l__regex_max_active_int - 1 }
- \__regex_match_one_active:n
- }
- \__prg_break_point: % target of the \__prg_break: issued by \__regex_action_success:
- \bool_set_false:N \l__regex_fresh_thread_bool %^^A was arg of break_point:n
- \if_int_compare:w \l__regex_max_active_int > \l__regex_min_active_int
- \if_int_compare:w \l__regex_current_pos_int < \l__regex_max_pos_int
- \exp_after:wN \exp_after:wN \exp_after:wN \__regex_match_loop: % both \fi: are removed before looping
- \fi:
- \fi:
- }
-\cs_new:Npn \__regex_match_one_active:n #1 % Expands to a call running stored thread #1: its state number from the intarray and its submatch data from toks register #1.
- {
- \__regex_use_state_and_submatches:nn
- { \__intarray_item_fast:Nn \g__regex_thread_state_intarray {#1} }
- { \__regex_toks_use:w #1 }
- }
-\cs_new_protected:Npn \__regex_query_set:nnn #1#2#3 % Stores one query item at the current position (tokens #1, catcode #2, char code #3, current brace balance), then advances the position and updates the balance.
- {
- \__intarray_gset_fast:Nnn \g__regex_charcode_intarray
- { \l__regex_current_pos_int } {#3}
- \__intarray_gset_fast:Nnn \g__regex_catcode_intarray
- { \l__regex_current_pos_int } {#2}
- \__intarray_gset_fast:Nnn \g__regex_balance_intarray
- { \l__regex_current_pos_int } { \l__regex_balance_int }
- \__regex_toks_set:Nn \l__regex_current_pos_int {#1}
- \int_incr:N \l__regex_current_pos_int
- \if_case:w #2 \exp_stop_f: % case 0: nothing to do
- \or: \int_incr:N \l__regex_balance_int % case 1: balance goes up
- \or: \int_decr:N \l__regex_balance_int % case 2: balance goes down
- \fi:
- }
-\cs_new_protected:Npn \__regex_query_get: % Loads the char code and catcode stored for the current position into \l__regex_current_char_int and \l__regex_current_catcode_int.
- {
- \l__regex_current_char_int
- = \__intarray_item_fast:Nn \g__regex_charcode_intarray
- { \l__regex_current_pos_int } \scan_stop: % \scan_stop: ends the primitive integer assignment
- \l__regex_current_catcode_int
- = \__intarray_item_fast:Nn \g__regex_catcode_intarray
- { \l__regex_current_pos_int } \scan_stop:
- }
-\cs_new_protected:Npn \__regex_use_state: % Runs the instructions of the current state, marking it with step while running and with step+1 afterwards.
- {
- \__intarray_gset_fast:Nnn \g__regex_state_active_intarray
- { \l__regex_current_state_int } { \l__regex_step_int }
- \__regex_toks_use:w \l__regex_current_state_int
- \__intarray_gset_fast:Nnn \g__regex_state_active_intarray
- { \l__regex_current_state_int } { \l__regex_step_int + 1 }
- }
-\cs_new_protected:Npn \__regex_use_state_and_submatches:nn #1 #2 % Makes #1 the current state and, if its marker is below the current step, loads submatch data #2 and runs the state.
- {
- \int_set:Nn \l__regex_current_state_int {#1}
- \if_int_compare:w
- \__intarray_item_fast:Nn \g__regex_state_active_intarray
- { \l__regex_current_state_int }
- < \l__regex_step_int % marker < step: state not yet used in this step
- \tl_set:Nn \l__regex_current_submatches_prop {#2}
- \exp_after:wN \__regex_use_state:
- \fi:
- \scan_stop:
- }
-\cs_new_protected:Npn \__regex_action_start_wildcard: % Takes a free transition to the next state with the fresh-thread flag set, then stores the current state again through a cost transition of offset 0.
- {
- \bool_set_true:N \l__regex_fresh_thread_bool
- \__regex_action_free:n {1}
- \bool_set_false:N \l__regex_fresh_thread_bool
- \__regex_action_cost:n {0}
- }
-\cs_new_protected:Npn \__regex_action_free:n % Free transition by the offset given as next argument: the target runs unless its marker is greater than the current step.
- { \__regex_action_free_aux:nn { > \l__regex_step_int \else: } } % the \else: inverts the test built in the auxiliary
-\cs_new_protected:Npn \__regex_action_free_group:n % Variant: the target runs only when its marker is strictly below the current step.
- { \__regex_action_free_aux:nn { < \l__regex_step_int } }
-\cs_new_protected:Npn \__regex_action_free_aux:nn #1#2 % Moves to state current+#2, runs it if the marker test #1 allows, then restores the current state and submatch data.
- {
- \use:x % x-expansion captures the present state number and submatches for the restore step
- {
- \int_add:Nn \l__regex_current_state_int {#2}
- \exp_not:n
- {
- \if_int_compare:w
- \__intarray_item_fast:Nn \g__regex_state_active_intarray
- { \l__regex_current_state_int }
- #1 % comparison tail supplied by the caller
- \exp_after:wN \__regex_use_state:
- \fi:
- }
- \int_set:Nn \l__regex_current_state_int
- { \int_use:N \l__regex_current_state_int } % expanded now: the state number before the move
- \tl_set:Nn \exp_not:N \l__regex_current_submatches_prop
- { \exp_not:o \l__regex_current_submatches_prop } % content of the submatches before the move
- }
- }
-\cs_new_protected:Npn \__regex_action_cost:n #1 % Stores a thread at state current+#1; the sum is evaluated to a number before being passed on.
- {
- \exp_args:No \__regex_store_state:n
- { \__int_value:w \__int_eval:w \l__regex_current_state_int + #1 }
- }
-\cs_new_protected:Npn \__regex_store_state:n #1 % Appends a thread at index \l__regex_max_active_int: saves its submatches and its state number #1, then increments the index.
- {
- \__regex_store_submatches:
- \__intarray_gset_fast:Nnn \g__regex_thread_state_intarray
- { \l__regex_max_active_int } {#1}
- \int_incr:N \l__regex_max_active_int
- }
-\cs_new_protected:Npn \__regex_store_submatches: % Saves the content of the current submatches into the toks register numbered \l__regex_max_active_int.
- {
- \__regex_toks_set:No \l__regex_max_active_int
- { \l__regex_current_submatches_prop }
- }
-\cs_new_protected:Npn \__regex_disable_submatches: % Locally turns submatch storing and recording into no-ops.
- {
- \cs_set_protected:Npn \__regex_store_submatches: { }
- \cs_set_protected:Npn \__regex_action_submatch:n ##1 { }
- }
-\cs_new_protected:Npn \__regex_action_submatch:n #1 % Records the current position (as a number) under key #1 in the current submatches.
- {
- \prop_put:Nno \l__regex_current_submatches_prop {#1}
- { \int_use:N \l__regex_current_pos_int }
- }
-\cs_new_protected:Npn \__regex_action_success: % Records a successful match at the current position and stops running further threads, unless \__regex_if_two_empty_matches:F suppresses it.
- {
- \__regex_if_two_empty_matches:F
- {
- \bool_set_true:N \l__regex_match_success_bool
- \bool_set_eq:NN \l__regex_empty_success_bool
- \l__regex_fresh_thread_bool % the empty-success flag takes the value of the fresh-thread flag
- \int_set_eq:NN \l__regex_success_pos_int \l__regex_current_pos_int
- \prop_set_eq:NN \l__regex_success_submatches_prop
- \l__regex_current_submatches_prop
- \__prg_break: % jumps to the \__prg_break_point: in \__regex_match_loop:
- }
- }
-\int_new:N \l__regex_replacement_csnames_int % incremented by \__regex_replacement_cu_aux:Nw, decremented by \__regex_replacement_rbrace:N
-\tl_new:N \l__regex_replacement_category_tl % category letter currently in force; empty when none
-\seq_new:N \l__regex_replacement_category_seq % stack of saved category letters
-\tl_new:N \l__regex_balance_tl % accumulates "+ \__regex_submatch_balance:n {...}" terms
-\cs_new:Npn \__regex_replacement_balance_one_match:n #1 % Initial definition (globally redefined by \__regex_replacement:n): minus the balance of submatch #1.
- { - \__regex_submatch_balance:n {#1} }
-\cs_new:Npn \__regex_replacement_do_one_match:n #1 % Initial definition (redefined by \__regex_replacement_aux:n): the query range from the "prev" to the "begin" position of submatch #1.
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_prev_intarray {#1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- }
-\cs_new:Npn \__regex_replacement_exp_not:N #1 { \exp_not:n {#1} } % Wraps the single token #1 in \exp_not:n.
-\cs_new:Npn \__regex_query_range:nn #1#2 % Expands to the stored query items from position #1 up to, but not including, position #2.
- {
- \exp_after:wN \__regex_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 \exp_after:wN ; % both bounds are evaluated to numbers before the loop starts
- \__int_value:w \__int_eval:w #2 ;
- \__prg_break_point:
- }
-\cs_new:Npn \__regex_query_range_loop:ww #1 ; #2 ; % Loop body: stop when #1 >= #2, otherwise use toks register #1 and continue with #1+1.
- {
- \if_int_compare:w #1 < #2 \exp_stop_f:
- \else:
- \exp_after:wN \__prg_break: % leaves the loop at the \__prg_break_point: placed by \__regex_query_range:nn
- \fi:
- \__regex_toks_use:w #1 \exp_stop_f:
- \exp_after:wN \__regex_query_range_loop:ww
- \__int_value:w \__int_eval:w #1 + 1 ; #2 ;
- }
-\cs_new:Npn \__regex_query_submatch:n #1 % Expands to the query items between the recorded begin and end positions of submatch #1.
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} }
- }
-\cs_new_protected:Npn \__regex_submatch_balance:n #1 % Integer expression: stored balance at the end position of submatch #1 minus the one at its begin position; a position of 0 counts as balance 0.
- { % NOTE(review): being protected, this is left unexpanded in the x-type definition of \__regex_replacement_balance_one_match:n -- presumably intentional
- \__int_eval:w
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} } = 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g__regex_balance_intarray
- { \__intarray_item_fast:Nn \g__regex_submatch_end_intarray {#1} }
- }
- -
- \int_compare:nNnTF
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} } = 0
- { 0 }
- {
- \__intarray_item_fast:Nn \g__regex_balance_intarray
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {#1} }
- }
- \__int_eval_end:
- }
-\cs_new_protected:Npn \__regex_replacement:n #1 % Parses the replacement text #1 and defines the per-match functions \__regex_replacement_balance_one_match:n and \__regex_replacement_do_one_match:n.
- {
- \__tl_build:Nw \l__regex_internal_a_tl % the parsed replacement is accumulated in \l__regex_internal_a_tl
- \int_zero:N \l__regex_balance_int
- \tl_clear:N \l__regex_balance_tl
- \__regex_escape_use:nnnn
- {
- \if_charcode:w \c_right_brace_str ##1 % a right brace gets special handling
- \__regex_replacement_rbrace:N
- \else:
- \__regex_replacement_normal:n
- \fi:
- ##1
- }
- { \__regex_replacement_escaped:N ##1 }
- { \__regex_replacement_normal:n ##1 }
- {#1}
- \prg_do_nothing: \prg_do_nothing: % end markers; \__regex_replacement_cat:NNN tests for \prg_do_nothing:
- \if_int_compare:w \l__regex_replacement_csnames_int > 0 \exp_stop_f: % some \c{ or \u{ construction was left open
- \__msg_kernel_error:nnx { regex } { replacement-missing-rbrace }
- { \int_use:N \l__regex_replacement_csnames_int }
- \__tl_build_one:x
- { \prg_replicate:nn \l__regex_replacement_csnames_int \cs_end: } % close them all
- \fi:
- \seq_if_empty:NF \l__regex_replacement_category_seq % some category parenthesis was left open
- {
- \__msg_kernel_error:nnx { regex } { replacement-missing-rparen }
- { \seq_count:N \l__regex_replacement_category_seq }
- \seq_clear:N \l__regex_replacement_category_seq
- }
- \cs_gset:Npx \__regex_replacement_balance_one_match:n ##1
- {
- + \int_use:N \l__regex_balance_int
- \l__regex_balance_tl
- - \__regex_submatch_balance:n {##1}
- }
- \__tl_build_end:
- \exp_args:No \__regex_replacement_aux:n \l__regex_internal_a_tl
- }
-\cs_new_protected:Npn \__regex_replacement_aux:n #1 % Defines \__regex_replacement_do_one_match:n as: the query range from "prev" to "begin" of the match, followed by the parsed replacement #1.
- {
- \cs_set:Npn \__regex_replacement_do_one_match:n ##1
- {
- \__regex_query_range:nn
- { \__intarray_item_fast:Nn \g__regex_submatch_prev_intarray {##1} }
- { \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray {##1} }
- #1
- }
- }
-\cs_new_protected:Npn \__regex_replacement_normal:n #1 % Handles an ordinary item #1: appended as is when no category is in force, otherwise either closes the category (right parenthesis) or is emitted through the category function.
- {
- \tl_if_empty:NTF \l__regex_replacement_category_tl
- { \__tl_build_one:n {#1} }
- { % (
- \token_if_eq_charcode:NNTF #1 )
- {
- \seq_pop:NN \l__regex_replacement_category_seq
- \l__regex_replacement_category_tl % restore the previously saved category
- }
- {
- \use:c { __regex_replacement_c_ \l__regex_replacement_category_tl :w }
- \__regex_replacement_normal:n {#1}
- }
- }
- }
-\cs_new_protected:Npn \__regex_replacement_escaped:N #1 % Handles an escaped character #1: a dedicated \__regex_replacement_#1:w if it exists, else a submatch reference for a digit, else the character itself.
- {
- \cs_if_exist_use:cF { __regex_replacement_#1:w }
- {
- \if_int_compare:w 1 < 1#1 \exp_stop_f: % true exactly when #1 is a digit (1 < 1d)
- \__regex_replacement_put_submatch:n {#1}
- \else:
- \exp_args:No \__regex_replacement_normal:n
- { \token_to_str:N #1 }
- \fi:
- }
- }
-\cs_new_protected:Npn \__regex_replacement_put_submatch:n #1 % Inserts a reference to submatch #1 of the match; silently does nothing when #1 is not below \l__regex_capturing_group_int.
- {
- \if_int_compare:w #1 < \l__regex_capturing_group_int
- \__tl_build_one:n { \__regex_query_submatch:n { #1 + ##1 } } % ##1 is the argument of the function later defined by \__regex_replacement_aux:n
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f: % the balance term is only added outside control sequence names
- \tl_put_right:Nn \l__regex_balance_tl
- { + \__regex_submatch_balance:n { \__int_eval:w #1+##1 \__int_eval_end: } }
- \fi:
- \fi:
- }
-\cs_new_protected:Npn \__regex_replacement_g:w #1#2 % Handles \g: requires the next item to be a normal left brace, then starts reading digits into \l__regex_internal_a_int.
- {
- \str_if_eq_x:nnTF { #1#2 } { \__regex_replacement_normal:n \c_left_brace_str }
- { \l__regex_internal_a_int = \__regex_replacement_g_digits:NN } % the assignment's number is produced by expanding the digit reader
- { \__regex_replacement_error:NNN g #1 #2 }
- }
-\cs_new:Npn \__regex_replacement_g_digits:NN #1#2 % Expandable digit reader for \g{...}: leaves digits for the pending assignment, and ends on a right brace (inserting the submatch) or raises the "g" error.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_replacement_normal:n
- {
- \if_int_compare:w 1 < 1#2 \exp_stop_f: % #2 is a digit
- #2
- \exp_after:wN \use_i:nnn % keeps the recursive call and drops the trailing "#1 #2"
- \exp_after:wN \__regex_replacement_g_digits:NN
- \else:
- \exp_stop_f: % ends the number being read
- \exp_after:wN \__regex_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- {
- \exp_stop_f: % ends the number being read
- \if_meaning:w \__regex_replacement_rbrace:N #1
- \exp_args:No \__regex_replacement_put_submatch:n
- { \int_use:N \l__regex_internal_a_int }
- \exp_after:wN \use_none:nn % drops the trailing "#1 #2"
- \else:
- \exp_after:wN \__regex_replacement_error:NNN
- \exp_after:wN g
- \fi:
- }
- #1 #2
- }
-\cs_new_protected:Npn \__regex_replacement_c:w #1#2 % Handles \c: a following left brace starts a control sequence name, a known category letter starts a category escape, anything else raises the "c" error.
- {
- \token_if_eq_meaning:NNTF #1 \__regex_replacement_normal:n
- {
- \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
- { \__regex_replacement_cu_aux:Nw \__regex_replacement_exp_not:N }
- {
- \cs_if_exist:cTF { __regex_replacement_c_#2:w } % is #2 a supported category letter?
- { \__regex_replacement_cat:NNN #2 }
- { \__regex_replacement_error:NNN c #1#2 }
- }
- }
- { \__regex_replacement_error:NNN c #1#2 }
- }
-\cs_new_protected:Npn \__regex_replacement_cu_aux:Nw #1 % Opens a \cs:w construction in the result: wrapped by #1 at nesting depth 0, by \tl_to_str:V when already nested; then increments the depth.
- {
- \if_case:w \l__regex_replacement_csnames_int
- \__tl_build_one:n { \exp_not:n { \exp_after:wN #1 \cs:w } }
- \else:
- \__tl_build_one:n { \exp_not:n { \exp_after:wN \tl_to_str:V \cs:w } }
- \fi:
- \int_incr:N \l__regex_replacement_csnames_int
- }
-\cs_new_protected:Npn \__regex_replacement_u:w #1#2 % Handles \u: requires a normal left brace next, then opens a control sequence name whose value is inserted through \exp_not:V.
- {
- \str_if_eq_x:nnTF { #1#2 } { \__regex_replacement_normal:n \c_left_brace_str }
- { \__regex_replacement_cu_aux:Nw \exp_not:V }
- { \__regex_replacement_error:NNN u #1#2 }
- }
-\cs_new_protected:Npn \__regex_replacement_rbrace:N #1 % Right brace: closes the innermost open control sequence name if any, otherwise is treated as a normal character.
- {
- \if_int_compare:w \l__regex_replacement_csnames_int > 0 \exp_stop_f:
- \__tl_build_one:n \cs_end:
- \int_decr:N \l__regex_replacement_csnames_int
- \else:
- \__regex_replacement_normal:n {#1}
- \fi:
- }
-\cs_new_protected:Npn \__regex_replacement_cat:NNN #1#2#3 % Applies category letter #1 to the item "#2 #3": errors at end of input or inside a control sequence name, opens a category group on "(", otherwise emits the single character.
- {
- \token_if_eq_meaning:NNTF \prg_do_nothing: #3 % end marker placed by \__regex_replacement:n
- { \__msg_kernel_error:nn { regex } { replacement-catcode-end } }
- {
- \int_compare:nNnTF { \l__regex_replacement_csnames_int } > 0
- {
- \__msg_kernel_error:nnnn
- { regex } { replacement-catcode-in-cs } {#1} {#3}
- #2 #3 % after the error the item is processed without the category
- }
- {
- \str_if_eq:nnTF { #2 #3 } { \__regex_replacement_normal:n ( } % )
- {
- \seq_push:NV \l__regex_replacement_category_seq
- \l__regex_replacement_category_tl % save the category in force so far
- \tl_set:Nn \l__regex_replacement_category_tl {#1}
- }
- { \use:c { __regex_replacement_c_#1:w } #2 #3 }
- }
- }
- }
-\group_begin: % the catcode changes of the null character ^^@ below stay local to this group
- \cs_new_protected:Npn \__regex_replacement_char:nNN #1#2#3 % Appends #1 with every null character replaced (via \tex_lowercase:D) by the character #3; #2 is discarded.
- {
- \tex_lccode:D 0 = `#3 \scan_stop:
- \tex_lowercase:D { \__tl_build_one:n {#1} }
- }
- \char_set_catcode_active:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_A:w % category A: null character defined here with active catcode, protected by \exp_not:N
- { \__regex_replacement_char:nNN { \exp_not:n { \exp_not:N ^^@ } } }
- \char_set_catcode_group_begin:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_B:w % category B: begin-group character; counts +1 in the balance outside control sequence names
- {
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f:
- \int_incr:N \l__regex_balance_int
- \fi:
- \__regex_replacement_char:nNN
- { \exp_not:n { \exp_after:wN ^^@ \if_false: } \fi: } } % the \if_false: } \fi: keeps this definition brace-balanced
- }
- \cs_new_protected:Npn \__regex_replacement_c_C:w #1#2 % category C: control sequence whose name is #2
- { \__tl_build_one:n { \exp_not:N \exp_not:N \exp_not:c {#2} } }
- \char_set_catcode_math_subscript:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_D:w % category D: math subscript catcode
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_group_end:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_E:w % category E: end-group character; counts -1 in the balance outside control sequence names
- {
- \if_int_compare:w \l__regex_replacement_csnames_int = 0 \exp_stop_f:
- \int_decr:N \l__regex_balance_int
- \fi:
- \__regex_replacement_char:nNN
- { \exp_not:n { \if_false: { \fi: ^^@ } }
- }
- \char_set_catcode_letter:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_L:w % category L: letter catcode
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_math_toggle:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_M:w % category M: math toggle catcode
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_other:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_O:w % category O: other catcode
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_parameter:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_P:w % category P: parameter catcode; four copies are stored inside nested \exp_not:n
- {
- \__regex_replacement_char:nNN
- { \exp_not:n { \exp_not:n { ^^@^^@^^@^^@ } } }
- }
- \cs_new_protected:Npn \__regex_replacement_c_S:w #1#2 % category S: space token with the character code of #2; character code 0 raises an error
- {
- \if_int_compare:w `#2 = 0 \exp_stop_f:
- \__msg_kernel_error:nn { regex } { replacement-null-space }
- \fi:
- \tex_lccode:D `\ = `#2 \scan_stop:
- \tex_lowercase:D { \__tl_build_one:n {~} }
- }
- \char_set_catcode_alignment:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_T:w % category T: alignment catcode
- { \__regex_replacement_char:nNN { ^^@ } }
- \char_set_catcode_math_superscript:N \^^@
- \cs_new_protected:Npn \__regex_replacement_c_U:w % category U: math superscript catcode
- { \__regex_replacement_char:nNN { ^^@ } }
-\group_end:
-\cs_new_protected:Npn \__regex_replacement_error:NNN #1#2#3 % Raises the error "replacement-#1" with #3 as its argument, then puts the item "#2 #3" back for normal processing.
- {
- \__msg_kernel_error:nnx { regex } { replacement-#1 } {#3}
- #2 #3
- }
-\cs_new_protected:Npn \regex_new:N #1 % Creates the regex variable #1 as a copy of \c__regex_no_match_regex.
- { \cs_new_eq:NN #1 \c__regex_no_match_regex }
-\cs_new_protected:Npn \regex_set:Nn #1#2 % Compiles the regex #2 and stores the result locally in #1.
- {
- \__regex_compile:n {#2}
- \tl_set_eq:NN #1 \l__regex_internal_regex
- }
-\cs_new_protected:Npn \regex_gset:Nn #1#2 % Compiles the regex #2 and stores the result globally in #1.
- {
- \__regex_compile:n {#2}
- \tl_gset_eq:NN #1 \l__regex_internal_regex
- }
-\cs_new_protected:Npn \regex_const:Nn #1#2 % Compiles the regex #2 and creates the constant #1 holding the result.
- {
- \__regex_compile:n {#2}
- \tl_const:Nx #1 { \exp_not:o \l__regex_internal_regex } % x-type with \exp_not:o copies the content exactly
- }
-\cs_new_protected:Npn \regex_show:n #1 % Compiles the regex #1 and shows it, labelled with the string form of #1 in braces.
- {
- \__regex_compile:n {#1}
- \__regex_show:Nn \l__regex_internal_regex
- { { \tl_to_str:n {#1} } }
- }
-\cs_new_protected:Npn \regex_show:N #1 % Shows the regex variable #1, labelled "variable <name>".
- { \__regex_show:Nn #1 { variable~\token_to_str:N #1 } }
-\prg_new_protected_conditional:Npnn \regex_match:nn #1#2 { T , F , TF } % Tests whether the regex #1 (given as text) matches the token list #2.
- {
- \__regex_if_match:nn { \__regex_build:n {#1} } {#2}
- \__regex_return:
- }
-\prg_new_protected_conditional:Npnn \regex_match:Nn #1#2 { T , F , TF } % Same test with a regex variable #1.
- {
- \__regex_if_match:nn { \__regex_build:N #1 } {#2}
- \__regex_return:
- }
-\cs_new_protected:Npn \regex_count:nnN #1 % Counts matches of the regex #1 (text); the remaining two arguments are taken by \__regex_count:nnN.
- { \__regex_count:nnN { \__regex_build:n {#1} } }
-\cs_new_protected:Npn \regex_count:NnN #1 % Same with a regex variable #1.
- { \__regex_count:nnN { \__regex_build:N #1 } }
-\cs_set_protected:Npn \__regex_tmp:w #1#2#3 % Temporary helper: from internal function #1 defines the user functions #2 (regex as text) and #3 (regex variable), plus their T, F, TF conditional forms.
- {
- \cs_new_protected:Npn #2 ##1 { #1 { \__regex_build:n {##1} } }
- \cs_new_protected:Npn #3 ##1 { #1 { \__regex_build:N ##1 } }
- \prg_new_protected_conditional:Npnn #2 ##1##2##3 { T , F , TF }
- { #1 { \__regex_build:n {##1} } {##2} ##3 \__regex_return: }
- \prg_new_protected_conditional:Npnn #3 ##1##2##3 { T , F , TF }
- { #1 { \__regex_build:N ##1 } {##2} ##3 \__regex_return: }
- }
-\__regex_tmp:w \__regex_extract_once:nnN % defines \regex_extract_once:nnN(TF) and \regex_extract_once:NnN(TF)
- \regex_extract_once:nnN \regex_extract_once:NnN
-\__regex_tmp:w \__regex_extract_all:nnN % defines \regex_extract_all:nnN(TF) and \regex_extract_all:NnN(TF)
- \regex_extract_all:nnN \regex_extract_all:NnN
-\__regex_tmp:w \__regex_replace_once:nnN % defines \regex_replace_once:nnN(TF) and \regex_replace_once:NnN(TF)
- \regex_replace_once:nnN \regex_replace_once:NnN
-\__regex_tmp:w \__regex_replace_all:nnN % defines \regex_replace_all:nnN(TF) and \regex_replace_all:NnN(TF)
- \regex_replace_all:nnN \regex_replace_all:NnN
-\__regex_tmp:w \__regex_split:nnN \regex_split:nnN \regex_split:NnN % defines \regex_split:nnN(TF) and \regex_split:NnN(TF)
-\int_new:N \l__regex_match_count_int % number of matches counted by \__regex_count:nnN
-\flag_new:n { __regex_begin } % raised when begin-group braces had to be appended to an extracted item
-\flag_new:n { __regex_end } % raised when end-group braces had to be prepended to an extracted item
-\int_new:N \l__regex_min_submatch_int % index of the first submatch slot (set in \__regex_match:n)
-\int_new:N \l__regex_submatch_int % index of the next free submatch slot
-\int_new:N \l__regex_zeroth_submatch_int % slot of the whole match ("group 0") for the latest extraction
-\__intarray_new:Nn \g__regex_submatch_prev_intarray { 65536 } % for each slot: start position of the match attempt (0 when unset)
-\__intarray_new:Nn \g__regex_submatch_begin_intarray { 65536 } % for each slot: begin position of the submatch
-\__intarray_new:Nn \g__regex_submatch_end_intarray { 65536 } % for each slot: end position of the submatch
-\cs_new_protected:Npn \__regex_return: % Ends a conditional: returns true exactly when \g__regex_success_bool is true, false otherwise.
- {
- \bool_if:NTF \g__regex_success_bool
- { \prg_return_true: }
- { \prg_return_false: }
- }
-\cs_new_protected:Npn \__regex_if_match:nn #1#2 % Runs the build code #1 and a single match attempt on #2 inside a group, without tracking submatches; the result is left in \g__regex_success_bool.
- {
- \group_begin:
- \__regex_disable_submatches:
- \__regex_single_match:
- #1
- \__regex_match:n {#2}
- \group_end:
- }
-\cs_new_protected:Npn \__regex_count:nnN #1#2#3 % Counts the matches of the regex built by #1 in #2 and stores the number in the integer #3.
- {
- \group_begin:
- \__regex_disable_submatches:
- \int_zero:N \l__regex_match_count_int
- \__regex_multi_match:n { \int_incr:N \l__regex_match_count_int }
- #1
- \__regex_match:n {#2}
- \exp_args:NNNo % expands the count to a number before \group_end: restores the local counter
- \group_end:
- \int_set:Nn #3 { \int_use:N \l__regex_match_count_int }
- }
-\cs_new_protected:Npn \__regex_extract_once:nnN #1#2#3 % Matches once in #2 with the regex built by #1, extracts the submatches and stores them as a sequence in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_single_match:
- #1
- \__regex_match:n {#2}
- \__regex_extract:
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_extract_all:nnN #1#2#3 % Matches repeatedly in #2, extracting submatches after every match, and stores them all as a sequence in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_multi_match:n { \__regex_extract: }
- #1
- \__regex_match:n {#2}
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_split:nnN #1#2#3 % Splits #2 at every match of the regex built by #1 and stores the pieces (and submatches) as a sequence in #3.
- {
- \group_begin: % closed inside \__regex_group_end_extract_seq:N
- \__regex_multi_match:n
- {
- \if_int_compare:w \l__regex_start_pos_int < \l__regex_success_pos_int % only when the match ended beyond the start of the attempt
- \__regex_extract:
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_zeroth_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray
- { \l__regex_zeroth_submatch_int }
- {
- \__intarray_item_fast:Nn \g__regex_submatch_begin_intarray
- { \l__regex_zeroth_submatch_int }
- } % slot 0 now ends where the match began ...
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray
- { \l__regex_zeroth_submatch_int }
- { \l__regex_start_pos_int } % ... and begins at the start of the attempt: it is the piece before the match
- \fi:
- }
- #1
- \__regex_match:n {#2}
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray
- { \l__regex_submatch_int }
- { \l__regex_max_pos_int }
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray
- { \l__regex_submatch_int }
- { \l__regex_start_pos_int } % final piece: from the last start position to the end of the query
- \int_incr:N \l__regex_submatch_int
- \if_meaning:w \c_true_bool \l__regex_empty_success_bool
- \if_int_compare:w \l__regex_start_pos_int = \l__regex_max_pos_int
- \int_decr:N \l__regex_submatch_int % drop the final piece when the last match was empty and at the very end
- \fi:
- \fi:
- \__regex_group_end_extract_seq:N #3
- }
-\cs_new_protected:Npn \__regex_group_end_extract_seq:N #1 % Builds a sequence from all recorded submatch slots, reports unbalanced braces, closes the group and assigns the result to #1.
- {
- \cs_set_eq:NN \__seq_item:n \scan_stop: % makes \__seq_item:n unexpandable during the x-expansion below
- \flag_clear:n { __regex_begin }
- \flag_clear:n { __regex_end }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \s__seq
- \int_step_function:nnnN
- { \l__regex_min_submatch_int }
- { 1 }
- { \l__regex_submatch_int - 1 }
- \__regex_extract_seq_aux:n
- }
- \int_compare:nNnF
- { \flag_height:n { __regex_begin } + \flag_height:n { __regex_end } }
- = 0
- {
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { splitting~or~extracting~submatches }
- { \flag_height:n { __regex_end } }
- { \flag_height:n { __regex_begin } }
- }
- \use:x
- {
- \group_end:
- \tl_set:Nn \exp_not:N #1 { \l__regex_internal_a_tl } % the result is expanded before \group_end: discards the local variable
- }
- }
-\cs_new:Npn \__regex_extract_seq_aux:n #1 % Produces one sequence item for submatch slot #1, passing its brace balance (as a number) and #1 to the :ww auxiliary.
- {
- \__seq_item:n
- {
- \exp_after:wN \__regex_extract_seq_aux:ww
- \__int_value:w \__regex_submatch_balance:n {#1} ; #1;
- }
- }
-\cs_new:Npn \__regex_extract_seq_aux:ww #1; #2; % Expands to submatch #2, padded with -#1 begin-group braces in front when the balance #1 is negative, or #1 end-group braces after when positive.
- {
- \if_int_compare:w #1 < 0 \exp_stop_f:
- \flag_raise:n { __regex_end }
- \prg_replicate:nn {-#1} { \exp_not:n { { \if_false: } \fi: } } % the \if_false: } \fi: keeps this definition brace-balanced
- \fi:
- \__regex_query_submatch:n {#2}
- \if_int_compare:w #1 > 0 \exp_stop_f:
- \flag_raise:n { __regex_begin }
- \prg_replicate:nn {#1} { \exp_not:n { \if_false: { \fi: } } }
- \fi:
- }
-\cs_new_protected:Npn \__regex_extract: % After a successful match, reserves one slot per capturing group and fills the begin/end/prev arrays from \l__regex_success_submatches_prop.
- {
- \if_meaning:w \c_true_bool \g__regex_success_bool
- \int_set_eq:NN \l__regex_zeroth_submatch_int \l__regex_submatch_int
- \prg_replicate:nn \l__regex_capturing_group_int % zero all three arrays for each new slot
- {
- \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray
- { \l__regex_submatch_int } { 0 }
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_submatch_int } { 0 }
- \int_incr:N \l__regex_submatch_int
- }
- \prop_map_inline:Nn \l__regex_success_submatches_prop
- {
- \if_int_compare:w ##1 - 1 \exp_stop_f: % the key ends in < or >, which serves as the relation here: "n> -1" is true, "n< -1" is false
- \exp_after:wN \__regex_extract_e:wn \__int_value:w
- \else:
- \exp_after:wN \__regex_extract_b:wn \__int_value:w
- \fi:
- \__int_eval:w \l__regex_zeroth_submatch_int + ##1 {##2} % the sum stops at the < or > which delimits the auxiliary's first argument
- }
- \__intarray_gset_fast:Nnn \g__regex_submatch_prev_intarray
- { \l__regex_zeroth_submatch_int } { \l__regex_start_pos_int }
- \fi:
- }
-\cs_new_protected:Npn \__regex_extract_b:wn #1 < #2 % Stores #2 as the begin position of slot #1 (#1 is delimited by <).
- { \__intarray_gset_fast:Nnn \g__regex_submatch_begin_intarray {#1} {#2} }
-\cs_new_protected:Npn \__regex_extract_e:wn #1 > #2 % Stores #2 as the end position of slot #1 (#1 is delimited by >).
- { \__intarray_gset_fast:Nnn \g__regex_submatch_end_intarray {#1} {#2} }
-\cs_new_protected:Npn \__regex_replace_once:nnN #1#2#3 % Replaces the first match of the regex built by #1 in the token list variable #3 by the replacement #2; #3 is untouched when there is no match.
- {
- \group_begin:
- \__regex_single_match:
- #1
- \__regex_replacement:n {#2}
- \exp_args:No \__regex_match:n { #3 } % match against the content of the variable #3
- \if_meaning:w \c_false_bool \g__regex_success_bool
- \group_end:
- \else:
- \__regex_extract:
- \int_set:Nn \l__regex_balance_int
- {
- \__regex_replacement_balance_one_match:n
- { \l__regex_zeroth_submatch_int }
- }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \__regex_replacement_do_one_match:n { \l__regex_zeroth_submatch_int }
- \__regex_query_range:nn
- {
- \__intarray_item_fast:Nn \g__regex_submatch_end_intarray
- { \l__regex_zeroth_submatch_int }
- }
- { \l__regex_max_pos_int } % the rest of the query after the match
- }
- \__regex_group_end_replace:N #3
- \fi:
- }
-\cs_new_protected:Npn \__regex_replace_all:nnN #1#2#3 % Replaces every match of the regex built by #1 in the token list variable #3 by the replacement #2.
- {
- \group_begin:
- \__regex_multi_match:n { \__regex_extract: }
- #1
- \__regex_replacement:n {#2}
- \exp_args:No \__regex_match:n {#3}
- \int_set:Nn \l__regex_balance_int
- {
- 0
- \int_step_function:nnnN
- { \l__regex_min_submatch_int }
- \l__regex_capturing_group_int % step = slots per match, so only each match's slot 0 is visited
- { \l__regex_submatch_int - 1 }
- \__regex_replacement_balance_one_match:n
- }
- \tl_set:Nx \l__regex_internal_a_tl
- {
- \int_step_function:nnnN
- { \l__regex_min_submatch_int }
- \l__regex_capturing_group_int
- { \l__regex_submatch_int - 1 }
- \__regex_replacement_do_one_match:n
- \__regex_query_range:nn
- \l__regex_start_pos_int \l__regex_max_pos_int % what remains of the query after the last match
- }
- \__regex_group_end_replace:N #3
- }
-\cs_new_protected:Npn \__regex_group_end_replace:N #1 % Reports a non-zero brace balance, closes the group and assigns the result to #1, padded with the missing braces.
- {
- \if_int_compare:w \l__regex_balance_int = 0 \exp_stop_f:
- \else:
- \__msg_kernel_error:nnxxx { regex } { result-unbalanced }
- { replacing }
- { \int_max:nn { - \l__regex_balance_int } { 0 } }
- { \int_max:nn { \l__regex_balance_int } { 0 } }
- \fi:
- \use:x % everything is expanded before \group_end: discards the local variables
- {
- \group_end:
- \tl_set:Nn \exp_not:N #1
- {
- \if_int_compare:w \l__regex_balance_int < 0 \exp_stop_f:
- \prg_replicate:nn { - \l__regex_balance_int }
- { { \if_false: } \fi: } % begin-group braces prepended for a negative balance
- \fi:
- \l__regex_internal_a_tl
- \if_int_compare:w \l__regex_balance_int > 0 \exp_stop_f:
- \prg_replicate:nn { \l__regex_balance_int }
- { \if_false: { \fi: } } % end-group braces appended for a positive balance
- \fi:
- }
- }
- }
-\__msg_kernel_new:nnnn { regex } { trailing-backslash } % Error: regex or replacement ends with a backslash.
- { Trailing~escape~character~'\iow_char:N\\'. }
- {
- A~regular~expression~or~its~replacement~text~ends~with~
- the~escape~character~'\iow_char:N\\'.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { x-missing-rbrace } % Error: \x{... without closing brace; #1 is shown after the opening brace.
- { Missing~closing~brace~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{...#1'.~
- The~closing~brace~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { x-overflow } % Error: \x{...} character code #1 above \c_max_char_int.
- { Character~code~'#1'~too~large~in~'\iow_char:N\\x'~hexadecimal~sequence. }
- {
- You~wrote~something~like~
- '\iow_char:N\\x\{\int_to_Hex:n{#1}\}'.~
- The~character~code~#1~is~larger~than~
- the~maximum~value~\int_use:N \c_max_char_int.
- }
-\__msg_kernel_new:nnnn { regex } { invalid-quantifier } % Error: braced quantifier #1 followed by invalid character #2.
- { Braced~quantifier~'#1'~may~not~be~followed~by~'#2'. }
- {
- The~character~'#2'~is~invalid~in~the~braced~quantifier~'#1'.~
- The~only~valid~quantifiers~are~'*',~'?',~'+',~'{<int>}',~
- '{<min>,}'~and~'{<min>,<max>}',~optionally~followed~by~'?'.
- }
-\__msg_kernel_new:nnnn { regex } { missing-rbrack } % Error: character class without closing bracket.
- { Missing~right~bracket~inserted~in~regular~expression. }
- {
- LaTeX~was~given~a~regular~expression~where~a~character~class~
- was~started~with~'[',~but~the~matching~']'~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { missing-rparen } % Error: #1 unclosed left parentheses in a regex.
- {
- Missing~right~
- \int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } ~
- inserted~in~regular~expression.
- }
- {
- LaTeX~was~given~a~regular~expression~with~\int_eval:n {#1} ~
- more~left~parentheses~than~right~parentheses.
- }
-\__msg_kernel_new:nnnn { regex } { extra-rparen } % Error: right parenthesis with no open group.
- { Extra~right~parenthesis~ignored~in~regular~expression. }
- {
- LaTeX~came~across~a~closing~parenthesis~when~no~submatch~group~
- was~open.~The~parenthesis~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { bad-escape } % Error: escape \#1 not allowed in the current context (control sequence test, class, or after a category test).
- {
- Invalid~escape~'\iow_char:N\\#1'~
- \__regex_if_in_cs:TF { within~a~control~sequence. }
- {
- \__regex_if_in_class:TF
- { in~a~character~class. }
- { following~a~category~test. }
- }
- }
- {
- The~escape~sequence~'\iow_char:N\\#1'~may~not~appear~
- \__regex_if_in_cs:TF
- {
- within~a~control~sequence~test~introduced~by~
- '\iow_char:N\\c\iow_char:N\{'.
- }
- {
- \__regex_if_in_class:TF
- { within~a~character~class~ }
- { following~a~category~test~such~as~'\iow_char:N\\cL'~ }
- because~it~does~not~match~exactly~one~character.
- }
- }
-\__msg_kernel_new:nnnn { regex } { range-missing-end } % Error: #2 cannot end the range #1-#2.
- { Invalid~end-point~for~range~'#1-#2'~in~character~class. }
- {
- The~end-point~'#2'~of~the~range~'#1-#2'~may~not~serve~as~an~
- end-point~for~a~range:~alphanumeric~characters~should~not~be~
- escaped,~and~non-alphanumeric~characters~should~be~escaped.
- }
-\__msg_kernel_new:nnnn { regex } { range-backwards } % Error: range [#1-#2] with the char code of #1 above that of #2.
- { Range~'[#1-#2]'~out~of~order~in~character~class. }
- {
- In~ranges~of~characters~'[x-y]'~appearing~in~character~classes,~
- the~first~character~code~must~not~be~larger~than~the~second.~
- Here,~'#1'~has~character~code~\int_eval:n {`#1},~while~
- '#2'~has~character~code~\int_eval:n {`#2}.
- }
-\__msg_kernel_new:nnnn { regex } { c-bad-mode } % Error: \c used inside a \c{...} control sequence test.
- { Invalid~nested~'\iow_char:N\\c'~escape~in~regular~expression. }
- {
- The~'\iow_char:N\\c'~escape~cannot~be~used~within~
- a~control~sequence~test~'\iow_char:N\\c{...}'.~
- To~combine~several~category~tests,~use~'\iow_char:N\\c[...]'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrace } % Error: \c{... without closing brace.
- { Missing~right~brace~inserted~for~'\iow_char:N\\c'~escape. }
- {
- LaTeX~was~given~a~regular~expression~where~a~
- '\iow_char:N\\c\iow_char:N\{...'~construction~was~not~ended~
- with~a~closing~brace~'\iow_char:N\}'.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-rbrack } % Error: \c[... without closing bracket.
- { Missing~right~bracket~inserted~for~'\iow_char:N\\c'~escape. }
- {
- A~construction~'\iow_char:N\\c[...'~appears~in~a~
- regular~expression,~but~the~closing~']'~is~not~present.
- }
-\__msg_kernel_new:nnnn { regex } { c-missing-category } % Error: \c followed by the invalid character #1.
- { Invalid~character~'#1'~following~'\iow_char:N\\c'~escape. }
- {
- In~regular~expressions,~the~'\iow_char:N\\c'~escape~sequence~
- may~only~be~followed~by~a~left~brace,~a~left~bracket,~or~a~
- capital~letter~representing~a~character~category,~namely~
- one~of~'ABCDELMOPSTU'.
- }
-\__msg_kernel_new:nnnn { regex } { c-trailing } % Error: regex ends with \c followed by a letter.
- { Trailing~category~code~escape~'\iow_char:N\\c'... }
- {
- A~regular~expression~ends~with~'\iow_char:N\\c'~followed~
- by~a~letter.~It~will~be~ignored.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-lbrace } % Error: \u not followed by a left brace.
- { Missing~left~brace~following~'\iow_char:N\\u'~escape. }
- {
- The~'\iow_char:N\\u'~escape~sequence~must~be~followed~by~
- a~brace~group~with~the~name~of~the~variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { u-missing-rbrace }
- { Missing~right~brace~inserted~for~'\iow_char:N\\u'~escape. }
- {
- LaTeX~
- \str_if_eq_x:nnTF { } {#2}
- { reached~the~end~of~the~string~ }
- { encountered~an~escaped~alphanumeric~character '\iow_char:N\\#2'~ }
- when~parsing~the~argument~of~an~'\iow_char:N\\u\iow_char:N\{...\}'~escape.
- }
-\__msg_kernel_new:nnnn { regex } { posix-unsupported }
- { POSIX~collating~element~'[#1 ~ #1]'~not~supported. }
- {
- The~'[.foo.]'~and~'[=bar=]'~syntaxes~have~a~special~meaning~
- in~POSIX~regular~expressions.~This~is~not~supported~by~LaTeX.~
- Maybe~you~forgot~to~escape~a~left~bracket~in~a~character~class?
- }
-\__msg_kernel_new:nnnn { regex } { posix-unknown }
- { POSIX~class~'[:#1:]'~unknown. }
- {
- '[:#1:]'~is~not~among~the~known~POSIX~classes~
- '[:alnum:]',~'[:alpha:]',~'[:ascii:]',~'[:blank:]',~
- '[:cntrl:]',~'[:digit:]',~'[:graph:]',~'[:lower:]',~
- '[:print:]',~'[:punct:]',~'[:space:]',~'[:upper:]',~
- '[:word:]',~and~'[:xdigit:]'.
- }
-\__msg_kernel_new:nnnn { regex } { posix-missing-close }
- { Missing~closing~':]'~for~POSIX~class. }
- { The~POSIX~syntax~'#1'~must~be~followed~by~':]',~not~'#2'. }
-\__msg_kernel_new:nnnn { regex } { result-unbalanced }
- { Missing~brace~inserted~when~#1. }
- {
- LaTeX~was~asked~to~do~some~regular~expression~operation,~
- and~the~resulting~token~list~would~not~have~the~same~number~
- of~begin-group~and~end-group~tokens.~Braces~were~inserted:~
- #2~left,~#3~right.
- }
-\__msg_kernel_new:nnnn { regex } { unknown-option }
- { Unknown~option~'#1'~for~regular~expressions. }
- {
- The~only~available~option~is~'case-insensitive',~toggled~by~
- '(?i)'~and~'(?-i)'.
- }
-\__msg_kernel_new:nnnn { regex } { special-group-unknown }
- { Unknown~special~group~'#1~...'~in~a~regular~expression. }
- {
- The~only~valid~constructions~starting~with~'(?'~are~
- '(?:~...~)',~'(?|~...~)',~'(?i)',~and~'(?-i)'.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-c }
- { Misused~'\iow_char:N\\c'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~
- or~a~brace~group,~not~by~'#1'.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-u }
- { Misused~'\iow_char:N\\u'~command~in~a~replacement~text. }
- {
- In~a~replacement~text,~the~'\iow_char:N\\u'~escape~sequence~
- must~be~~followed~by~a~brace~group~holding~the~name~of~the~
- variable~to~use.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-g }
- {
- Missing~brace~for~the~'\iow_char:N\\g'~construction~
- in~a~replacement~text.
- }
- {
- In~the~replacement~text~for~a~regular~expression~search,~
- submatches~are~represented~either~as~'\iow_char:N \\g{dd..d}',~
- or~'\\d',~where~'d'~are~single~digits.~Here,~a~brace~is~missing.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-end }
- {
- Missing~character~for~the~'\iow_char:N\\c<category><character>'~
- construction~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~'\iow_char:N\\c'~escape~sequence~
- can~be~followed~by~one~of~the~letters~'ABCDELMOPSTU'~representing~
- the~character~category.~Then,~a~character~must~follow.~LaTeX~
- reached~the~end~of~the~replacement~when~looking~for~that.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-catcode-in-cs }
- {
- Category~code~'\iow_char:N\\c#1#3'~ignored~inside~
- '\iow_char:N\\c\{...\}'~in~a~replacement~text.
- }
- {
- In~a~replacement~text,~the~category~codes~of~the~argument~of~
- '\iow_char:N\\c\{...\}'~are~ignored~when~building~the~control~
- sequence~name.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-null-space }
- { TeX~cannot~build~a~space~token~with~character~code~0. }
- {
- You~asked~for~a~character~token~with~category~space,~
- and~character~code~0,~for~instance~through~
- '\iow_char:N\\cS\iow_char:N\\x00'.~
- This~specific~case~is~impossible~and~will~be~replaced~
- by~a~normal~space.
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rbrace }
- { Missing~right~brace~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { brace } { braces } .
- }
-\__msg_kernel_new:nnnn { regex } { replacement-missing-rparen }
- { Missing~right~parenthesis~inserted~in~replacement~text. }
- {
- There~ \int_compare:nTF { #1 = 1 } { was } { were } ~ #1~
- missing~right~\int_compare:nTF { #1 = 1 } { parenthesis } { parentheses } .
- }
-\cs_new:Npn \__regex_msg_repeated:nnN #1#2#3
- {
- \str_if_eq_x:nnF { #1 #2 } { 1 0 }
- {
- , ~ repeated ~
- \int_case:nnF {#2}
- {
- { -1 } { #1~or~more~times,~\bool_if:NTF #3 { lazy } { greedy } }
- { 0 } { #1~times }
- }
- {
- between~#1~and~\int_eval:n {#1+#2}~times,~
- \bool_if:NTF #3 { lazy } { greedy }
- }
- }
- }
-%%
-%%
-%% End of file `l3regex.sty'.
Modified: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-convert.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-convert.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-convert.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -20,9 +20,8 @@
%%
%% File: l3str-convert.dtx Copyright (C) 2013-2017 The LaTeX3 Project
\RequirePackage{expl3}
-\ProvidesExplPackage{l3str-convert}{2017/05/13}{}
+\ProvidesExplPackage{l3str-convert}{2017/05/29}{}
{L3 Experimental string encoding conversions}
-\RequirePackage{l3tl-analysis,l3tl-build}
\cs_if_exist:NF \use_ii_i:nn
{ \cs_new:Npn \use_ii_i:nn #1#2 { #2 #1 } }
\cs_new_protected:Npn \__str_tmp:w { }
Modified: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-format.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-format.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3str-format.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -20,7 +20,7 @@
%%
%% File: l3str-format.dtx Copyright (C) 2012-2013,2015-2017 The LaTeX3 Project
\RequirePackage{expl3}
-\ProvidesExplPackage{l3str-format}{2017/05/13}{}
+\ProvidesExplPackage{l3str-format}{2017/05/29}{}
{L3 Experimental string formatting}
\RequirePackage{l3str}
\cs_generate_variant:Nn \use:nn { nf }
Deleted: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-analysis.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-analysis.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-analysis.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,449 +0,0 @@
-%%
-%% This is file `l3tl-analysis.sty',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% l3tl-analysis.dtx (with options: `package')
-%%
-%% Copyright (C) 2011-2017 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of
-%% the LaTeX Project Public License (LPPL), either version 1.3c of
-%% this license or (at your option) any later version. The latest
-%% version of this license is in the file:
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3experimental bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% File: l3tl-analysis.dtx Copyright (C) 2011-2012,2015-2017 The LaTeX3 Project
-\RequirePackage{expl3}
-\ProvidesExplPackage{l3tl-analysis}{2017/05/13}{}
- {L3 Experimental token list analysis}
-\__scan_new:N \s__tl
-\tl_new:N \l__tl_analysis_internal_tl
-\cs_new_eq:NN \l__tl_analysis_token ?
-\cs_new_eq:NN \l__tl_analysis_char_token ?
-\int_new:N \l__tl_analysis_normal_int
-\int_new:N \l__tl_analysis_index_int
-\int_new:N \l__tl_analysis_nesting_int
-\int_new:N \l__tl_analysis_type_int
-\tl_new:N \g__tl_analysis_result_tl
-\cs_new:Npn \__tl_analysis_extract_charcode:
- {
- \exp_after:wN \__tl_analysis_extract_charcode_aux:w
- \token_to_meaning:N \l__tl_analysis_token
- }
-\cs_new:Npn \__tl_analysis_extract_charcode_aux:w #1 ~ #2 ~ { ` }
-\cs_new:Npn \__tl_analysis_cs_space_count:NN #1 #2
- {
- \exp_after:wN #1
- \__int_value:w \__int_eval:w 0
- \exp_after:wN \__tl_analysis_cs_space_count:w
- \token_to_str:N #2
- \fi: \__tl_analysis_cs_space_count_end:w ; ~ !
- }
-\cs_new:Npn \__tl_analysis_cs_space_count:w #1 ~
- {
- \if_false: #1 #1 \fi:
- + 1
- \__tl_analysis_cs_space_count:w
- }
-\cs_new:Npn \__tl_analysis_cs_space_count_end:w ; #1 \fi: #2 !
- { \exp_after:wN ; \__int_value:w \str_count_ignore_spaces:n {#1} ; }
-\cs_new_protected:Npn \__tl_analysis:n #1
- {
- \group_begin:
- \group_align_safe_begin:
- \__tl_analysis_setup:n {#1}
- \__tl_analysis_a:n {#1}
- \__tl_analysis_b:n {#1}
- \group_align_safe_end:
- \group_end:
- }
-\cs_new_protected:Npn \__tl_analysis_setup:n #1
- {
- \int_set:Nn \tex_escapechar:D { -1 }
- \exp_after:wN \__tl_analysis_disable_loop:N
- \tl_to_str:n {#1} { ~ } { ? \__prg_break: }
- \__prg_break_point:
- \scan_stop:
- }
-\group_begin:
- \char_set_catcode_active:N \^^@
- \cs_new_protected:Npn \__tl_analysis_disable_loop:N #1
- {
- \tex_lccode:D 0 = `#1 ~
- \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
- \__tl_analysis_disable_loop:N
- }
- \bool_lazy_or:nnT
- { \sys_if_engine_ptex_p: }
- { \sys_if_engine_uptex_p: }
- {
- \cs_gset_protected:Npn \__tl_analysis_disable_loop:N #1
- {
- \use_none:n #1 \scan_stop:
- \if_int_compare:w 256 > `#1 \exp_stop_f:
- \tex_lccode:D 0 = `#1 ~
- \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
- \fi:
- \__tl_analysis_disable_loop:N
- }
- }
-\group_end:
-\cs_new_protected:Npn \__tl_analysis_a:n #1
- {
- \int_set:Nn \tex_escapechar:D { 92 }
- \int_zero:N \l__tl_analysis_normal_int
- \int_zero:N \l__tl_analysis_index_int
- \int_zero:N \l__tl_analysis_nesting_int
- \if_false: { \fi: \__tl_analysis_a_loop:w #1 }
- \int_decr:N \l__tl_analysis_index_int
- }
-\cs_new_protected:Npn \__tl_analysis_a_loop:w
- { \tex_futurelet:D \l__tl_analysis_token \__tl_analysis_a_type:w }
-\cs_new_protected:Npn \__tl_analysis_a_type:w
- {
- \l__tl_analysis_type_int =
- \if_meaning:w \l__tl_analysis_token \c_space_token
- 0
- \else:
- \if_catcode:w \exp_not:N \l__tl_analysis_token \c_group_begin_token
- 1
- \else:
- \if_catcode:w \exp_not:N \l__tl_analysis_token \c_group_end_token
- - 1
- \else:
- 2
- \fi:
- \fi:
- \fi:
- \exp_stop_f:
- \if_case:w \l__tl_analysis_type_int
- \exp_after:wN \__tl_analysis_a_space:w
- \or: \exp_after:wN \__tl_analysis_a_bgroup:w
- \or: \exp_after:wN \__tl_analysis_a_safe:N
- \else: \exp_after:wN \__tl_analysis_a_egroup:w
- \fi:
- }
-\cs_new_protected:Npn \__tl_analysis_a_space:w
- {
- \tex_afterassignment:D \__tl_analysis_a_space_test:w
- \exp_after:wN \cs_set_eq:NN
- \exp_after:wN \l__tl_analysis_char_token
- \token_to_str:N
- }
-\cs_new_protected:Npn \__tl_analysis_a_space_test:w
- {
- \if_meaning:w \l__tl_analysis_char_token \c_space_token
- \tex_toks:D \l__tl_analysis_index_int { \exp_not:n { ~ } }
- \__tl_analysis_a_store:
- \else:
- \int_incr:N \l__tl_analysis_normal_int
- \fi:
- \__tl_analysis_a_loop:w
- }
-\group_begin:
- \char_set_catcode_group_begin:N \^^@
- \char_set_catcode_group_end:N \^^E
- \cs_new_protected:Npn \__tl_analysis_a_bgroup:w
- { \__tl_analysis_a_group:nw { \exp_after:wN ^^@ \if_false: ^^E \fi: } }
- \char_set_catcode_group_begin:N \^^B
- \char_set_catcode_group_end:N \^^@
- \cs_new_protected:Npn \__tl_analysis_a_egroup:w
- { \__tl_analysis_a_group:nw { \if_false: ^^B \fi: ^^@ } }
-\group_end:
-\cs_new_protected:Npn \__tl_analysis_a_group:nw #1
- {
- \tex_lccode:D 0 = \__tl_analysis_extract_charcode: \scan_stop:
- \tex_lowercase:D { \tex_toks:D \l__tl_analysis_index_int {#1} }
- \if_int_compare:w \tex_lccode:D 0 = \tex_escapechar:D
- \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D }
- \fi:
- \tex_afterassignment:D \__tl_analysis_a_group_test:w
- \exp_after:wN \cs_set_eq:NN
- \exp_after:wN \l__tl_analysis_char_token
- \token_to_str:N
- }
-\cs_new_protected:Npn \__tl_analysis_a_group_test:w
- {
- \if_charcode:w \l__tl_analysis_token \l__tl_analysis_char_token
- \__tl_analysis_a_store:
- \else:
- \int_incr:N \l__tl_analysis_normal_int
- \fi:
- \__tl_analysis_a_loop:w
- }
-\cs_new_protected:Npn \__tl_analysis_a_store:
- {
- \tex_advance:D \l__tl_analysis_nesting_int \l__tl_analysis_type_int
- \if_int_compare:w \tex_lccode:D 0 = `\ \exp_stop_f:
- \tex_multiply:D \l__tl_analysis_type_int 2 \exp_stop_f:
- \fi:
- \tex_skip:D \l__tl_analysis_index_int
- = \l__tl_analysis_normal_int sp plus \l__tl_analysis_type_int sp \scan_stop:
- \int_incr:N \l__tl_analysis_index_int
- \int_zero:N \l__tl_analysis_normal_int
- \if_int_compare:w \l__tl_analysis_nesting_int = -1 \exp_stop_f:
- \cs_set_eq:NN \__tl_analysis_a_loop:w \scan_stop:
- \fi:
- }
-\cs_new_protected:Npn \__tl_analysis_a_safe:N #1
- {
- \if_charcode:w
- \scan_stop:
- \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
- \scan_stop:
- \int_incr:N \l__tl_analysis_normal_int
- \else:
- \__tl_analysis_cs_space_count:NN \__tl_analysis_a_cs:ww #1
- \fi:
- \__tl_analysis_a_loop:w
- }
-\cs_new_protected:Npn \__tl_analysis_a_cs:ww #1; #2;
- {
- \if_int_compare:w #1 > 0 \exp_stop_f:
- \tex_skip:D \l__tl_analysis_index_int
- = \__int_eval:w \l__tl_analysis_normal_int + 1 sp \scan_stop:
- \tex_advance:D \l__tl_analysis_index_int #1 \exp_stop_f:
- \l__tl_analysis_normal_int #2 \exp_stop_f:
- \else:
- \tex_advance:D \l__tl_analysis_normal_int #2 \exp_stop_f:
- \fi:
- }
-\cs_new_protected:Npn \__tl_analysis_b:n #1
- {
- \tl_gset:Nx \g__tl_analysis_result_tl
- {
- \__tl_analysis_b_loop:w 0; #1
- \__prg_break_point:
- }
- }
-\cs_new:Npn \__tl_analysis_b_loop:w #1;
- {
- \exp_after:wN \__tl_analysis_b_normals:ww
- \__int_value:w \tex_skip:D #1 ; #1 ;
- }
-\cs_new:Npn \__tl_analysis_b_normals:ww #1;
- {
- \if_int_compare:w #1 = 0 \exp_stop_f:
- \__tl_analysis_b_special:w
- \fi:
- \__tl_analysis_b_normal:wwN #1;
- }
-\cs_new:Npn \__tl_analysis_b_normal:wwN #1; #2; #3
- {
- \exp_not:n { \exp_not:n { #3 } } \s__tl
- \if_charcode:w
- \scan_stop:
- \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
- \scan_stop:
- \exp_after:wN \__tl_analysis_b_char:Nww
- \else:
- \exp_after:wN \__tl_analysis_b_cs:Nww
- \fi:
- #3 #1; #2;
- }
-\cs_new:Npx \__tl_analysis_b_char:Nww #1
- {
- \exp_not:N \if_meaning:w #1 \exp_not:N \tex_undefined:D
- \token_to_str:N D \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_other_token
- \token_to_str:N C \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_letter_token
- \token_to_str:N B \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_toggle_token 3 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_alignment_token 4 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_superscript_token 7 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_subscript_token 8 \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_space_token
- \token_to_str:N A \exp_not:N \else:
- 6
- \exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: }
- \exp_not:N \__int_value:w `#1 \s__tl
- \exp_not:N \exp_after:wN \exp_not:N \__tl_analysis_b_normals:ww
- \exp_not:N \__int_value:w \exp_not:N \__int_eval:w - 1 +
- }
-\cs_new:Npn \__tl_analysis_b_cs:Nww #1
- {
- 0 -1 \s__tl
- \__tl_analysis_cs_space_count:NN \__tl_analysis_b_cs_test:ww #1
- }
-\cs_new:Npn \__tl_analysis_b_cs_test:ww #1 ; #2 ; #3 ; #4 ;
- {
- \exp_after:wN \__tl_analysis_b_normals:ww
- \__int_value:w \__int_eval:w
- \if_int_compare:w #1 = 0 \exp_stop_f:
- #3
- \else:
- \tex_skip:D \__int_eval:w #4 + #1 \__int_eval_end:
- \fi:
- - #2
- \exp_after:wN ;
- \__int_value:w \__int_eval:w #4 + #1 ;
- }
-\group_begin:
- \char_set_catcode_other:N A
- \cs_new:Npn \__tl_analysis_b_special:w
- \fi: \__tl_analysis_b_normal:wwN 0 ; #1 ;
- {
- \fi:
- \if_int_compare:w #1 = \l__tl_analysis_index_int
- \exp_after:wN \__prg_break:
- \fi:
- \tex_the:D \tex_toks:D #1 \s__tl
- \if_case:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
- A
- \or: 1
- \or: 1
- \else: 2
- \fi:
- \if_int_odd:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
- \exp_after:wN \__tl_analysis_b_special_char:wN \__int_value:w
- \else:
- \exp_after:wN \__tl_analysis_b_special_space:w \__int_value:w
- \fi:
- \__int_eval:w 1 + #1 \exp_after:wN ;
- \token_to_str:N
- }
-\group_end:
-\cs_new:Npn \__tl_analysis_b_special_char:wN #1 ; #2
- {
- \__int_value:w `#2 \s__tl
- \__tl_analysis_b_loop:w #1 ;
- }
-\cs_new:Npn \__tl_analysis_b_special_space:w #1 ; ~
- {
- 32 \s__tl
- \__tl_analysis_b_loop:w #1 ;
- }
-\cs_new_protected:Npn \__tl_analysis_map_inline:nn #1
- {
- \__tl_analysis:n {#1}
- \int_gincr:N \g__prg_map_int
- \exp_args:Nc \__tl_analysis_map_inline_aux:Nn
- { __tl_analysis_map_inline_ \int_use:N \g__prg_map_int :wNw }
- }
-\cs_new_protected:Npn \__tl_analysis_map_inline_aux:Nn #1#2
- {
- \cs_gset_protected:Npn #1 ##1 \s__tl ##2 ##3 \s__tl
- {
- \use_none:n ##2
- #2
- #1
- }
- \exp_after:wN #1
- \g__tl_analysis_result_tl
- \s__tl { ? \tl_map_break: } \s__tl
- \__prg_break_point:Nn \tl_map_break: { \int_gdecr:N \g__prg_map_int }
- }
-\cs_new_protected:Npn \tl_show_analysis:N #1
- {
- \tl_if_exist:NTF #1
- {
- \exp_args:No \__tl_analysis:n {#1}
- \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
- { \token_to_str:N #1 } { \tl_if_empty:NTF #1 { } { ? } } { } { }
- \__tl_analysis_show:
- }
- { \tl_show:N #1 }
- }
-\cs_new_protected:Npn \tl_show_analysis:n #1
- {
- \__tl_analysis:n {#1}
- \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
- { } { \tl_if_empty:nTF {#1} { } { ? } } { } { }
- \__tl_analysis_show:
- }
-\cs_new_protected:Npn \__tl_analysis_show:
- {
- \group_begin:
- \exp_args:NNx
- \group_end:
- \__msg_show_wrap:n
- {
- \exp_after:wN \__tl_analysis_show_loop:wNw \g__tl_analysis_result_tl
- \s__tl { ? \__prg_break: } \s__tl
- \__prg_break_point:
- }
- }
-\cs_new:Npn \__tl_analysis_show_loop:wNw #1 \s__tl #2 #3 \s__tl
- {
- \use_none:n #2
- \exp_not:n { \\ > \ \ }
- \if_int_compare:w "#2 = 0 \exp_stop_f:
- \exp_after:wN \__tl_analysis_show_cs:n
- \else:
- \if_int_compare:w "#2 = 13 \exp_stop_f:
- \exp_after:wN \exp_after:wN
- \exp_after:wN \__tl_analysis_show_active:n
- \else:
- \exp_after:wN \exp_after:wN
- \exp_after:wN \__tl_analysis_show_normal:n
- \fi:
- \fi:
- {#1}
- \__tl_analysis_show_loop:wNw
- }
-\cs_new:Npn \__tl_analysis_show_normal:n #1
- {
- \exp_after:wN \token_to_str:N #1 ~
- ( \exp_after:wN \token_to_meaning:N #1 )
- }
-\cs_new:Npn \__tl_analysis_show_value:N #1
- {
- \token_if_expandable:NF #1
- {
- \token_if_chardef:NTF #1 \__prg_break: { }
- \token_if_mathchardef:NTF #1 \__prg_break: { }
- \token_if_dim_register:NTF #1 \__prg_break: { }
- \token_if_int_register:NTF #1 \__prg_break: { }
- \token_if_skip_register:NTF #1 \__prg_break: { }
- \token_if_toks_register:NTF #1 \__prg_break: { }
- \use_none:nnn
- \__prg_break_point:
- \use:n { \exp_after:wN = \tex_the:D #1 }
- }
- }
-\cs_new:Npn \__tl_analysis_show_cs:n #1
- { \exp_args:No \__tl_analysis_show_long:nn {#1} { control~sequence= } }
-\cs_new:Npn \__tl_analysis_show_active:n #1
- { \exp_args:No \__tl_analysis_show_long:nn {#1} { active~character= } }
-\cs_new:Npn \__tl_analysis_show_long:nn #1
- {
- \__tl_analysis_show_long_aux:oofn
- { \token_to_str:N #1 }
- { \token_to_meaning:N #1 }
- { \__tl_analysis_show_value:N #1 }
- }
-\cs_new:Npn \__tl_analysis_show_long_aux:nnnn #1#2#3#4
- {
- \int_compare:nNnTF
- { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
- > { \l_iow_line_count_int - 3 }
- {
- \str_range:nnn { #1 ~ ( #4 #2 #3 ) } { 1 }
- {
- \l_iow_line_count_int - 3
- - \str_count:N \c__tl_analysis_show_etc_str
- }
- \c__tl_analysis_show_etc_str
- }
- { #1 ~ ( #4 #2 #3 ) }
- }
-\cs_generate_variant:Nn \__tl_analysis_show_long_aux:nnnn { oof }
-\tl_const:Nx \c__tl_analysis_show_etc_str % (
- { \token_to_str:N \ETC.) }
-\__msg_kernel_new:nnn { kernel } { show-tl-analysis }
- {
- The~token~list~ \tl_if_empty:nF {#1} { #1 ~ }
- \tl_if_empty:nTF {#2}
- { is~empty }
- { contains~the~tokens: }
- }
-%%
-%%
-%% End of file `l3tl-analysis.sty'.
Deleted: trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-build.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-build.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/l3str/l3tl-build.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -1,104 +0,0 @@
-%%
-%% This is file `l3tl-build.sty',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% l3tl-build.dtx (with options: `package')
-%%
-%% Copyright (C) 2011-2017 The LaTeX3 Project
-%%
-%% It may be distributed and/or modified under the conditions of
-%% the LaTeX Project Public License (LPPL), either version 1.3c of
-%% this license or (at your option) any later version. The latest
-%% version of this license is in the file:
-%%
-%% http://www.latex-project.org/lppl.txt
-%%
-%% This file is part of the "l3experimental bundle" (The Work in LPPL)
-%% and all files in that bundle must be distributed together.
-%%
-%% File: l3tl-build.dtx Copyright (C) 2011-2017 The LaTeX3 Project
-\RequirePackage{expl3}[2017/05/13]
-\@ifpackagelater{expl3}{2017/05/13}
- {}
- {%
- \PackageError{l3tl-build}{Support package l3kernel too old}
- {%
- Please install an up to date version of l3kernel\MessageBreak
- using your TeX package manager or from CTAN.\MessageBreak
- \MessageBreak
- Loading l3tl-build will abort!%
- }%
- \endinput
- }
-\ProvidesExplPackage{l3tl-build}{2017/05/13}{}
- {L3 Experimental token list construction}
-\int_new:N \l__tl_build_start_index_int
-\int_new:N \l__tl_build_index_int
-\tl_new:N \l__tl_build_result_tl
-\cs_new_protected:Npn \__tl_build_unpack:
- {
- \tl_put_right:Nx \l__tl_build_result_tl
- {
- \exp_after:wN \__tl_build_unpack_loop:w
- \int_use:N \l__tl_build_start_index_int ;
- \__prg_break_point:
- }
- }
-\cs_new:Npn \__tl_build_unpack_loop:w #1 ;
- {
- \if_int_compare:w #1 = \l__tl_build_index_int
- \exp_after:wN \__prg_break:
- \fi:
- \tex_the:D \tex_toks:D #1 \exp_stop_f:
- \exp_after:wN \__tl_build_unpack_loop:w
- \int_use:N \__int_eval:w #1 + 1 ;
- }
-\cs_new_protected:Npn \__tl_build:Nw
- { \__tl_build_aux:NNw \tl_set:Nn }
-\cs_new_protected:Npn \__tl_build_x:Nw
- { \__tl_build_aux:NNw \tl_set:Nx }
-\cs_new_protected:Npn \__tl_gbuild:Nw
- { \__tl_build_aux:NNw \tl_gset:Nn }
-\cs_new_protected:Npn \__tl_gbuild_x:Nw
- { \__tl_build_aux:NNw \tl_gset:Nx }
-\cs_new_protected:Npn \__tl_build_aux:NNw #1#2
- {
- \group_begin:
- \cs_set:Npn \__tl_build_end_assignment:n
- { \group_end: #1 #2 }
- \int_zero:N \l__tl_build_start_index_int
- \int_zero:N \l__tl_build_index_int
- \tl_clear:N \l__tl_build_result_tl
- }
-\cs_new_protected:Npn \__tl_build_end:
- {
- \__tl_build_unpack:
- \exp_args:No
- \__tl_build_end_assignment:n \l__tl_build_result_tl
- }
-\cs_new_eq:NN \__tl_build_end_assignment:n \use_none:n
-\cs_new_protected:Npn \__tl_build_one:n #1
- {
- \tex_toks:D \l__tl_build_index_int {#1}
- \int_incr:N \l__tl_build_index_int
- \if_int_compare:w \l__tl_build_index_int > \c_max_register_int
- \__tl_build_unpack:
- \l__tl_build_index_int \l__tl_build_start_index_int
- \fi:
- }
-\cs_new_protected:Npn \__tl_build_one:o #1
- {
- \tex_toks:D \l__tl_build_index_int \exp_after:wN {#1}
- \int_incr:N \l__tl_build_index_int
- \if_int_compare:w \l__tl_build_index_int > \c_max_register_int
- \__tl_build_unpack:
- \l__tl_build_index_int \l__tl_build_start_index_int
- \fi:
- }
-\cs_new_protected:Npn \__tl_build_one:x #1
- { \use:x { \__tl_build_one:n {#1} } }
-%%
-%%
-%% End of file `l3tl-build.sty'.
Modified: trunk/Master/texmf-dist/tex/latex/l3experimental/xcoffins/xcoffins.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/xcoffins/xcoffins.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/xcoffins/xcoffins.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -20,7 +20,7 @@
%%
%% File: xcoffins.dtx Copyright(C) 2010-2012,2014,2016,2017 The LaTeX3 Project
\RequirePackage{xparse}
-\ProvidesExplPackage{xcoffins}{2017/05/13}{}
+\ProvidesExplPackage{xcoffins}{2017/05/29}{}
{L3 Experimental design level coffins}
\keys_define:nn { coffin }
{
Modified: trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/l3galley.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/l3galley.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/l3galley.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -20,8 +20,8 @@
%%
%% File: l3galley.dtx Copyright (C) 1999-2001, 2004-2009 Frank Mittelbach
%% (C) 2010-2017 The LaTeX3 Project
-\RequirePackage{expl3}[2017/05/13]
-\@ifpackagelater{expl3}{2017/05/13}
+\RequirePackage{expl3}[2017/05/29]
+\@ifpackagelater{expl3}{2017/05/29}
{}
{%
\PackageError{l3galley}{Support package l3kernel too old}
@@ -33,7 +33,7 @@
}%
\endinput
}
-\ProvidesExplPackage{l3galley}{2017/05/13}{}
+\ProvidesExplPackage{l3galley}{2017/05/29}{}
{L3 Experimental galley code}
\int_new:N \l__galley_tmp_int
\seq_new:N \g__galley_tmpa_seq
Modified: trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/xgalley.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/xgalley.sty 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3experimental/xgalley/xgalley.sty 2017-06-05 23:17:08 UTC (rev 44483)
@@ -21,7 +21,7 @@
%% File: xgalley.dtx Copyright (C) 1999-2001, 2004-2009 Frank Mittelbach
%% (C) 2010-2012,2014,2016-2017 The LaTeX3 Project
\RequirePackage{xparse}
-\ProvidesExplPackage{xgalley}{2017/05/13}{}
+\ProvidesExplPackage{xgalley}{2017/05/29}{}
{L3 Experimental galley}
\RequirePackage{xparse,xtemplate,l3galley}
\clist_new:N \l__galley_tmpa_clist
Modified: trunk/Master/texmf-dist/tex/latex/l3kernel/expl3-code.tex
===================================================================
--- trunk/Master/texmf-dist/tex/latex/l3kernel/expl3-code.tex 2017-06-05 23:15:32 UTC (rev 44482)
+++ trunk/Master/texmf-dist/tex/latex/l3kernel/expl3-code.tex 2017-06-05 23:17:08 UTC (rev 44483)
@@ -13,6 +13,7 @@
%% l3str.dtx (with options: `package')
%% l3seq.dtx (with options: `package')
%% l3int.dtx (with options: `package')
+%% l3intarray.dtx (with options: `package')
%% l3flag.dtx (with options: `package')
%% l3quark.dtx (with options: `package')
%% l3prg.dtx (with options: `package')
@@ -37,6 +38,9 @@
%% l3fp-random.dtx (with options: `package')
%% l3fp-assign.dtx (with options: `package')
%% l3sort.dtx (with options: `package')
+%% l3tl-build.dtx (with options: `package')
+%% l3tl-analysis.dtx (with options: `package')
+%% l3regex.dtx (with options: `package')
%% l3box.dtx (with options: `package')
%% l3coffins.dtx (with options: `package')
%% l3color.dtx (with options: `package')
@@ -58,7 +62,7 @@
%% and all files in that bundle must be distributed together.
%%
%% File: expl3.dtx Copyright (C) 1990-2017 The LaTeX3 Project
-\def\ExplFileDate{2017/05/13}%
+\def\ExplFileDate{2017/05/29}%
\begingroup
\def\next{\endgroup}%
\expandafter\ifx\csname PackageError\endcsname\relax
@@ -3094,6 +3098,7 @@
{ \__prg_map_break:Nn \tl_map_break: { } }
\cs_new:Npn \tl_map_break:n
{ \__prg_map_break:Nn \tl_map_break: }
+\cs_generate_variant:Nn \tl_to_str:n { V }
\cs_new:Npn \tl_to_str:N #1 { \etex_detokenize:D \exp_after:wN {#1} }
\cs_generate_variant:Nn \tl_to_str:N { c }
\cs_new:Npn \tl_use:N #1
@@ -5429,6 +5434,60 @@
\int_new:N \g_tmpa_int
\int_new:N \g_tmpb_int
\cs_new_eq:NN \c_minus_one \m@ne
+%% File: l3intarray.dtx Copyright (C) 2017 The LaTeX3 Project
+\int_new:N \g__intarray_font_int
+\cs_new_protected:Npn \__intarray_new:Nn #1#2
+ {
+ \__chk_if_free_cs:N #1
+ \int_gincr:N \g__intarray_font_int
+ \tex_global:D \tex_font:D #1 = cmr10~at~ \g__intarray_font_int sp \scan_stop:
+ \tex_hyphenchar:D #1 = \int_eval:n {#2} \scan_stop:
+ \int_compare:nNnT { \tex_hyphenchar:D #1 } > 0
+ { \tex_fontdimen:D \tex_hyphenchar:D #1 #1 = 0 sp \scan_stop: }
+ \int_step_inline:nnnn { 1 } { 1 } { 8 }
+ { \tex_fontdimen:D ##1 #1 = 0 sp \scan_stop: }
+ }
+\cs_new:Npn \__intarray_count:N #1 { \tex_the:D \tex_hyphenchar:D #1 }
+\cs_new_protected:Npn \__intarray_gset_fast:Nnn #1#2#3
+ { \tex_fontdimen:D \int_eval:n {#2} #1 = \int_eval:n {#3} sp \scan_stop: }
+\cs_new_protected:Npn \__intarray_gset:Nnn #1#2#3
+ {
+ \exp_args:Nff \__intarray_gset_aux:Nnn #1
+ { \int_eval:n {#2} } { \int_eval:n {#3} }
+ }
+\cs_new_protected:Npn \__intarray_gset_aux:Nnn #1#2#3
+ {
+ \int_compare:nTF { 1 <= #2 <= \__intarray_count:N #1 }
+ {
+ \int_compare:nTF { - \c_max_dim <= \int_abs:n {#3} <= \c_max_dim }
+ { \__intarray_gset_fast:Nnn #1 {#2} {#3} }
+ {
+ \__msg_kernel_error:nnxxxx { kernel } { overflow }
+ { \token_to_str:N #1 } {#2} {#3}
+ { \int_compare:nNnT {#3} < 0 { - } \__int_value:w \c_max_dim }
+ \__intarray_gset_fast:Nnn #1 {#2}
+ { \int_compare:nNnT {#3} < 0 { - } \c_max_dim }
+ }
+ }
+ {
+ \__msg_kernel_error:nnxxx { kernel } { out-of-bounds }
+ { \token_to_str:N #1 } {#2} { \__intarray_count:N #1 }
+ }
+ }
+\cs_new:Npn \__intarray_item_fast:Nn #1#2
+ { \__int_value:w \tex_fontdimen:D \int_eval:n {#2} #1 }
+\cs_new:Npn \__intarray_item:Nn #1#2
+ { \exp_args:Nf \__intarray_item_aux:Nn #1 { \int_eval:n {#2} } }
+\cs_new:Npn \__intarray_item_aux:Nn #1#2
+ {
+ \int_compare:nTF { 1 <= #2 <= \__intarray_count:N #1 }
+ { \__intarray_item_fast:Nn #1 {#2} }
+ {
+ \__msg_kernel_expandable_error:nnnnn { kernel } { out-of-bounds }
+ { \token_to_str:N #1 } {#2} { \__intarray_count:N #1 }
+ 0
+ }
+ }
%% File: l3flag.dtx Copyright (C) 2011-2012,2014-2017 The LaTeX3 Project
\cs_new_protected:Npn \flag_new:n #1
{
@@ -8098,6 +8157,18 @@
needed~when~defining~conditionals~or~variants,~or~when~building~a~
parameter~text~from~the~number~of~arguments~of~the~function.
}
+\__msg_kernel_new:nnnn { kernel } { overflow }
+ { Integers~larger~than~2^{30}-1~cannot~be~stored~in~arrays. }
+ {
+ An~attempt~was~made~to~store~#3~at~position~#2~in~the~array~'#1'.~
+ The~largest~allowed~value~#4~will~be~used~instead.
+ }
+\__msg_kernel_new:nnnn { kernel } { out-of-bounds }
+ { Access~to~an~entry~beyond~an~array's~bounds. }
+ {
+ An~attempt~was~made~to~access~or~store~data~at~position~#2~of~the~
+ array~'#1',~but~this~array~has~entries~at~positions~from~1~to~#3.
+ }
\__msg_kernel_new:nnnn { kernel } { protected-predicate }
{ Predicate~'#1'~must~be~expandable. }
{
@@ -8607,6 +8678,37 @@
\int_set:Nn \tex_endlinechar:D { \int_use:N \tex_endlinechar:D }
}
}
+\cs_new:Npn \ior_map_break:
+ { \__prg_map_break:Nn \ior_map_break: { } }
+\cs_new:Npn \ior_map_break:n
+ { \__prg_map_break:Nn \ior_map_break: }
+\cs_new_protected:Npn \ior_map_inline:Nn
+ { \__ior_map_inline:NNn \ior_get:NN }
+\cs_new_protected:Npn \ior_str_map_inline:Nn
+ { \__ior_map_inline:NNn \ior_str_get:NN }
+\cs_new_protected:Npn \__ior_map_inline:NNn
+ {
+ \int_gincr:N \g__prg_map_int
+ \exp_args:Nc \__ior_map_inline:NNNn
+ { __prg_map_ \int_use:N \g__prg_map_int :n }
+ }
+\cs_new_protected:Npn \__ior_map_inline:NNNn #1#2#3#4
+ {
+ \cs_gset_protected:Npn #1 ##1 {#4}
+ \ior_if_eof:NF #3 { \__ior_map_inline_loop:NNN #1#2#3 }
+ \__prg_break_point:Nn \ior_map_break:
+ { \int_gdecr:N \g__prg_map_int }
+ }
+\cs_new_protected:Npn \__ior_map_inline_loop:NNN #1#2#3
+ {
+ #2 #3 \l__ior_internal_tl
+ \ior_if_eof:NF #3
+ {
+ \exp_args:No #1 \l__ior_internal_tl
+ \__ior_map_inline_loop:NNN #1#2#3
+ }
+ }
+\tl_new:N \l__ior_internal_tl
\ior_new:N \g__file_internal_ior
\int_const:Nn \c_log_iow { -1 }
\int_const:Nn \c_term_iow
@@ -10038,11 +10140,18 @@
\cs_generate_variant:Nn \__keys_set_known:nnnN { o }
\cs_new_protected:Npn \keys_set_known:nn #1#2
{
+ \bool_if:NTF \l__keys_only_known_bool % flag already true: call \keys_set:nn directly, leaving the flag alone
+ { \keys_set:nn }
+ { \__keys_set_known:nn }
+ {#1} {#2}
+ }
+\cs_generate_variant:Nn \keys_set_known:nn { nV , nv , no }
+\cs_new_protected:Npn \__keys_set_known:nn #1#2 % runs \keys_set:nn with \l__keys_only_known_bool true, then sets it false
+ {
\bool_set_true:N \l__keys_only_known_bool
\keys_set:nn {#1} {#2}
\bool_set_false:N \l__keys_only_known_bool
}
-\cs_generate_variant:Nn \keys_set_known:nn { nV , nv , no }
\cs_new_protected:Npn \keys_set_filter:nnnN
{ \__keys_set_filter:onnnN \l__keys_unused_clist }
\cs_generate_variant:Nn \keys_set_filter:nnnN { nnV , nnv , nno }
@@ -10056,22 +10165,50 @@
\cs_generate_variant:Nn \__keys_set_filter:nnnnN { o }
\cs_new_protected:Npn \keys_set_filter:nnn #1#2#3
{
- \bool_set_true:N \l__keys_selective_bool
+ \bool_if:NTF \l__keys_filtered_bool % flag already true: go straight to the selective setter
+ { \__keys_set_selective:nnn }
+ { \__keys_set_filter:nnn }
+ {#1} {#2} {#3}
+ }
+\cs_generate_variant:Nn \keys_set_filter:nnn { nnV , nnv , nno }
+\cs_new_protected:Npn \__keys_set_filter:nnn #1#2#3 % wrapper: flag true, selective setting, flag false
+ {
\bool_set_true:N \l__keys_filtered_bool
- \seq_set_from_clist:Nn \l__keys_selective_seq {#2}
- \keys_set:nn {#1} {#3}
- \bool_set_false:N \l__keys_selective_bool
+ \__keys_set_selective:nnn {#1} {#2} {#3}
+ \bool_set_false:N \l__keys_filtered_bool
}
-\cs_generate_variant:Nn \keys_set_filter:nnn { nnV , nnv , nno }
\cs_new_protected:Npn \keys_set_groups:nnn #1#2#3
{
+ \bool_if:NTF \l__keys_filtered_bool % flag true: use the wrapper that clears it temporarily
+ { \__keys_set_groups:nnn }
+ { \__keys_set_selective:nnn } % flag already false: nothing to save or restore
+ {#1} {#2} {#3}
+ }
+\cs_generate_variant:Nn \keys_set_groups:nnn { nnV , nnv , nno }
+\cs_new_protected:Npn \__keys_set_groups:nnn #1#2#3 % wrapper: flag false, selective setting, flag true again
+ {
+ \bool_set_false:N \l__keys_filtered_bool
+ \__keys_set_selective:nnn {#1} {#2} {#3}
+ \bool_set_true:N \l__keys_filtered_bool
+ }
+\cs_new_protected:Npn \__keys_set_selective:nnn % prepends the current content of \l__keys_selective_seq as an extra first argument
+ { \__keys_set_selective:onnn \l__keys_selective_seq }
+\cs_new_protected:Npn \__keys_set_selective:nnnn #1#2#3#4 % #1 = saved sequence content, #2 and #4 = arguments for the key setter, #3 = comma list
+ {
+ \seq_set_from_clist:Nn \l__keys_selective_seq {#3} % install the new list for the duration of the call
+ \bool_if:NTF \l__keys_selective_bool % flag already true: call \keys_set:nn directly
+ { \keys_set:nn }
+ { \__keys_set_selective:nn }
+ {#2} {#4}
+ \tl_set:Nn \l__keys_selective_seq {#1} % put back the content saved in #1
+ }
+\cs_generate_variant:Nn \__keys_set_selective:nnnn { o }
+\cs_new_protected:Npn \__keys_set_selective:nn #1#2 % runs \keys_set:nn with \l__keys_selective_bool true, then sets it false
+ {
\bool_set_true:N \l__keys_selective_bool
- \bool_set_false:N \l__keys_filtered_bool
- \seq_set_from_clist:Nn \l__keys_selective_seq {#2}
- \keys_set:nn {#1} {#3}
+ \keys_set:nn {#1} {#2}
\bool_set_false:N \l__keys_selective_bool
}
-\cs_generate_variant:Nn \keys_set_groups:nnn { nnV , nnv , nno }
\cs_new_protected:Npn \__keys_set:n #1
{
\bool_set_true:N \l__keys_no_value_bool
@@ -16214,6 +16351,3458 @@
\cs_gset_eq:NN \sort_reversed: \sort_return_swapped:
\sort_return_swapped:
}
+%% File: l3tl-build.dtx Copyright (C) 2011-2017 The LaTeX3 Project
+\int_new:N \l__tl_build_start_index_int % index from which \__tl_build_unpack: starts reading \toks registers
+\int_new:N \l__tl_build_index_int % index of the \toks register that \__tl_build_one:n writes next
+\tl_new:N \l__tl_build_result_tl % accumulates the unpacked register contents
+\cs_new_protected:Npn \__tl_build_unpack: % append the contents of the \toks registers in use to \l__tl_build_result_tl
+ {
+ \tl_put_right:Nx \l__tl_build_result_tl
+ {
+ \exp_after:wN \__tl_build_unpack_loop:w
+ \int_use:N \l__tl_build_start_index_int ; % the loop begins at the start index
+ \__prg_break_point: % the loop jumps here via \__prg_break:
+ }
+ }
+\cs_new:Npn \__tl_build_unpack_loop:w #1 ; % #1 = index of the \toks register to output next
+ {
+ \if_int_compare:w #1 = \l__tl_build_index_int % stop on reaching the index that would be written next
+ \exp_after:wN \__prg_break:
+ \fi:
+ \tex_the:D \tex_toks:D #1 \exp_stop_f: % contents of register number #1
+ \exp_after:wN \__tl_build_unpack_loop:w
+ \int_use:N \__int_eval:w #1 + 1 ; % recurse with the next index, evaluated before the call
+ }
+\cs_new_protected:Npn \__tl_build:Nw % start a build whose final assignment is done with \tl_set:Nn
+ { \__tl_build_aux:NNw \tl_set:Nn }
+\cs_new_protected:Npn \__tl_build_x:Nw % final assignment done with \tl_set:Nx
+ { \__tl_build_aux:NNw \tl_set:Nx }
+\cs_new_protected:Npn \__tl_gbuild:Nw % final assignment done with \tl_gset:Nn
+ { \__tl_build_aux:NNw \tl_gset:Nn }
+\cs_new_protected:Npn \__tl_gbuild_x:Nw % final assignment done with \tl_gset:Nx
+ { \__tl_build_aux:NNw \tl_gset:Nx }
+\cs_new_protected:Npn \__tl_build_aux:NNw #1#2 % #1 = assignment function, #2 = variable to assign
+ {
+ \group_begin: % closed again by the \group_end: placed in \__tl_build_end_assignment:n
+ \cs_set:Npn \__tl_build_end_assignment:n % takes no argument itself: the braced result ends up after #1 #2
+ { \group_end: #1 #2 }
+ \int_zero:N \l__tl_build_start_index_int
+ \int_zero:N \l__tl_build_index_int
+ \tl_clear:N \l__tl_build_result_tl
+ }
+\cs_new_protected:Npn \__tl_build_end: % finish a build: flush the registers, then perform the assignment
+ {
+ \__tl_build_unpack:
+ \exp_args:No % expand the result variable once, before \__tl_build_end_assignment:n runs
+ \__tl_build_end_assignment:n \l__tl_build_result_tl
+ }
+\cs_new_eq:NN \__tl_build_end_assignment:n \use_none:n % initial meaning; redefined locally by \__tl_build_aux:NNw
+\cs_new_protected:Npn \__tl_build_one:n #1 % store #1 as the next piece of the token list being built
+ {
+ \tex_toks:D \l__tl_build_index_int {#1}
+ \int_incr:N \l__tl_build_index_int
+ \if_int_compare:w \l__tl_build_index_int > \c_max_register_int % past the last register number
+ \__tl_build_unpack: % move everything stored so far into \l__tl_build_result_tl
+ \l__tl_build_index_int \l__tl_build_start_index_int % and restart from the start index
+ \fi:
+ }
+\cs_new_protected:Npn \__tl_build_one:o #1 % as \__tl_build_one:n, with the first token of #1 expanded once
+ {
+ \tex_toks:D \l__tl_build_index_int \exp_after:wN {#1} % \exp_after:wN acts on the first token inside the braces
+ \int_incr:N \l__tl_build_index_int
+ \if_int_compare:w \l__tl_build_index_int > \c_max_register_int % past the last register number
+ \__tl_build_unpack: % move everything stored so far into \l__tl_build_result_tl
+ \l__tl_build_index_int \l__tl_build_start_index_int % and restart from the start index
+ \fi:
+ }
+\cs_new_protected:Npn \__tl_build_one:x #1 % as \__tl_build_one:n, with #1 fully expanded by \use:x first
+ { \use:x { \__tl_build_one:n {#1} } }
+%% File: l3tl-analysis.dtx Copyright (C) 2011-2012,2015-2017 The LaTeX3 Project%
+\__scan_new:N \s__tl % separator used between the fields of \g__tl_analysis_result_tl
+\tl_new:N \l__tl_analysis_internal_tl
+\cs_new_eq:NN \l__tl_analysis_token ? % set by \tex_futurelet:D in \__tl_analysis_a_loop:w
+\cs_new_eq:NN \l__tl_analysis_char_token ? % set from the string form of a token in the first pass
+\int_new:N \l__tl_analysis_normal_int % running count, reset to zero by \__tl_analysis_a_store:
+\int_new:N \l__tl_analysis_index_int % index of the \toks / \skip registers used next
+\int_new:N \l__tl_analysis_nesting_int % advanced by the type (+1, -1 or 0) in \__tl_analysis_a_store:
+\int_new:N \l__tl_analysis_type_int % 0, 1, -1 or 2, set in \__tl_analysis_a_type:w
+\tl_new:N \g__tl_analysis_result_tl % result of the second pass, set in \__tl_analysis_b:n
+\cs_new:Npn \__tl_analysis_extract_charcode: % expands to a backquote followed by the tail of the meaning of \l__tl_analysis_token
+ {
+ \exp_after:wN \__tl_analysis_extract_charcode_aux:w
+ \token_to_meaning:N \l__tl_analysis_token % expanded before the auxiliary grabs its arguments
+ }
+\cs_new:Npn \__tl_analysis_extract_charcode_aux:w #1 ~ #2 ~ { ` } % drops two space-delimited words, leaves a backquote before what remains
+\cs_new:Npn \__tl_analysis_cs_space_count:NN #1 #2 % #1 = continuation; it receives two integers, each ended by a semicolon, computed from the string of #2
+ {
+ \exp_after:wN #1
+ \__int_value:w \__int_eval:w 0 % first integer: starts at 0, gains 1 per space found
+ \exp_after:wN \__tl_analysis_cs_space_count:w
+ \token_to_str:N #2
+ \fi: \__tl_analysis_cs_space_count_end:w ; ~ ! % trailer: its space ends the last delimited argument
+ }
+\cs_new:Npn \__tl_analysis_cs_space_count:w #1 ~ % #1 = characters up to the next space
+ {
+ \if_false: #1 #1 \fi: % skipped, unless #1 holds the \fi: from the trailer, which ends the conditional early
+ + 1
+ \__tl_analysis_cs_space_count:w
+ }
+\cs_new:Npn \__tl_analysis_cs_space_count_end:w ; #1 \fi: #2 ! % #1 = characters after the last space, #2 = leftovers to discard
+ { \exp_after:wN ; \__int_value:w \str_count_ignore_spaces:n {#1} ; } % ends the first integer, gives the second
+\cs_new_protected:Npn \__tl_analysis:n #1 % analyse the token list #1; the result is stored globally by \__tl_analysis_b:n
+ {
+ \group_begin: % register and parameter changes made by the passes stay local
+ \group_align_safe_begin:
+ \__tl_analysis_setup:n {#1}
+ \__tl_analysis_a:n {#1} % first pass: fills \toks and \skip registers
+ \__tl_analysis_b:n {#1} % second pass: builds \g__tl_analysis_result_tl
+ \group_align_safe_end:
+ \group_end:
+ }
+\cs_new_protected:Npn \__tl_analysis_setup:n #1 % run \__tl_analysis_disable_loop:N on every character of the string form of #1
+ {
+ \int_set:Nn \tex_escapechar:D { -1 } % changes how \tl_to_str:n renders control sequences below
+ \exp_after:wN \__tl_analysis_disable_loop:N
+ \tl_to_str:n {#1} { ~ } { ? \__prg_break: } % two extra braced items follow the string; the last one contains \__prg_break:
+ \__prg_break_point:
+ \scan_stop:
+ }
+\group_begin:
+ \char_set_catcode_active:N \^^@ % the null character is active in the definitions below
+ \cs_new_protected:Npn \__tl_analysis_disable_loop:N #1 % make the active character with the character code of #1 undefined
+ {
+ \tex_lccode:D 0 = `#1 ~ % the null character lowercases to #1
+ \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D % the \let thus applies to the active character #1
+ \__tl_analysis_disable_loop:N
+ }
+ \cs_if_exist:NT \ptex_kanjiskip:D % alternative definition, used when this primitive exists
+ {
+ \cs_gset_protected:Npn \__tl_analysis_disable_loop:N #1
+ {
+ \use_none:n #1 \scan_stop:
+ \if_int_compare:w 256 > `#1 \exp_stop_f: % only character codes below 256 are treated
+ \tex_lccode:D 0 = `#1 ~
+ \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
+ \fi:
+ \__tl_analysis_disable_loop:N
+ }
+ }
+\group_end:
+\cs_new_protected:Npn \__tl_analysis_a:n #1 % first pass over the tokens of #1
+ {
+ \int_set:Nn \tex_escapechar:D { 92 }
+ \int_zero:N \l__tl_analysis_normal_int
+ \int_zero:N \l__tl_analysis_index_int
+ \int_zero:N \l__tl_analysis_nesting_int
+ \if_false: { \fi: \__tl_analysis_a_loop:w #1 } % the closing brace after #1 is left for the loop to meet
+ \int_decr:N \l__tl_analysis_index_int
+ }
+\cs_new_protected:Npn \__tl_analysis_a_loop:w % peek at the next token without removing it, then classify it
+ { \tex_futurelet:D \l__tl_analysis_token \__tl_analysis_a_type:w }
+\cs_new_protected:Npn \__tl_analysis_a_type:w % set \l__tl_analysis_type_int from the peeked token and dispatch on it
+ {
+ \l__tl_analysis_type_int =
+ \if_meaning:w \l__tl_analysis_token \c_space_token
+ 0 % same meaning as \c_space_token
+ \else:
+ \if_catcode:w \exp_not:N \l__tl_analysis_token \c_group_begin_token
+ 1 % same category code as \c_group_begin_token
+ \else:
+ \if_catcode:w \exp_not:N \l__tl_analysis_token \c_group_end_token
+ - 1 % same category code as \c_group_end_token
+ \else:
+ 2 % anything else
+ \fi:
+ \fi:
+ \fi:
+ \exp_stop_f:
+ \if_case:w \l__tl_analysis_type_int
+ \exp_after:wN \__tl_analysis_a_space:w % type 0
+ \or: \exp_after:wN \__tl_analysis_a_bgroup:w % type 1
+ \or: \exp_after:wN \__tl_analysis_a_safe:N % type 2
+ \else: \exp_after:wN \__tl_analysis_a_egroup:w % type -1
+ \fi:
+ }
+\cs_new_protected:Npn \__tl_analysis_a_space:w % handle a token whose meaning is that of a space
+ {
+ \tex_afterassignment:D \__tl_analysis_a_space_test:w % runs once the assignment below is complete
+ \exp_after:wN \cs_set_eq:NN
+ \exp_after:wN \l__tl_analysis_char_token
+ \token_to_str:N % applied to the token that follows in the input, before the assignment reads it
+ }
+\cs_new_protected:Npn \__tl_analysis_a_space_test:w
+ {
+ \if_meaning:w \l__tl_analysis_char_token \c_space_token
+ \tex_toks:D \l__tl_analysis_index_int { \exp_not:n { ~ } } % record the space in the current \toks register
+ \__tl_analysis_a_store:
+ \else:
+ \int_incr:N \l__tl_analysis_normal_int % otherwise only the normal count grows
+ \fi:
+ \__tl_analysis_a_loop:w
+ }
+\group_begin:
+ \char_set_catcode_group_begin:N \^^@ % null character acts as a begin-group character here
+ \char_set_catcode_group_end:N \^^E
+ \cs_new_protected:Npn \__tl_analysis_a_bgroup:w % passes a begin-group null character, balanced by a skipped end-group character
+ { \__tl_analysis_a_group:nw { \exp_after:wN ^^@ \if_false: ^^E \fi: } }
+ \char_set_catcode_group_begin:N \^^B
+ \char_set_catcode_group_end:N \^^@ % null character now acts as an end-group character
+ \cs_new_protected:Npn \__tl_analysis_a_egroup:w % passes an end-group null character, balanced by a skipped begin-group character
+ { \__tl_analysis_a_group:nw { \if_false: ^^B \fi: ^^@ } }
+\group_end:
+\cs_new_protected:Npn \__tl_analysis_a_group:nw #1 % #1 = tokens to store, containing a null character as placeholder
+ {
+ \tex_lccode:D 0 = \__tl_analysis_extract_charcode: \scan_stop: % the null character lowercases to the character taken from the token's meaning
+ \tex_lowercase:D { \tex_toks:D \l__tl_analysis_index_int {#1} } % store #1 with the placeholder replaced
+ \if_int_compare:w \tex_lccode:D 0 = \tex_escapechar:D % that character equals the escape character
+ \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D } % switch it: 92 becomes 47 and 47 becomes 92
+ \fi:
+ \tex_afterassignment:D \__tl_analysis_a_group_test:w % runs once the assignment below is complete
+ \exp_after:wN \cs_set_eq:NN
+ \exp_after:wN \l__tl_analysis_char_token
+ \token_to_str:N % applied to the token that follows in the input
+ }
+\cs_new_protected:Npn \__tl_analysis_a_group_test:w % compare the peeked token with the first character of its string form
+ {
+ \if_charcode:w \l__tl_analysis_token \l__tl_analysis_char_token
+ \__tl_analysis_a_store: % same character code: record a special token
+ \else:
+ \int_incr:N \l__tl_analysis_normal_int % otherwise only the normal count grows
+ \fi:
+ \__tl_analysis_a_loop:w
+ }
+\cs_new_protected:Npn \__tl_analysis_a_store: % record the current counts in \skip register number \l__tl_analysis_index_int
+ {
+ \tex_advance:D \l__tl_analysis_nesting_int \l__tl_analysis_type_int
+ \if_int_compare:w \tex_lccode:D 0 = `\ \exp_stop_f: % lccode of the null character is that of a space (32)
+ \tex_multiply:D \l__tl_analysis_type_int 2 \exp_stop_f: % the type is doubled in that case
+ \fi:
+ \tex_skip:D \l__tl_analysis_index_int
+ = \l__tl_analysis_normal_int sp plus \l__tl_analysis_type_int sp \scan_stop: % natural part = normal count, stretch = type
+ \int_incr:N \l__tl_analysis_index_int
+ \int_zero:N \l__tl_analysis_normal_int
+ \if_int_compare:w \l__tl_analysis_nesting_int = -1 \exp_stop_f: % one more end-group than begin-group seen
+ \cs_set_eq:NN \__tl_analysis_a_loop:w \scan_stop: % the next call to the loop does nothing, ending the pass
+ \fi:
+ }
+\cs_new_protected:Npn \__tl_analysis_a_safe:N #1 % #1 = a token of type 2, grabbed as an argument
+ {
+ \if_charcode:w % true when the string form of #1 is a single character
+ \scan_stop:
+ \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing: % drop the first character of the string form
+ \scan_stop:
+ \int_incr:N \l__tl_analysis_normal_int
+ \else:
+ \__tl_analysis_cs_space_count:NN \__tl_analysis_a_cs:ww #1 % longer string: count the spaces it contains
+ \fi:
+ \__tl_analysis_a_loop:w
+ }
+\cs_new_protected:Npn \__tl_analysis_a_cs:ww #1; #2; % #1 and #2 = the two integers left by \__tl_analysis_cs_space_count:NN
+ {
+ \if_int_compare:w #1 > 0 \exp_stop_f: % the string form contained at least one space
+ \tex_skip:D \l__tl_analysis_index_int
+ = \__int_eval:w \l__tl_analysis_normal_int + 1 sp \scan_stop: % record the normal count plus one
+ \tex_advance:D \l__tl_analysis_index_int #1 \exp_stop_f: % move the index forward by #1
+ \l__tl_analysis_normal_int #2 \exp_stop_f: % restart the normal count at #2
+ \else:
+ \tex_advance:D \l__tl_analysis_normal_int #2 \exp_stop_f: % no space: add #2 to the normal count
+ \fi:
+ }
+\cs_new_protected:Npn \__tl_analysis_b:n #1 % second pass: build \g__tl_analysis_result_tl by expansion
+ {
+ \tl_gset:Nx \g__tl_analysis_result_tl
+ {
+ \__tl_analysis_b_loop:w 0; #1 % start at register index 0, followed by the tokens of #1
+ \__prg_break_point: % \__tl_analysis_b_special:w jumps here via \__prg_break:
+ }
+ }
+\cs_new:Npn \__tl_analysis_b_loop:w #1; % #1 = register index
+ {
+ \exp_after:wN \__tl_analysis_b_normals:ww
+ \__int_value:w \tex_skip:D #1 ; #1 ; % integer value of \skip register #1, then the index itself
+ }
+\cs_new:Npn \__tl_analysis_b_normals:ww #1; % #1 = count read from the register; the index follows in the input
+ {
+ \if_int_compare:w #1 = 0 \exp_stop_f:
+ \__tl_analysis_b_special:w % count zero: this function also removes the \fi: and the call below
+ \fi:
+ \__tl_analysis_b_normal:wwN #1;
+ }
+\cs_new:Npn \__tl_analysis_b_normal:wwN #1; #2; #3 % #1 = count, #2 = register index, #3 = next token
+ {
+ \exp_not:n { \exp_not:n { #3 } } \s__tl % first field of the entry: the token wrapped in \exp_not:n
+ \if_charcode:w % true when the string form of #3 is a single character
+ \scan_stop:
+ \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
+ \scan_stop:
+ \exp_after:wN \__tl_analysis_b_char:Nww
+ \else:
+ \exp_after:wN \__tl_analysis_b_cs:Nww
+ \fi:
+ #3 #1; #2; % the chosen function takes #3; count and index stay in the input
+ }
+\cs_new:Npx \__tl_analysis_b_char:Nww #1 % defined by x-expansion: tokens without \exp_not:N are expanded at definition time
+ {
+ \exp_not:N \if_meaning:w #1 \exp_not:N \tex_undefined:D
+ \token_to_str:N D \exp_not:N \else: % same meaning as \tex_undefined:D
+ \exp_not:N \if_catcode:w #1 \c_catcode_other_token
+ \token_to_str:N C \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_catcode_letter_token
+ \token_to_str:N B \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_toggle_token 3 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_alignment_token 4 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_superscript_token 7 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_math_subscript_token 8 \exp_not:N \else:
+ \exp_not:N \if_catcode:w #1 \c_space_token
+ \token_to_str:N A \exp_not:N \else:
+ 6 % none of the tests above succeeded
+ \exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: } % closes the eight conditionals opened above
+ \exp_not:N \__int_value:w `#1 \s__tl % character code of #1, then the separator
+ \exp_not:N \exp_after:wN \exp_not:N \__tl_analysis_b_normals:ww
+ \exp_not:N \__int_value:w \exp_not:N \__int_eval:w - 1 + % the count following in the input is reduced by one
+ }
+\cs_new:Npn \__tl_analysis_b_cs:Nww #1 % #1 = token whose string form is longer than one character
+ {
+ 0 -1 \s__tl % fields written for such a token: 0, then -1, then the separator
+ \__tl_analysis_cs_space_count:NN \__tl_analysis_b_cs_test:ww #1
+ }
+\cs_new:Npn \__tl_analysis_b_cs_test:ww #1 ; #2 ; #3 ; #4 ; % #1, #2 = integers from the space count, #3 = count, #4 = register index
+ {
+ \exp_after:wN \__tl_analysis_b_normals:ww
+ \__int_value:w \__int_eval:w
+ \if_int_compare:w #1 = 0 \exp_stop_f:
+ #3 % no space in the name: keep using the current count
+ \else:
+ \tex_skip:D \__int_eval:w #4 + #1 \__int_eval_end: % otherwise read the count from register #4 + #1
+ \fi:
+ - #2
+ \exp_after:wN ;
+ \__int_value:w \__int_eval:w #4 + #1 ; % new register index
+ }
+\group_begin:
+ \char_set_catcode_other:N A % the A written in the definition below has category code 12
+ \cs_new:Npn \__tl_analysis_b_special:w
+ \fi: \__tl_analysis_b_normal:wwN 0 ; #1 ; % removes the rest of \__tl_analysis_b_normals:ww; #1 = register index
+ {
+ \fi: % put back the \fi: removed by the parameter text
+ \if_int_compare:w #1 = \l__tl_analysis_index_int % index reached the value left by the first pass: stop
+ \exp_after:wN \__prg_break:
+ \fi:
+ \tex_the:D \tex_toks:D #1 \s__tl % first field: contents of \toks register #1
+ \if_case:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f: % second field depends on the stretch stored in \skip register #1
+ A % stretch 0
+ \or: 1 % stretch 1
+ \or: 1 % stretch 2
+ \else: 2 % any other value
+ \fi:
+ \if_int_odd:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
+ \exp_after:wN \__tl_analysis_b_special_char:wN \__int_value:w
+ \else:
+ \exp_after:wN \__tl_analysis_b_special_space:w \__int_value:w
+ \fi:
+ \__int_eval:w 1 + #1 \exp_after:wN ; % next register index
+ \token_to_str:N % applied to the token that follows in the input
+ }
+\group_end:
+\cs_new:Npn \__tl_analysis_b_special_char:wN #1 ; #2 % #1 = next register index, #2 = character
+ {
+ \__int_value:w `#2 \s__tl % third field: character code of #2
+ \__tl_analysis_b_loop:w #1 ;
+ }
+\cs_new:Npn \__tl_analysis_b_special_space:w #1 ; ~ % #1 = next register index; a space token is removed
+ {
+ 32 \s__tl % third field: 32
+ \__tl_analysis_b_loop:w #1 ;
+ }
+\cs_new_protected:Npn \__tl_analysis_map_inline:nn #1 % #1 = token list to analyse; the inline code is taken by the auxiliary
+ {
+ \__tl_analysis:n {#1}
+ \int_gincr:N \g__prg_map_int % one more active mapping level
+ \exp_args:Nc \__tl_analysis_map_inline_aux:Nn % the name built below becomes the first argument
+ { __tl_analysis_map_inline_ \int_use:N \g__prg_map_int :wNw }
+ }
+\cs_new_protected:Npn \__tl_analysis_map_inline_aux:Nn #1#2 % #1 = per-level function, #2 = inline code
+ {
+ \cs_gset_protected:Npn #1 ##1 \s__tl ##2 ##3 \s__tl % one entry of the result: three fields in two \s__tl-delimited parts
+ {
+ \use_none:n ##2 % no effect for a one-token ##2; runs \tl_map_break: on the end marker
+ #2
+ #1 % continue with the next entry
+ }
+ \exp_after:wN #1
+ \g__tl_analysis_result_tl
+ \s__tl { ? \tl_map_break: } \s__tl % end marker appended after the result
+ \__prg_break_point:Nn \tl_map_break: { \int_gdecr:N \g__prg_map_int } % undoes the \int_gincr:N above
+ }
+\cs_new_protected:Npn \tl_show_analysis:N #1 % show the analysis of the token list variable #1
+ {
+ \tl_if_exist:NTF #1
+ {
+ \exp_args:No \__tl_analysis:n {#1} % analyse the content of the variable
+ \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
+ { \token_to_str:N #1 } { \tl_if_empty:NTF #1 { } { ? } } { } { } % second message argument is empty exactly when #1 is empty
+ \__tl_analysis_show:
+ }
+ { \tl_show:N #1 } % variable does not exist: defer to \tl_show:N
+ }
+\cs_new_protected:Npn \tl_show_analysis:n #1 % show the analysis of the token list #1
+ {
+ \__tl_analysis:n {#1}
+ \__msg_show_pre:nnxxxx { LaTeX / kernel } { show-tl-analysis }
+ { } { \tl_if_empty:nTF {#1} { } { ? } } { } { } % no name given; second message argument is empty exactly when #1 is empty
+ \__tl_analysis_show:
+ }
+\cs_new_protected:Npn \__tl_analysis_show: % pass one formatted line per entry of the result to \__msg_show_wrap:n
+ {
+ \group_begin:
+ \exp_args:NNx % x-expand the braced text below before \group_end: is executed
+ \group_end:
+ \__msg_show_wrap:n
+ {
+ \exp_after:wN \__tl_analysis_show_loop:wNw \g__tl_analysis_result_tl
+ \s__tl { ? \__prg_break: } \s__tl % end marker appended after the result
+ \__prg_break_point:
+ }
+ }
+\cs_new:Npn \__tl_analysis_show_loop:wNw #1 \s__tl #2 #3 \s__tl % #1 = tokens, #2 = category code as a hexadecimal digit, #3 = remaining field
+ {
+ \use_none:n #2 % no effect for a one-token #2; runs \__prg_break: on the end marker
+ \exp_not:n { \\ > \ \ } % start of each output line
+ \if_int_compare:w "#2 = 0 \exp_stop_f:
+ \exp_after:wN \__tl_analysis_show_cs:n % code 0
+ \else:
+ \if_int_compare:w "#2 = 13 \exp_stop_f:
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \__tl_analysis_show_active:n % code 13
+ \else:
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \__tl_analysis_show_normal:n % any other code
+ \fi:
+ \fi:
+ {#1}
+ \__tl_analysis_show_loop:wNw
+ }
+\cs_new:Npn \__tl_analysis_show_normal:n #1 % output: string form, a space, then the meaning in parentheses
+ {
+ \exp_after:wN \token_to_str:N #1 ~ % the first token of #1 is expanded once before \token_to_str:N acts
+ ( \exp_after:wN \token_to_meaning:N #1 )
+ }
+\cs_new:Npn \__tl_analysis_show_value:N #1 % for the token kinds tested below, expands to an equals sign and the result of \tex_the:D #1
+ {
+ \token_if_expandable:NF #1 % expandable tokens give nothing
+ {
+ \token_if_chardef:NTF #1 \__prg_break: { } % each successful test jumps to the break point
+ \token_if_mathchardef:NTF #1 \__prg_break: { }
+ \token_if_dim_register:NTF #1 \__prg_break: { }
+ \token_if_int_register:NTF #1 \__prg_break: { }
+ \token_if_skip_register:NTF #1 \__prg_break: { }
+ \token_if_toks_register:NTF #1 \__prg_break: { }
+ \use_none:nnn % no test succeeded: drop the three items that follow
+ \__prg_break_point:
+ \use:n { \exp_after:wN = \tex_the:D #1 }
+ }
+ }
+\cs_new:Npn \__tl_analysis_show_cs:n #1 % long form with the label for control sequences
+ { \exp_args:No \__tl_analysis_show_long:nn {#1} { control~sequence= } }
+\cs_new:Npn \__tl_analysis_show_active:n #1 % long form with the label for active characters
+ { \exp_args:No \__tl_analysis_show_long:nn {#1} { active~character= } }
+\cs_new:Npn \__tl_analysis_show_long:nn #1 % #1 = token; the label is left in the input as the fourth argument of the auxiliary
+ {
+ \__tl_analysis_show_long_aux:oofn
+ { \token_to_str:N #1 }
+ { \token_to_meaning:N #1 }
+ { \__tl_analysis_show_value:N #1 }
+ }
+\cs_new:Npn \__tl_analysis_show_long_aux:nnnn #1#2#3#4 % #1 = string form, #2 = meaning, #3 = value part, #4 = label
+ {
+ \int_compare:nNnTF
+ { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
+ > { \l_iow_line_count_int - 3 } % the full text is longer than the line count minus 3
+ {
+ \str_range:nnn { #1 ~ ( #4 #2 #3 ) } { 1 } % keep only the leading characters
+ {
+ \l_iow_line_count_int - 3
+ - \str_count:N \c__tl_analysis_show_etc_str % leaving room for the marker appended below
+ }
+ \c__tl_analysis_show_etc_str
+ }
+ { #1 ~ ( #4 #2 #3 ) } % short enough: output the full text
+ }
+\cs_generate_variant:Nn \__tl_analysis_show_long_aux:nnnn { oof }
+\tl_const:Nx \c__tl_analysis_show_etc_str % (
+ { \token_to_str:N \ETC.) }
+\__msg_kernel_new:nnn { kernel } { show-tl-analysis } % heading used by \tl_show_analysis:N and \tl_show_analysis:n
+ {
+ The~token~list~ \tl_if_empty:nF {#1} { #1 ~ } % #1 = name, omitted from the text when empty
+ \tl_if_empty:nTF {#2} % #2 is empty exactly when the token list is empty
+ { is~empty }
+ { contains~the~tokens: }
+ }
+%% File: l3regex.dtx Copyright (C) 2011-2017 The LaTeX3 Project
+\cs_new_protected:Npn \__regex_standard_escapechar: % set the escape character to the backslash
+ { \int_set:Nn \tex_escapechar:D { `\\ } }
+\cs_new:Npn \__regex_toks_use:w { \tex_the:D \tex_toks:D } % a register number must follow in the input
+\cs_new_protected:Npn \__regex_toks_clear:N #1 % empty the \toks register whose number is #1
+ { \tex_toks:D #1 { } }
+\cs_new_eq:NN \__regex_toks_set:Nn \tex_toks:D % register number, then the braced new content
+\cs_new_protected:Npn \__regex_toks_set:No #1 % same, with the first token of the braced content expanded once
+ { \__regex_toks_set:Nn #1 \exp_after:wN }
+\cs_new_protected:Npn \__regex_toks_memcpy:NNn #1#2#3 % copy #3 consecutive \toks registers; #1 and #2 are integer variables holding register numbers
+ {
+ \prg_replicate:nn {#3}
+ {
+ \tex_toks:D #1 = \tex_toks:D #2 % register numbered #1 receives the content of register numbered #2
+ \int_incr:N #1 % both integer variables are incremented, so they are changed by this function
+ \int_incr:N #2
+ }
+ }
+\cs_new_protected:Npn \__regex_toks_put_left:Nx #1#2 % prepend the x-expansion of #2 to \toks register #1
+ {
+ \cs_set:Npx \__regex_tmp:w { #2 } % the x-expansion is obtained by defining a temporary macro
+ \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN
+ { \exp_after:wN \__regex_tmp:w \tex_the:D \tex_toks:D #1 } % old content is expanded first, then the temporary macro
+ }
+\cs_new_protected:Npn \__regex_toks_put_right:Nx #1#2 % append the x-expansion of #2 to \toks register #1
+ {
+ \cs_set:Npx \__regex_tmp:w {#2}
+ \tex_toks:D #1 \exp_after:wN
+ { \tex_the:D \tex_toks:D \exp_after:wN #1 \__regex_tmp:w } % the temporary macro is expanded while the register number is read
+ }
+\cs_new_protected:Npn \__regex_toks_put_right:Nn #1#2 % append #2, unexpanded, to \toks register #1
+ { \tex_toks:D #1 \exp_after:wN { \tex_the:D \tex_toks:D #1 #2 } }
+\cs_new:Npn \__regex_current_cs_to_str: % apply \cs_to_str:N to the content of \toks register number \l__regex_current_pos_int
+ {
+ \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N
+ \tex_the:D \tex_toks:D \l__regex_current_pos_int
+ }
+\cs_new:Npn \__regex_tmp:w { } % temporary macro, redefined by the \__regex_toks_put_... functions
+\tl_new:N \l__regex_internal_a_tl % scratch variables
+\tl_new:N \l__regex_internal_b_tl
+\int_new:N \l__regex_internal_a_int
+\int_new:N \l__regex_internal_b_int
+\int_new:N \l__regex_internal_c_int
+\bool_new:N \l__regex_internal_bool
+\seq_new:N \l__regex_internal_seq
+\tl_new:N \g__regex_internal_tl
+\tl_const:Nn \c__regex_no_match_regex % used below as the initial value of \l__regex_internal_regex
+ {
+ \__regex_branch:n
+ { \__regex_class:NnnnN \c_true_bool { } { 1 } { 0 } \c_true_bool }
+ }
+\__intarray_new:Nn \g__regex_charcode_intarray { 65536 } % NOTE(review): 65536 is presumably the array size - confirm against \__intarray_new:Nn
+\__intarray_new:Nn \g__regex_catcode_intarray { 65536 }
+\__intarray_new:Nn \g__regex_balance_intarray { 65536 }
+\int_new:N \l__regex_balance_int
+\tl_new:N \l__regex_cs_name_tl % set in \__regex_item_cs:n to the name of the current control sequence
+\int_const:Nn \c__regex_ascii_min_int { 0 }
+\int_const:Nn \c__regex_ascii_max_control_int { 31 }
+\int_const:Nn \c__regex_ascii_max_int { 127 }
+\int_const:Nn \c__regex_ascii_lower_int { `a - `A } % offset between a lowercase letter and its uppercase form
+\cs_new_protected:Npn \__regex_break_true:w % discard everything up to \__regex_break_point:TF and use its first argument
+ #1 \__regex_break_point:TF #2 #3 {#2}
+\cs_new_protected:Npn \__regex_break_point:TF #1 #2 { #2 } % reached without a break: use the second argument
+\cs_new_protected:Npn \__regex_item_reverse:n #1 % invert the outcome of the test code #1
+ {
+ #1
+ \__regex_break_point:TF { } \__regex_break_true:w % a break inside #1 ends here doing nothing; otherwise break true
+ }
+\cs_new_protected:Npn \__regex_item_caseful_equal:n #1 % break true when the current character code equals #1
+ {
+ \if_int_compare:w #1 = \l__regex_current_char_int
+ \exp_after:wN \__regex_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_caseful_range:nn #1 #2 % break true when #1 <= current character code <= #2
+ {
+ \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int % i.e. #1 is not above the current code
+ \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int % i.e. #2 is not below the current code
+ \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_caseless_equal:n #1 % as the caseful test, also trying the case-changed character
+ {
+ \if_int_compare:w #1 = \l__regex_current_char_int
+ \exp_after:wN \__regex_break_true:w
+ \fi:
+ \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int % the value \c_max_int means: not computed yet
+ \__regex_compute_case_changed_char:
+ \fi:
+ \if_int_compare:w #1 = \l__regex_case_changed_char_int
+ \exp_after:wN \__regex_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_caseless_range:nn #1 #2 % range test on the current character, then on its case-changed form
+ {
+ \reverse_if:N \if_int_compare:w #1 > \l__regex_current_char_int
+ \reverse_if:N \if_int_compare:w #2 < \l__regex_current_char_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
+ \fi:
+ \fi:
+ \if_int_compare:w \l__regex_case_changed_char_int = \c_max_int % the value \c_max_int means: not computed yet
+ \__regex_compute_case_changed_char:
+ \fi:
+ \reverse_if:N \if_int_compare:w #1 > \l__regex_case_changed_char_int
+ \reverse_if:N \if_int_compare:w #2 < \l__regex_case_changed_char_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_compute_case_changed_char: % set \l__regex_case_changed_char_int from the current character code
+ {
+ \int_set_eq:NN \l__regex_case_changed_char_int \l__regex_current_char_int % default: unchanged
+ \if_int_compare:w \l__regex_current_char_int > `Z \exp_stop_f:
+ \if_int_compare:w \l__regex_current_char_int > `z \exp_stop_f: \else:
+ \if_int_compare:w \l__regex_current_char_int < `a \exp_stop_f: \else:
+ \int_sub:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int } % code between a and z: subtract the offset
+ \fi:
+ \fi:
+ \else:
+ \if_int_compare:w \l__regex_current_char_int < `A \exp_stop_f: \else:
+ \int_add:Nn \l__regex_case_changed_char_int { \c__regex_ascii_lower_int } % code between A and Z: add the offset
+ \fi:
+ \fi:
+ }
+\cs_new_eq:NN \__regex_item_equal:n ? % placeholder meanings
+\cs_new_eq:NN \__regex_item_range:nn ?
+\cs_new_protected:Npn \__regex_item_catcode: % hexadecimal number selected by \l__regex_current_catcode_int
+ {
+ " % hexadecimal marker; the digits come from the chosen case
+ \if_case:w \l__regex_current_catcode_int
+ 1 \or: 4 \or: 10 \or: 40 % cases 0 to 3
+ \or: 100 \or: \or: 1000 \or: 4000 % cases 4 to 7, case 5 is empty
+ \or: 10000 \or: \or: 100000 \or: 400000 % cases 8 to 11, case 9 is empty
+ \or: 1000000 \or: 4000000 \else: 1*0 % cases 12 and 13; any other value gives 1*0
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_catcode:nT #1 % #1 = integer; the code that follows is used only if the quotient below is odd
+ {
+ \if_int_odd:w \__int_eval:w #1 / \__regex_item_catcode: \__int_eval_end: % #1 divided by the number for the current category code
+ \exp_after:wN \use:n
+ \else:
+ \exp_after:wN \use_none:n
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_catcode_reverse:nT #1#2 % same test, with #2 wrapped in \__regex_item_reverse:n
+ { \__regex_item_catcode:nT {#1} { \__regex_item_reverse:n {#2} } }
+\cs_new_protected:Npn \__regex_item_exact:nn #1#2 % break true when category code is #1 and character code is #2
+ {
+ \if_int_compare:w #1 = \l__regex_current_catcode_int
+ \if_int_compare:w #2 = \l__regex_current_char_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \__regex_break_true:w
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_item_exact_cs:n #1 % #1 = control sequence names separated by \scan_stop:
+ {
+ \int_compare:nNnTF \l__regex_current_catcode_int = 0 % only tried when the current category code is 0
+ {
+ \tl_set:Nx \l__regex_internal_a_tl
+ { \scan_stop: \__regex_current_cs_to_str: \scan_stop: } % current name, delimited on both sides
+ \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l__regex_internal_a_tl % look for the delimited name in the delimited list
+ { \__regex_break_true:w } { }
+ }
+ { }
+ }
+\cs_new_protected:Npn \__regex_item_cs:n #1 % match the name of the current control sequence against the regex #1
+ {
+ \int_compare:nNnT \l__regex_current_catcode_int = 0 % only tried when the current category code is 0
+ {
+ \group_begin: % the inner match runs inside a group
+ \tl_set:Nx \l__regex_cs_name_tl { \__regex_current_cs_to_str: }
+ \__regex_single_match:
+ \__regex_disable_submatches:
+ \__regex_build_for_cs:n {#1}
+ \bool_set_eq:NN \l__regex_saved_success_bool \g__regex_success_bool % save the outer success flag
+ \exp_args:NV \__regex_match:n \l__regex_cs_name_tl
+ \if_meaning:w \c_true_bool \g__regex_success_bool
+ \group_insert_after:N \__regex_break_true:w % on success, break true once the group has ended
+ \fi:
+ \bool_gset_eq:NN \g__regex_success_bool \l__regex_saved_success_bool % restore the outer success flag
+ \group_end:
+ }
+ }
+\cs_new_protected:Npn \__regex_prop_d: % characters 0 to 9
+ { \__regex_item_caseful_range:nn { `0 } { `9 } }
+\cs_new_protected:Npn \__regex_prop_h: % space and ^^I
+ {
+ \__regex_item_caseful_equal:n { `\ }
+ \__regex_item_caseful_equal:n { `\^^I }
+ }
+\cs_new_protected:Npn \__regex_prop_s: % space, ^^I, ^^J, ^^L and ^^M
+ {
+ \__regex_item_caseful_equal:n { `\ }
+ \__regex_item_caseful_equal:n { `\^^I }
+ \__regex_item_caseful_equal:n { `\^^J }
+ \__regex_item_caseful_equal:n { `\^^L }
+ \__regex_item_caseful_equal:n { `\^^M }
+ }
+\cs_new_protected:Npn \__regex_prop_v:
+ { \__regex_item_caseful_range:nn { `\^^J } { `\^^M } } % lf, vtab, ff, cr
+\cs_new_protected:Npn \__regex_prop_w: % letters a-z and A-Z, digits, underscore
+ {
+ \__regex_item_caseful_range:nn { `a } { `z }
+ \__regex_item_caseful_range:nn { `A } { `Z }
+ \__regex_item_caseful_range:nn { `0 } { `9 }
+ \__regex_item_caseful_equal:n { `_ }
+ }
+\cs_new_protected:Npn \__regex_prop_N: % any character other than ^^J
+ {
+ \__regex_item_reverse:n
+ { \__regex_item_caseful_equal:n { `\^^J } }
+ }
+\cs_new_protected:Npn \__regex_posix_alnum: % alpha or digit
+ { \__regex_posix_alpha: \__regex_posix_digit: }
+\cs_new_protected:Npn \__regex_posix_alpha: % lower or upper
+ { \__regex_posix_lower: \__regex_posix_upper: }
+\cs_new_protected:Npn \__regex_posix_ascii: % codes 0 to 127
+ {
+ \__regex_item_caseful_range:nn
+ \c__regex_ascii_min_int
+ \c__regex_ascii_max_int
+ }
+\cs_new_eq:NN \__regex_posix_blank: \__regex_prop_h:
+\cs_new_protected:Npn \__regex_posix_cntrl: % codes 0 to 31, and code 127
+ {
+ \__regex_item_caseful_range:nn
+ \c__regex_ascii_min_int
+ \c__regex_ascii_max_control_int
+ \__regex_item_caseful_equal:n \c__regex_ascii_max_int
+ }
+\cs_new_eq:NN \__regex_posix_digit: \__regex_prop_d:
+\cs_new_protected:Npn \__regex_posix_graph: % from the exclamation mark to the tilde
+ { \__regex_item_caseful_range:nn { `! } { `\~ } }
+\cs_new_protected:Npn \__regex_posix_lower:
+ { \__regex_item_caseful_range:nn { `a } { `z } }
+\cs_new_protected:Npn \__regex_posix_print: % from the space to the tilde
+ { \__regex_item_caseful_range:nn { `\ } { `\~ } }
+\cs_new_protected:Npn \__regex_posix_punct: % four ranges whose endpoints are not letters or digits
+ {
+ \__regex_item_caseful_range:nn { `! } { `/ }
+ \__regex_item_caseful_range:nn { `: } { `@ }
+ \__regex_item_caseful_range:nn { `[ } { `` }
+ \__regex_item_caseful_range:nn { `\{ } { `\~ }
+ }
+\cs_new_protected:Npn \__regex_posix_space: % space, and the range ^^I to ^^M
+ {
+ \__regex_item_caseful_equal:n { `\ }
+ \__regex_item_caseful_range:nn { `\^^I } { `\^^M }
+ }
+\cs_new_protected:Npn \__regex_posix_upper:
+ { \__regex_item_caseful_range:nn { `A } { `Z } }
+\cs_new_eq:NN \__regex_posix_word: \__regex_prop_w:
+\cs_new_protected:Npn \__regex_posix_xdigit: % digits, A to F, a to f
+ {
+ \__regex_posix_digit:
+ \__regex_item_caseful_range:nn { `A } { `F }
+ \__regex_item_caseful_range:nn { `a } { `f }
+ }
+\cs_new_protected:Npn \__regex_escape_use:nnnn #1#2#3#4 % #1, #2, #3 = code for unescaped, escaped and raw characters; #4 = string to process
+ {
+ \__tl_build:Nw \l__regex_internal_a_tl % the built token list is assigned to this variable by \__tl_build_end:
+ \cs_set:Npn \__regex_escape_unescaped:N ##1 { #1 }
+ \cs_set:Npn \__regex_escape_escaped:N ##1 { #2 }
+ \cs_set:Npn \__regex_escape_raw:N ##1 { #3 }
+ \__regex_standard_escapechar:
+ \tl_gset:Nx \g__regex_internal_tl { \__str_to_other_fast:n {#4} }
+ \tl_set:Nx \l__regex_internal_b_tl
+ {
+ \exp_after:wN \__regex_escape_loop:N \g__regex_internal_tl
+ { break } \__prg_break_point: % the braced word break is the end marker of the loop
+ }
+ \__tl_build_one:o \l__regex_internal_b_tl
+ \__tl_build_end:
+ \l__regex_internal_a_tl % finally run the token list that was built
+ }
+\cs_new:Npn \__regex_escape_loop:N #1 % #1 = next item of the string
+ {
+ \cs_if_exist_use:cF { __regex_escape_\token_to_str:N #1:w } % a function named after #1 takes over when it exists
+ { \__regex_escape_unescaped:N #1 }
+ \__regex_escape_loop:N
+ }
+\cs_new:cpn { __regex_escape_ \c_backslash_str :w } % handler for a backslash: removes the pending loop call and grabs the next item
+ \__regex_escape_loop:N #1
+ {
+ \cs_if_exist_use:cF { __regex_escape_/\token_to_str:N #1:w } % handlers for escaped items have a slash in their name
+ { \__regex_escape_escaped:N #1 }
+ \__regex_escape_loop:N
+ }
+\cs_new_eq:NN \__regex_escape_unescaped:N ? % placeholders, redefined by \__regex_escape_use:nnnn
+\cs_new_eq:NN \__regex_escape_escaped:N ?
+\cs_new_eq:NN \__regex_escape_raw:N ?
+\cs_new_eq:NN \__regex_escape_break:w \__prg_break: % handler used when the loop reaches the end marker break
+\cs_new:cpn { __regex_escape_/break:w } % end marker met right after a backslash
+ {
+ \if_false: { \fi: } % unmatched closing brace, balanced only for the definition
+ \__msg_kernel_error:nn { regex } { trailing-backslash }
+ \exp_after:wN \use_none:n \exp_after:wN { \if_false: } \fi: % unmatched opening brace, balanced only for the definition
+ }
+\cs_new:cpn { __regex_escape_~:w } { } % handler for a space: produces nothing
+\cs_new:cpx { __regex_escape_/a:w } % backslash-a gives the raw character ^^G
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^G }
+\cs_new:cpx { __regex_escape_/t:w } % backslash-t gives the raw character ^^I
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^I }
+\cs_new:cpx { __regex_escape_/n:w } % backslash-n gives the raw character ^^J
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^J }
+\cs_new:cpx { __regex_escape_/f:w } % backslash-f gives the raw character ^^L
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^L }
+\cs_new:cpx { __regex_escape_/r:w } % backslash-r gives the raw character ^^M
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^M }
+\cs_new:cpx { __regex_escape_/e:w } % backslash-e gives the raw character ^^[
+ { \exp_not:N \__regex_escape_raw:N \iow_char:N \^^[ }
+\cs_new:cpn { __regex_escape_/x:w } \__regex_escape_loop:N % backslash-x: removes the pending loop call and reads a hexadecimal number
+ {
+ \exp_after:wN \__regex_escape_x_end:w
+ \__int_value:w "0 \__regex_escape_x_test:N % digits are appended by the functions that follow, up to a semicolon
+ }
+\cs_new:Npn \__regex_escape_x_end:w #1 ; % #1 = the number that was read
+ {
+ \int_compare:nNnTF {#1} > \c_max_char_int
+ {
+ \if_false: { \fi: } % unmatched closing brace, balanced only for the definition
+ \__tl_build_one:o \l__regex_internal_b_tl
+ \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
+ \tl_set:Nx \l__regex_internal_b_tl
+ { \if_false: } \fi: % unmatched opening brace, balanced only for the definition
+ }
+ {
+ \exp_last_unbraced:Nf \__regex_escape_raw:N
+ { \char_generate:nn {#1} { 12 } } % character with code #1 and category code 12
+ }
+ }
+\cs_new:Npn \__regex_escape_x_test:N #1 % look at the first item after backslash-x
+ {
+ \str_if_eq_x:nnTF {#1} { break } { ; } % end marker: close the number
+ {
+ \if_charcode:w \c_space_token #1
+ \exp_after:wN \__regex_escape_x_test:N % space: skip it and test the next item
+ \else:
+ \exp_after:wN \__regex_escape_x_testii:N
+ \exp_after:wN #1
+ \fi:
+ }
+ }
+\cs_new:Npn \__regex_escape_x_testii:N #1 % #1 is neither the end marker nor a space
+ {
+ \if_charcode:w \c_left_brace_str #1
+ \exp_after:wN \__regex_escape_x_loop:N % left brace: braced form
+ \else:
+ \__regex_hexadecimal_use:NTF #1
+ { \exp_after:wN \__regex_escape_x:N } % first digit used: look for a second one
+ { ; \exp_after:wN \__regex_escape_loop:N \exp_after:wN #1 } % not a digit: close the number and hand #1 back to the loop
+ \fi:
+ }
+\cs_new:Npn \__regex_escape_x:N #1 % item following a first hexadecimal digit
+ {
+ \str_if_eq_x:nnTF {#1} { break } { ; }
+ {
+ \__regex_hexadecimal_use:NTF #1
+ { ; \__regex_escape_loop:N } % second digit used: close the number
+ { ; \__regex_escape_loop:N #1 } % not a digit: close the number and hand #1 back to the loop
+ }
+ }
+\cs_new:Npn \__regex_escape_x_loop:N #1 % read digits in the braced form of backslash-x
+ {
+ \str_if_eq_x:nnTF {#1} { break }
+ { ; \__regex_escape_x_loop_error:n { } {#1} } % end marker before any right brace: error
+ {
+ \__regex_hexadecimal_use:NTF #1
+ { \__regex_escape_x_loop:N } % digit used: keep reading
+ {
+ \token_if_eq_charcode:NNTF \c_space_token #1
+ { \__regex_escape_x_loop:N } % spaces are skipped
+ {
+ ; % anything else closes the number
+ \exp_after:wN
+ \token_if_eq_charcode:NNTF \c_right_brace_str #1
+ { \__regex_escape_loop:N } % right brace: back to the main loop
+ { \__regex_escape_x_loop_error:n {#1} } % other character: error
+ }
+ }
+ }
+ }
+\cs_new:Npn \__regex_escape_x_loop_error:n #1 % #1 = offending character, passed on to the error message
+ {
+ \if_false: { \fi: } % unmatched closing brace, balanced only for the definition
+ \__tl_build_one:o \l__regex_internal_b_tl
+ \__msg_kernel_error:nnx { regex } { x-missing-rbrace } {#1}
+ \tl_set:Nx \l__regex_internal_b_tl
+ { \if_false: } \fi: \__regex_escape_loop:N #1 % unmatched opening brace; the loop then resumes with #1
+ }
+\prg_new_conditional:Npnn \__regex_hexadecimal_use:N #1 { TF } % true when #1 is a hexadecimal digit; the digit is also left in the input
+ {
+ \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f: % holds when #1 is accepted as a digit after the hexadecimal marker
+ #1 \prg_return_true:
+ \else:
+ \if_case:w \__int_eval:w
+ \exp_after:wN ` \token_to_str:N #1 - `a % distance of the character code of #1 from that of a
+ \__int_eval_end:
+ A % distances 0 to 5 leave the uppercase letter A to F
+ \or: B
+ \or: C
+ \or: D
+ \or: E
+ \or: F
+ \else:
+ \prg_return_false:
+ \exp_after:wN \use_none:n % removes the \prg_return_true: that follows the \fi:
+ \fi:
+ \prg_return_true:
+ \fi:
+ }
+\prg_new_conditional:Npnn \__regex_char_if_special:N #1 { TF } % true for codes from the space up to 126 that are neither letters nor digits
+ {
+ \if_int_compare:w `#1 > `Z \exp_stop_f:
+ \if_int_compare:w `#1 > `z \exp_stop_f:
+ \if_int_compare:w `#1 < \c__regex_ascii_max_int % above z: true only below 127
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \else:
+ \if_int_compare:w `#1 < `a \exp_stop_f: % between Z and a: true
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \fi:
+ \else:
+ \if_int_compare:w `#1 > `9 \exp_stop_f:
+ \if_int_compare:w `#1 < `A \exp_stop_f: % between 9 and A: true
+ \prg_return_true: \else: \prg_return_false: \fi:
+ \else:
+ \if_int_compare:w `#1 < `0 \exp_stop_f:
+ \if_int_compare:w `#1 < `\ \exp_stop_f: % below 0: false when also below the space
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \else: \prg_return_false: \fi:
+ \fi:
+ \fi:
+ }
+\prg_new_conditional:Npnn \__regex_char_if_alphanumeric:N #1 { TF } % true for a to z, A to Z and 0 to 9
+ {
+ \if_int_compare:w `#1 > `Z \exp_stop_f:
+ \if_int_compare:w `#1 > `z \exp_stop_f:
+ \prg_return_false: % above z
+ \else:
+ \if_int_compare:w `#1 < `a \exp_stop_f: % between Z and a: false
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \fi:
+ \else:
+ \if_int_compare:w `#1 > `9 \exp_stop_f:
+ \if_int_compare:w `#1 < `A \exp_stop_f: % between 9 and A: false
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \else:
+ \if_int_compare:w `#1 < `0 \exp_stop_f: % below 0: false
+ \prg_return_false: \else: \prg_return_true: \fi:
+ \fi:
+ \fi:
+ }
+\int_new:N \l__regex_group_level_int
+\int_new:N \l__regex_mode_int % holds one of the mode constants below
+\int_const:Nn \c__regex_cs_in_class_mode_int { -6 }
+\int_const:Nn \c__regex_cs_mode_int { -2 }
+\int_const:Nn \c__regex_outer_mode_int { 0 }
+\int_const:Nn \c__regex_catcode_mode_int { 2 }
+\int_const:Nn \c__regex_class_mode_int { 3 } % the only odd mode value
+\int_const:Nn \c__regex_catcode_in_class_mode_int { 6 }
+\int_new:N \l__regex_catcodes_int
+\int_new:N \l__regex_default_catcodes_int
+\bool_new:N \l__regex_catcodes_bool
+\int_const:Nn \c__regex_catcode_C_int { "1 } % same number as case 0 of \__regex_item_catcode:
+\int_const:Nn \c__regex_catcode_B_int { "4 } % case 1
+\int_const:Nn \c__regex_catcode_E_int { "10 } % case 2
+\int_const:Nn \c__regex_catcode_M_int { "40 } % case 3
+\int_const:Nn \c__regex_catcode_T_int { "100 } % case 4
+\int_const:Nn \c__regex_catcode_P_int { "1000 } % case 6
+\int_const:Nn \c__regex_catcode_U_int { "4000 } % case 7
+\int_const:Nn \c__regex_catcode_D_int { "10000 } % case 8
+\int_const:Nn \c__regex_catcode_S_int { "100000 } % case 10
+\int_const:Nn \c__regex_catcode_L_int { "400000 } % case 11
+\int_const:Nn \c__regex_catcode_O_int { "1000000 } % case 12
+\int_const:Nn \c__regex_catcode_A_int { "4000000 } % case 13
+\int_const:Nn \c__regex_all_catcodes_int { "5515155 } % sum of the twelve constants above
+\cs_new_eq:NN \l__regex_internal_regex \c__regex_no_match_regex
+\seq_new:N \l__regex_show_prefix_seq
+\int_new:N \l__regex_show_lines_int
+\cs_new_protected:Npn \__regex_get_digits:NTFw #1#2#3#4#5 % #1: integer to assign, #2: code after a number was read, #3: code if there is none, #4 #5: next pair of tokens
+ {
+ \__regex_if_raw_digit:NNTF #4 #5
+ { #1 = #5 \__regex_get_digits_loop:nw {#2} } % start the assignment to #1 with the first digit; the loop supplies further digits
+ { #3 #4 #5 } % no digit: run #3 and put the pair #4 #5 back in front of the input
+ }
+\cs_new:Npn \__regex_get_digits_loop:nw #1#2#3 % #1: code to run once the number is complete, #2 #3: next pair of tokens
+ {
+ \__regex_if_raw_digit:NNTF #2 #3
+ { #3 \__regex_get_digits_loop:nw {#1} } % another digit: emit it and examine the following pair
+ { \scan_stop: #1 #2 #3 } % \scan_stop: terminates the number; then run #1 and put #2 #3 back
+ }
+\prg_new_conditional:Npnn \__regex_if_raw_digit:NN #1#2 { TF } % True when #1 has the meaning of \__regex_compile_raw:N and #2 is a decimal digit.
+ {
+ \if_meaning:w \__regex_compile_raw:N #1 % the first token of the pair must be the raw marker
+ \if_int_compare:w 1 < 1 #2 \exp_stop_f: % the number 1<#2> exceeds 1 only when #2 is read as a digit
+ \prg_return_true:
+ \else:
+ \prg_return_false:
+ \fi:
+ \else:
+ \prg_return_false:
+ \fi:
+ }
+\cs_new:Npn \__regex_if_in_class:TF % Selects the first of the two following arguments when \l__regex_mode_int is odd, the second otherwise.
+ {
+ \if_int_odd:w \l__regex_mode_int
+ \exp_after:wN \use_i:nn % \exp_after:wN removes the rest of the conditional before the selector grabs its arguments
+ \else:
+ \exp_after:wN \use_ii:nn
+ \fi:
+ }
+\cs_new:Npn \__regex_if_in_cs:TF % Selects the first following argument when \l__regex_mode_int is even and below \c__regex_outer_mode_int, the second otherwise.
+ {
+ \if_int_odd:w \l__regex_mode_int
+ \exp_after:wN \use_ii:nn % odd modes always take the second (false) branch
+ \else:
+ \if_int_compare:w \l__regex_mode_int < \c__regex_outer_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % three \exp_after:wN close both nested conditionals before the selector runs
+ \else:
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
+ \fi:
+ \fi:
+ }
+\cs_new:Npn \__regex_if_in_class_or_catcode:TF % Selects the first following argument when \l__regex_mode_int is odd or above \c__regex_outer_mode_int, the second otherwise.
+ {
+ \if_int_odd:w \l__regex_mode_int
+ \exp_after:wN \use_i:nn % odd modes always take the first (true) branch
+ \else:
+ \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_i:nn % three \exp_after:wN close both nested conditionals before the selector runs
+ \else:
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_ii:nn
+ \fi:
+ \fi:
+ }
+\cs_new:Npn \__regex_if_within_catcode:TF % Selects the first following argument when \l__regex_mode_int exceeds \c__regex_outer_mode_int, the second otherwise.
+ {
+ \if_int_compare:w \l__regex_mode_int > \c__regex_outer_mode_int
+ \exp_after:wN \use_i:nn % \exp_after:wN removes the rest of the conditional before the selector grabs its arguments
+ \else:
+ \exp_after:wN \use_ii:nn
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_chk_c_allowed:T % Runs the following argument only in outer or class mode; any other mode raises the c-bad-mode error and drops the argument.
+ {
+ \if_int_compare:w \l__regex_mode_int = \c__regex_outer_mode_int
+ \exp_after:wN \use:n % outer mode: execute the argument
+ \else:
+ \if_int_compare:w \l__regex_mode_int = \c__regex_class_mode_int
+ \exp_after:wN \exp_after:wN \exp_after:wN \use:n % class mode: execute the argument once both conditionals are closed
+ \else:
+ \__msg_kernel_error:nn { regex } { c-bad-mode }
+ \exp_after:wN \exp_after:wN \exp_after:wN \use_none:n % after the error, discard the argument
+ \fi:
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_mode_quit_c: % Leaves a catcode mode: catcode -> outer, catcode-in-class -> class; any other mode is left untouched.
+ {
+ \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_mode_int
+ \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
+ \else:
+ \if_int_compare:w \l__regex_mode_int = \c__regex_catcode_in_class_mode_int
+ \int_set_eq:NN \l__regex_mode_int \c__regex_class_mode_int
+ \fi: % no \else: branch, so other modes fall through unchanged
+ \fi:
+ }
+\cs_new_protected:Npn \__regex_compile:w % Opens a regex compilation: starts a build targeting \l__regex_internal_regex and resets the compilation state.
+ {
+ \__tl_build_x:Nw \l__regex_internal_regex % the matching \__tl_build_end: is issued by \__regex_compile_end:
+ \int_zero:N \l__regex_group_level_int % no group is open yet
+ \int_set_eq:NN \l__regex_default_catcodes_int \c__regex_all_catcodes_int
+ \int_set_eq:NN \l__regex_catcodes_int \l__regex_default_catcodes_int
+ \cs_set:Npn \__regex_item_equal:n { \__regex_item_caseful_equal:n } % item tests start out as their caseful variants
+ \cs_set:Npn \__regex_item_range:nn { \__regex_item_caseful_range:nn }
+ \__tl_build_one:n { \__regex_branch:n { \if_false: } \fi: } % presumably adds \__regex_branch:n plus an opening brace; \if_false: } \fi: only balances braces in this definition
+ }
+\cs_new_protected:Npn \__regex_compile_end: % Closes a regex compilation, reporting and repairing an unclosed class or unclosed groups first.
+ {
+ \__regex_if_in_class:TF
+ {
+ \__msg_kernel_error:nn { regex } { missing-rbrack } % input ended while still inside a class
+ \use:c { __regex_compile_]: } % invoke the handler named __regex_compile_]: to close the class
+ \prg_do_nothing: \prg_do_nothing: % NOTE(review): presumably stand-ins for tokens that handler reads ahead -- confirm against its definition
+ }
+ { }
+ \if_int_compare:w \l__regex_group_level_int > 0 \exp_stop_f: % some groups were never closed
+ \__msg_kernel_error:nnx { regex } { missing-rparen }
+ { \int_use:N \l__regex_group_level_int } % the error message receives the number of open groups
+ \prg_replicate:nn
+ { \l__regex_group_level_int } % repeat the closing sequence once per open group
+ {
+ \__tl_build_one:n
+ {
+ \if_false: { \fi: } % a closing brace, with \if_false: { \fi: keeping this definition balanced
+ \if_false: { \fi: } { 1 } { 0 } \c_true_bool % a second closing brace followed by the arguments {1} {0} \c_true_bool
+ }
+ \__tl_build_end: % finish the current build level
+ \__tl_build_one:o \l__regex_internal_regex % append the contents of \l__regex_internal_regex to the build
+ }
+ \fi:
+ \__tl_build_one:n { \if_false: { \fi: } } % closing brace matching the opening one added by \__regex_compile:w
+ \__tl_build_end: % finish the build started by \__tl_build_x:Nw in \__regex_compile:w
+ }
+\cs_new_protected:Npn \__regex_compile:n #1
+ {
+ \__regex_compile:w
+ \__regex_standard_escapechar:
+ \int_set_eq:NN \l__regex_mode_int \c__regex_outer_mode_int
+ \__regex_escape_use:nnnn
+ {
+ \__regex_char_if_special:NTF ##1
+ \__regex_compile_special:N \__regex_compile_raw:N ##1
+ }
+ {
+ \__regex_char_if_alphanumeric:NTF ##1
+ \__regex_compile_escaped:N \__regex_compile_raw:N ##1
+ }
+ { \__regex_compile_raw:N ##1 }
+ { #1 }
+ \prg_do_