texlive[49609] Master/texmf-dist: tagpdf (5jan19)

commits+karl at tug.org commits+karl at tug.org
Sat Jan 5 23:41:39 CET 2019


Revision: 49609
          http://tug.org/svn/texlive?view=revision&revision=49609
Author:   karl
Date:     2019-01-05 23:41:39 +0100 (Sat, 05 Jan 2019)
Log Message:
-----------
tagpdf (5jan19)

Modified Paths:
--------------
    trunk/Master/texmf-dist/doc/latex/tagpdf/README.md
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.tex
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-softhyphen.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-tagpdf-template.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.tex
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-checks-code.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-luatex.def
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-generic.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-lua.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-shared.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-pdftex.def
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-roles-code.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-struct-code.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-tree-code.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-user.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.lua
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.sty

Added Paths:
-----------
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.tex
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.tex
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf
    trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.tex
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-attr-code.sty
    trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-space-code.sty

Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/README.md	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/README.md	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,6 +1,6 @@
 #tagpdf — A package to create tagged pdf
-Packageversion: 0.3 
-Packagedate: 2018/08/06
+Packageversion: 0.50 
+Packagedate: 2019/01/04
 Author: Ulrike Fischer
 
 ## License
@@ -16,8 +16,11 @@
 - tagpdf-mc-code-shared.sty  (internal code)
 - tagpdf-mc-code-generic.sty  (internal code)
 - tagpdf-mc-code-lua.sty  (internal code)
-- tagpdf-stuct-code.sty  (internal code)
+- tagpdf-struct-code.sty  (internal code)
 - tagpdf-tree-code.sty   (internal code)
+- tagpdf-space-code.sty  (internal code)
+- tagpdf-attr-code.sty   (internal code)
+- tagpdf-checks-code.sty (internal code)
 - tagpdf-user.sty        (user commands) 
 - tagpdf-pdftex.def      (pdftex specific code)
 - tagpdf-luatex.def      (luatex specific code)

Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf	2019-01-05 22:41:39 UTC (rev 49609)

Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.tex	2019-01-05 22:41:39 UTC (rev 49609)
@@ -0,0 +1,63 @@
+% !Mode:: "TeX:DE:UTF-8:Main"
+\documentclass{scrartcl}
+\usepackage[english]{babel}
+\usepackage{tagpdf}
+\tagpdfsetup{activate-all,uncompress}
+
+\ifluatex\else % non-LuaTeX branch: uses the \pdfcatalog/\pdfinfo primitives directly
+\pdfcatalog{/Lang (en-GB)}% language tag for British English: the region subtag is GB, not UK
+\pdfinfo {/Title (Title)}
+\fi
+
+\tagpdfsetup
+{
+ newattribute = {TH-col} { <</Owner /Table /Scope /Column>> },
+ newattribute = {TH-row} { <</Owner /Table /Scope /Row>>    },
+ newattribute = {TH-both}{ <</Owner /Table /Scope /Both>>   },
+ newattribute = {Lay-borderdotted}{ <</Owner /Layout /Borderstyle /Dotted >>   },
+}
+\begin{document}
+\pagestyle{empty} %
+\tagstructbegin{tag=Document}
+
+\tagstructbegin{tag=Table}
+\begin{center}
+\begin{tabular}{ l | c | }
+\hline
+\tagstructbegin{tag=THead}
+ \tagstructbegin{tag=TR}
+  \tagstructbegin{tag=TH,attribute= {TH-both,Lay-borderdotted}}
+   \tagmcbegin{tag=TH}
+   h1
+   \tagmcend
+   \tagstructend &
+  \tagstructbegin{tag=TH,attribute-class= {TH-col,Lay-borderdotted}}
+    \tagmcbegin{tag=TH}
+    h2
+    \tagmcend
+  \tagstructend
+ \tagstructend %TR
+ \tagstructend %Thead
+  \\ \hline
+ \tagstructbegin{tag=TBody}
+   \tagstructbegin{tag=TR}
+    \tagstructbegin{tag=TD}
+    \tagmcbegin{tag=TD}
+    r1
+    \tagmcend
+    \tagstructend %TD
+&
+    \tagstructbegin{tag=TD}
+    \tagmcbegin{tag=TD}
+    h2
+    \tagmcend
+    \tagstructend
+  \tagstructend %TR
+ \tagstructend %TBody
+    \\ \hline
+  \end{tabular}
+\end{center}
+\tagstructend
+
+\tagstructend
+\end{document} 
\ No newline at end of file


Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-attribute.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.tex	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-koma.tex	2019-01-05 22:41:39 UTC (rev 49609)
@@ -11,7 +11,6 @@
 \documentclass{scrbook}
 \usepackage[english]{babel}
 
-
 \usepackage{tagpdf}
 
 \tagpdfifpdftexT
@@ -112,11 +111,11 @@
 
 \ExplSyntaxOn
 \prop_new:N   \g_tag_section_level_prop
-\prop_put:Nnn \g_tag_section_level_prop {chapter}{H1}
-\prop_put:Nnn \g_tag_section_level_prop {section}{H2}
-\prop_put:Nnn \g_tag_section_level_prop {subsection}{H3}
-\prop_put:Nnn \g_tag_section_level_prop {subsubsection}{H4}
-\prop_put:Nnn \g_tag_section_level_prop {paragraph}{H5}
+\prop_gput:Nnn \g_tag_section_level_prop {chapter}{H1}
+\prop_gput:Nnn \g_tag_section_level_prop {section}{H2}
+\prop_gput:Nnn \g_tag_section_level_prop {subsection}{H3}
+\prop_gput:Nnn \g_tag_section_level_prop {subsubsection}{H4}
+\prop_gput:Nnn \g_tag_section_level_prop {paragraph}{H5}
 
 \renewcommand{\chapterlinesformat}[3]
  {

Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf	2019-01-05 22:41:39 UTC (rev 49609)

Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.tex	2019-01-05 22:41:39 UTC (rev 49609)
@@ -0,0 +1,252 @@
+% !Mode:: "TeX:DE:UTF-8:Main"
+
+\documentclass[a4paper,oneside]{memoir}
+\settocdepth{subsubsection}
+\setsecnumdepth{subsubsection}
+
+\usepackage[english]{babel}
+
+
+\usepackage{tagpdf}
+
+\tagpdfifpdftexT
+ {
+  %set language / can also be done with hyperref
+  \pdfcatalog{/Lang (en-US)}
+  \usepackage[T1]{fontenc}
+ }
+
+\tagpdfifluatexT
+ {
+  %set language / can also be done with hyperref
+  \pdfextension catalog{/Lang (en-US)}
+  \usepackage{fontspec}
+  \usepackage{luacode}
+ }
+
+
+\tagpdfsetup{tabsorder=structure,
+             activate-all,
+             uncompress
+             }
+
+\usepackage{lipsum}%\textheight3cm
+
+\usepackage{etoolbox}
+
+\ifdef\cfttocbeforelisthook{
+  \appto\cfttocbeforelisthook{\tagstructbegin{tag=TOC}}%\noindent TOC\par}
+}{
+  \csdef\cfttocbeforelisthook{\tagstructbegin{tag=TOC}}%\noindent TOC\par}
+}
+\ifdef\cfttocafterlisthook{
+  \appto\cfttocafterlisthook{\tagstructend}%\noindent /TOC\par}
+}{
+  \csdef\cfttocafterlisthook{\tagstructend}%\noindent /TOC\par}
+}
+
+\def\M{{\fontsize{3pt}{3pt}\sffamily\selectfont NN}}
+
+%what about unnumbered entries in the toc??????
+\appto\cftchapterpresnum{%
+     \tagstructbegin{tag=TOCI}% is there a better place? what about unnumbered entries?
+     \tagstructbegin{tag=Lbl}%
+     \tagmcbegin{tag=Lbl}%
+}
+\appto\cftchapteraftersnum{%
+  \tagmcend
+  \tagstructend}
+
+\appto\cftsectionpresnum{%
+    \tagstructbegin{tag=TOCI}% is there a better place? what about unnumbered entries?
+    \tagstructbegin{tag=Lbl}%
+    \tagmcbegin{tag=Lbl}%
+}
+\appto\cftsectionaftersnum{%
+ \tagmcend
+ \tagstructend}
+ 
+\appto\cftsubsectionpresnum{%
+    \tagstructbegin{tag=TOCI}% is there a better place? what about unnumbered entries?
+    \tagstructbegin{tag=Lbl}%
+    \tagmcbegin{tag=Lbl}%
+ }
+\appto\cftsubsectionaftersnum{%
+ \tagmcend
+ \tagstructend}
+\appto\cftsubsubsectionpresnum{%
+    \tagstructbegin{tag=TOCI}% is there a better place? what about unnumbered entries?
+    \tagstructbegin{tag=Lbl}%
+    \tagmcbegin{tag=Lbl}%
+}
+\appto\cftsubsubsectionaftersnum{%
+ \tagmcend
+ \tagstructend}
+
+%begin of entry
+\appto\cftchapteraftersnumb{%
+     \tagstructbegin{tag=P}%
+     \tagmcbegin{tag=P}}
+\appto\cftsectionaftersnumb{%
+     \tagstructbegin{tag=P}%
+     \tagmcbegin{tag=P}}
+\appto\cftsubsectionaftersnumb{%
+     \tagstructbegin{tag=P}%
+     \tagmcbegin{tag=P}}
+\appto\cftsubsubsectionaftersnumb{%
+     \tagstructbegin{tag=P}%
+     \tagmcbegin{tag=P}}
+
+%end of entry
+\pretocmd\cftchapterfillnum{%
+ \tagmcend
+ \tagstructend
+}{}{}
+\pretocmd\cftsectionfillnum{
+ \tagmcend
+ \tagstructend}{}{}
+\pretocmd\cftsubsectionfillnum{
+ \tagmcend
+ \tagstructend}{}{}
+\pretocmd\cftsubsubsectionfillnum{%
+ \tagmcend
+ \tagstructend}{}{}
+
+
+\makeatletter
+\patchcmd\cftchapterformatpnum{\cftchapterpagefont #1}{%
+  %\llap{\M pnum}%
+  \tagstructbegin{tag=Reference}%
+   \tagmcbegin{tag=Reference}
+  \cftchapterpagefont #1%
+  %\rlap{\M pnum}%
+  \tagmcend
+  \tagstructend
+  \tagstructend %for /TOCI. is there a better place?
+}{}{}
+\patchcmd\cftsectionformatpnum{\@nameuse{cftsectionpagefont}#1}{%
+  %\llap{\M pnum}%
+  \tagstructbegin{tag=Reference}%
+   \tagmcbegin{tag=Reference}
+   \@nameuse{cftsectionpagefont}#1%
+  %\rlap{\M pnum}%
+  \tagmcend
+ \tagstructend
+ \tagstructend %for /TOCI. is there a better place?
+}{}{\typeout{patch failed}}
+\patchcmd\cftsubsectionformatpnum{\@nameuse{cftsubsectionpagefont}#1}{%
+  %\llap{\M}%
+  \tagstructbegin{tag=Reference}%
+   \tagmcbegin{tag=Reference}
+   \@nameuse{cftsubsectionpagefont}#1%
+  %\rlap{\M}%
+   \tagmcend
+  \tagstructend
+  \tagstructend %for /TOCI. is there a better place?
+}{}{\typeout{patch failed}}
+
+\patchcmd\cftsubsubsectionformatpnum{\@nameuse{cftsubsubsectionpagefont}#1}{%
+  %\llap{\M}%
+  \tagstructbegin{tag=Reference}%
+   \tagmcbegin{tag=Reference}
+   \@nameuse{cftsubsubsectionpagefont}#1%
+  %\rlap{\M}%
+   \tagmcend
+  \tagstructend
+  \tagstructend %for /TOCI. is there a better place?
+}{}{\typeout{patch failed}}
+
+
+% section is typeset using \M@sect
+\ExplSyntaxOn
+\prop_new:N   \g_tag_section_level_prop % maps a sectioning name to the heading tag used for it
+\prop_gput:Nnn \g_tag_section_level_prop {chapter}{H1} % \g_ variable, so entries are stored globally
+\prop_gput:Nnn \g_tag_section_level_prop {section}{H2}
+\prop_gput:Nnn \g_tag_section_level_prop {Starredsection}{H2}
+\prop_gput:Nnn \g_tag_section_level_prop {subsection}{H3}
+\prop_gput:Nnn \g_tag_section_level_prop {subsubsection}{H4}
+\prop_gput:Nnn \g_tag_section_level_prop {paragraph}{H5}
+\ExplSyntaxOff
+\patchcmd\M@sect{#9\@@par}{% end the marked content and the structure right after #9 (presumably the heading text), before \@@par
+ %this place could be used for something between number and text
+  #9% 
+     \tagmcend
+     \tagstructend
+  \@@par%
+}{}{}
+
+% section* use \@ssect, but memoir redefines it a little before
+% calling the kernel version
+
+\ExplSyntaxOn
+\patchcmd\@ssect{\@mem@old@ssect{#1}{#2}{#3}{#4}{#5}}{% wrap the fifth argument in an H2 structure and marked content
+  \@mem@old@ssect{#1}{#2}{#3}{#4}{%
+  \tagstructbegin{tag=H2}% how to get level???
+  \tagmcbegin    {tag=H2}% how to get level???
+   #5
+  \tagmcend
+  \tagstructend}%
+}{}{}
+
+% section numbers are typeset using \@seccntformat
+
+
+\pretocmd\@seccntformat{%
+ \tagstructbegin{tag=\prop_item:Nn\g_tag_section_level_prop{#1}}%
+ \tagmcbegin    {tag=\prop_item:Nn\g_tag_section_level_prop{#1}}
+}{}{}
+\ExplSyntaxOff
+% could be used to insert something after the number ...
+%\patchcmd\@seccntformat{\endcsname}{%
+%  \endcsname\rlap{\M}%
+%}{}{}
+
+\renewcommand{\cftdot}{\tagmcbegin{artifact}.\tagmcend}
+
+\makeatother
+ % I don't want to handle header/footer now
+\pagestyle{empty}
+\makeevenfoot{plain}{}{}{}
+\makeoddfoot{plain}{}{}{}
+
+
+%% missing yet: chapter titles
+\begin{document}
+
+\tagstructbegin{tag=Document}
+
+\tableofcontents*
+
+\chapter{Test chapter}
+
+\section{Test section}
+
+ \tagstructbegin{tag=P}
+   \tagmcbegin{tag=P}
+test
+   \tagmcend
+ \tagstructend
+
+
+\subsection{Test subsection}
+
+ \tagstructbegin{tag=P}
+   \tagmcbegin{tag=P}
+test
+   \tagmcend
+ \tagstructend
+
+\subsubsection{Test subsubsection}
+
+ \tagstructbegin{tag=P}
+   \tagmcbegin{tag=P}
+test
+   \tagmcend
+ \tagstructend
+
+\section*{Starred section}
+
+\tagstructend
+\end{document}
+
+


Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-patch-sectioning-memoir.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-softhyphen.pdf
===================================================================
(Binary files differ)

Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf
===================================================================
(Binary files differ)

Index: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf	2019-01-05 22:41:39 UTC (rev 49609)

Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.pdf
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/pdf
\ No newline at end of property
Added: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.tex	                        (rev 0)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.tex	2019-01-05 22:41:39 UTC (rev 49609)
@@ -0,0 +1,21 @@
+% !Mode:: "TeX:DE:UTF-8:Main"
+\documentclass{article}
+\usepackage{amsmath}
+\usepackage[ngerman]{babel}
+\usepackage{tagpdf,listings}
+\makeatletter
+%\def\lst@outputspace{\pdffakespace\ } %not so good
+% this here works fine with luatex, if the font has a space glyph:
+\def\lst@visiblespace{\lst@ttfamily{\char32}{\char32}}
+% with pdftex this could work
+%\def\lst@visiblespace{\lst@ttfamily{\pdffakespace\ }{\pdffakespace\ }}
+\tagpdfsetup{activate-all,uncompress,interwordspace=on,show-spaces,log=v}
+\begin{document}\pagestyle{empty}
+\tagstructbegin{tag=Document}
+\tagmcbegin{tag=P}
+\begin{lstlisting}[showspaces]
+aaa aaa    xxxx
+\end{lstlisting}
+\tagmcend
+\tagstructend
+\end{document}
\ No newline at end of file


Property changes on: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-spaceglyph-listings.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/ex-tagpdf-template.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.tex	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/doc/latex/tagpdf/tagpdf.tex	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,10 +1,9 @@
 % !Mode:: "TeX:DE:UTF-8:Main"
 \makeatletter
-\def\UlrikeFischer@package@version{0.3}
-\def\UlrikeFischer@package@date{2018/08/06}
+\def\UlrikeFischer@package@version{0.50}
+\def\UlrikeFischer@package@date{2019/01/04}
 \makeatother
 \documentclass[DIV=12,parskip=half-,bibliography=totoc]{scrartcl}
-\usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage[english]{babel}
 \usepackage[autostyle]{csquotes}
@@ -145,6 +144,11 @@
 
 I no longer try to (pdf-)escape the tag names: it is a bit unclear how to do it at best with luatex. This will perhaps later change again.
 
+\subsection{Changes in 0.5}
+I added code to handle attributes and attribute classes, see section~\ref{sec:attributes} and corrected a small number of code errors.
+
+I added code to add \enquote{real} space glyphs to the pdf, see section~\ref{sec:spacechars}.
+
 \section{Setup}
 
 \minisec{Activation needed!}
@@ -172,19 +176,23 @@
 
 The key-val list understands the following keys:
 \begin{description}
-\item[\PrintKeyName{activate-mc} ] Boolean, initially false. Activates the code related to marked content.
+\item[\PrintKeyName{activate-all}] Boolean, initially false. Activates everything, that's normally the sensible thing to do.
+\item[\PrintKeyName{activate-mc}] Boolean, initially false. Activates the code related to marked content.
 \item[\PrintKeyName{activate-struct}] Boolean, initially false. Activates the code related to structures. Should be used only if \PrintKeyName{activate-mc} has been used too.
 \item[\PrintKeyName{activate-tree}] Boolean, initially false. Activates the code related to trees. Should be used only if the two other keys has been used too.
-\item[\PrintKeyName{activate-all}] Boolean, initially false. Activates everything, that normally the sensible thing to do.
-\item[\PrintKeyName{add-new-tag}] See section \ref{sec:new-tag} for a description.
-\item[\PrintKeyName{check-tags}] Boolean, initially true. Activates some safety checks (but doesn't very much currently. It will perhaps be merged with the log-level key).
+\item[\PrintKeyName{add-new-tag}] Allows you to define new tag names; see section~\ref{sec:new-tag} for a description.
+\item[\PrintKeyName{check-tags}] Boolean, initially true. Activates some safety checks (but doesn't do very much currently. It will perhaps be merged with the log-level key).
 \item[\PrintKeyName{compresslevel}] Value is an integer between 0 and 9. It sets both the pdfcompresslevel and the pdfobjcompresslevel.
 \item[\PrintKeyName{inputencoding}]  Some keys convert their value and so need to know the encoding of the file. The default encoding is utf8.  When using pdflatex you can switch to another encoding with this key. The value should be one of names allowed by \texttt{l3str}. There is no checking that the name is valid (I don't think that it sensible to use something else than utf8).
+\item[\PrintKeyName{interwordspace}] Choice key, possible values are \PrintKeyName{true}/""\PrintKeyName{on} and \PrintKeyName{false}/\PrintKeyName{off}. The key activates/deactivates the insertion of space glyphs, see section~\ref{sec:spacechars}.
 \item[\PrintKeyName{log}] Choice key, possible values \PrintKeyName{none}, \PrintKeyName{v}, \PrintKeyName{vv}, \PrintKeyName{vvv},  \PrintKeyName{all}.  Setups the log level.  Changing the value affects currently mostly the luamode: \enquote{higher} values gives more messages in the log. The current levels and messages have been setup in a quite ad-hoc manner and will need improvement.
+\item[\PrintKeyName{newattribute}] This key takes two arguments and declares an attribute. See section~\ref{sec:attributes}.
+\item[\PrintKeyName{show-spaces}] Boolean.\marginnote{luamode} This is a debug option; it helps to see where space glyphs will be inserted if \PrintKeyName{interwordspace} is activated.
 \item[\PrintKeyName{tabsorder}] Choice key, possible values are \PrintKeyName{row}, \PrintKeyName{column}, \PrintKeyName{structure}, \PrintKeyName{none}.  This decides if a \verb+/Tabs+ value is written to the dictionary of the page objects. Not really needed for tagging itself, but one of the things you probably need for accessibility checks. So I added it. Currently the tabsorder is the same for all pages. Perhaps this should be changed \ldots.
-\item[\PrintKeyName{tagunmarked}] Boolean,\marginnote{luamode} initially true. When this boolean is true, the lua code will try to mark everything that has not been marked yet as an artifact. The benefit is that one doesn't have to mark up every deco rule oneself. The danger is that it perhaps marks things that shouldn't be marked -- it hasn't been tested yet with complicated documents containing annotations etc.
+\item[\PrintKeyName{tagunmarked}] Boolean,\marginnote{luamode} initially true. When this boolean is true, the lua code will try to mark everything that has not been marked yet as an artifact. The benefit is that one doesn't have to mark up every deco rule oneself. The danger is that it perhaps marks things that shouldn't be marked -- it hasn't been tested yet with complicated documents containing annotations etc. See also section~\ref{sec:lazy} for a discussion about automatic tagging.
 \item[\PrintKeyName{uncompress}] Equivalent to using \texttt{compresslevel=0}.
 
+
 \end{description}
 
 \section{Tagging}
@@ -206,7 +214,7 @@
 endstream
 \end{lstlisting}
 
-From this stream one can extract the characters and their placement on the page but not their semantic meaning  (the first line is actually a section heading, the last the page number). And while in the example the order is correct there is actually no garanty that the stream contains the text in the order it should be read.
+From this stream one can extract the characters and their placement on the page but not their semantic meaning  (the first line is actually a section heading, the last the page number). And while in the example the order is correct there is actually no guarantee that the stream contains the text in the order it should be read.
 
 Tagging means to enrich the pdf with information about the \emph{semantic} meaning and the \emph{reading order}. (Tagging can do more, one can also store all sorts of layout information like font properties and indentation with tags. But as I already wrote this package concentrates on the part of tagging that is needed to improve accessibility.)
 
@@ -216,7 +224,7 @@
 To tag a pdf three tasks must be  carried out:
 
 \begin{enumerate}
-\item \textbf{The mark-content-task}:\marginnote{mc-task} The document must add \enquote{labels} to the page stream which allows to identify and reference the various chunks of text and other content.  This is the most difficult part of tagging -- both for the document writer but also for the package code. At first there can be quite many chunks as every one is a leaf node of the structure and so often a rather small unit.  At second the chunks must be defined page-wise -- and this is not easy when you don't know where the page breaks are. At last some text is created automatically, e.g. the toc, references, citations, list numbers etc and it is not always easy to mark them correctly.
+\item \textbf{The mark-content-task}:\marginnote{mc-task} The document must add \enquote{labels} to the page stream which allows to identify and reference the various chunks of text and other content.  This is the most difficult part of tagging -- both for the document writer but also for the package code. At first there can be quite many chunks as every one is a leaf node of the structure and so often a rather small unit.  At second the chunks must be defined page-wise -- and this is not easy when you don't know where the page breaks are. Also, in a standard document a lot of text is created automatically, e.g. the toc, references, citations, list numbers etc., and it is not always easy to mark them correctly.
 
 \item \textbf{The structure-task}:\marginnote{struct-task}  The document must declare the structure. This means marking the start and end of semantically connected portions of the document (correctly nested as a tree). This too means some work for the document writer, but less than for the mc-task: at first quite often the mc-task and the structure-task can be combined, e.g. when you mark up a list number or a tabular cell or a section header; at second one doesn't have to worry about page breaks so quite often one can patch standard environments to declare the structure. On the other side a number of structures end in \LaTeX\ only implicitly -- e.g. an item ends at the next item, so getting the pdf structure right still means that additional mark up must be added.
 
@@ -336,10 +344,10 @@
 
   \item The literals are inserted directly and not at shipout. This means that due to the asynchronous page breaking of \TeX\ the MCID-number can be wrong even if the counter is reset at every page (this package uses in generic mode a label-ref-system to get around this problem. This sadly means that three compilations are needed until everything has settled down).
 
-  \item There exist environments that process their content more than once -- examples are \texttt{align} and \texttt{tabularx}.
+  \item There exist environments which process their content more than once -- examples are \texttt{align} and \texttt{tabularx}.
        So one has to check for doublettes and holes in the counting system.
 
-  \item Pdf is a page oriented format. And this means that the start and the end marker must be on the same page \ldots\ \emph{so what to do with normal paragraphs that split over pages??}. This question will be handled in subsection~\ref{sec:splitpara}.
+  \item Pdf is a page oriented format. And this means that the start and the end marker must be on the same page \ldots\ \emph{so what to do with normal paragraphs that split over pages??}. This question will be discussed in subsection~\ref{sec:splitpara}.
 \end{enumerate}
 
 \subsubsection{Generic mode versus lua mode in the mc-task}
@@ -352,7 +360,7 @@
 \item Pagebreaks between start and end of the marker are \emph{not} a problem. So you can mark a complete paragraph. If a pagebreak occur directly after an start marker or before an end marker this can lead to empty chunks in the pdf and so bloat up pdf a bit, but this is imho not really a problem (compared to the size increase by the rest of the tagging).
 \item The commands don't insert literals directly and so affect line and page breaking much less.
 \item The numbering of the MCID are done at shipout, so no label/ref system is needed.
-\item The code can do some marking automatically. Currently everything that has not been marked up by the document is marked as artifact. This can probably be extended and improved.
+\item The code can do some marking automatically. Currently everything that has not been marked up by the document is marked as artifact.
 \end{enumerate}
 
 \subsubsection{Commands to mark content and chunks}
@@ -370,7 +378,7 @@
 
 The key-val list understands the following keys:
 \begin{description}
-  \item[\PrintKeyName{tag}] This is required, unless you use the \PrintKeyName{artifact} key. The value of the key is normally one of the standard type listed in section \ref{sec:new-tag}. It is possible to setup new tags, see the same section. The value of the key is expanded, so it can be a command. The expansion is passed unchanged to the pdf, so it should with a starting slash give a valid pdf name  (some ascii with numbers like \texttt{H4} is fine).
+  \item[\PrintKeyName{tag}] This is required, unless you use the \PrintKeyName{artifact} key. The value of the key is normally one of the standard types listed in section~\ref{sec:new-tag} (without a slash at the beginning; this is added by the code). It is possible to setup new tags, see the same section. The value of the key is expanded, so it can be a command. The expansion is passed unchanged to the pdf, so it should with a starting slash give a valid pdf name  (some ascii with numbers like \texttt{H4} is fine).
 
   \item[\PrintKeyName{artifact}] This will setup the marked content as an artifact. The key should be used for content that should be ignored. The key can take one of the values \PrintKeyName{pagination}, \PrintKeyName{layout},  \PrintKeyName{page},  \PrintKeyName{background} and \PrintKeyName{notype} (this is the default). Text in the header and footer should be marked with \PrintKeyName{artifact=pagination}.
 
@@ -380,13 +388,13 @@
 
       The\marginnote{lua mode only} lua mode will mark up everything unmarked as \texttt{artifact=notype}. You can suppress this behaviour by setting the tagpdfsetup key \texttt{tagunmarked} to false. See section \ref{ssec:setup}.
 
-  \item[\PrintKeyName{stash}] Normally marked content will be stored in the \enquote{current} structure. This may not be what you want. As an example you may perhaps want to put a marginnote behind or before the paragraph it is in the tex-code. With this boolean key the content is marked but not stored in the kid-key of the current  structure.
+  \item[\PrintKeyName{stash}] Normally marked content will be stored in the \enquote{current} structure. This may not be what you want. As an example you may perhaps want to put a marginnote behind or before the paragraph it is in the tex-code. With this boolean key the content is marked but not stored in the kid-key of the current structure.
 
   \item[\PrintKeyName{label}] This key sets a label by which you can call the marked content later in another structure (if it has been stashed with the previous key). Internally the label name will start with \texttt{tagpdf-}.
 
-  \item[\PrintKeyName]{alttext}] This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
+  \item[\PrintKeyName{alttext}] This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
 
-  \item[\PrintKeyName]{alttext-o} This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName]{alttext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you do something like this:
+  \item[\PrintKeyName{alttext-o}] This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName{alttext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you can do something like this:
 
       \begin{lstlisting}
       \newcommand\myalttext{\frac{a}{b}}
@@ -395,9 +403,9 @@
 
       and it will insert \verb+\frac{a}{b}+  (hex encoded) in the pdf.
 
-  \item[\PrintKeyName]{actualtext}] This key inserts an \texttt{/ActualText} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
+  \item[\PrintKeyName{actualtext}] This key inserts an \texttt{/ActualText} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
 
-  \item[\PrintKeyName]{actualtext-o} This key inserts an \texttt{/ActualText} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName]{actualtext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you do something like this:
+  \item[\PrintKeyName{actualtext-o}] This key inserts an \texttt{/ActualText} value in the property dictionary of the BDC operator. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName{actualtext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you can do something like this:
 
       \begin{lstlisting}
       \newcommand\myactualtext{X}
@@ -406,7 +414,7 @@
 
       and it will insert \verb+X+ (hex encoded)  in the pdf.
 
-  \item[\PrintKeyName{raw}] This key allows you to add more entries to the properties dictionary. The value must be correct, low-level pdf. E.g. \verb+raw=/Alt (Hello)+ will insert an alternative Text. (I will probably add keys for \texttt{/Alt} and \texttt{/Actualtext} later, but I haven't made up my mind regarding the encoding yes).
+  \item[\PrintKeyName{raw}] This key allows you to add more entries to the properties dictionary. The value must be correct, low-level pdf. E.g. \verb+raw=/Alt (Hello)+ will insert an alternative Text.
 \end{description}
 
 \ExplSyntaxOn
@@ -537,9 +545,9 @@
   \item[\PrintKeyName{tag}] This is required. The value of the key is normally one of the standard type listed in section \ref{sec:new-tag}. It is possible to setup new tags/types, see the same section.
   \item[\PrintKeyName{stash}] Normally a new structure inserts itself as a kid into the currently active structure. This key prohibits this. The structure is nevertheless from now on \enquote{the current active structure} and parent for following  marked content and structures.
   \item[\PrintKeyName{label}] This key sets a label by which you can use the structure later in another structure. Internally the label name will start with \texttt{tagpdfstruct-}.
-  \item[\PrintKeyName{alttext}] This key inserts an \texttt{/Alt} value in the dictionary of structure object. See section~\ref{sec:alttext}. The value is handled as verbatim string and hex encoded.
+  \item[\PrintKeyName{alttext}] This key inserts an \texttt{/Alt} value in the dictionary of structure object, see section~\ref{sec:alttext}. The value is handled as verbatim string and hex encoded.
 
-  \item[\PrintKeyName{alttext-o}] This key inserts an \texttt{/Alt} value in the dictionary of structure object. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName]{alttext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you do something like this:
+  \item[\PrintKeyName{alttext-o}] This key inserts an \texttt{/Alt} value in the dictionary of a structure object,  see section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName{alttext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you can do something like this:
 
       \begin{lstlisting}
       \newcommand\myalttext{\frac{a}{b}}
@@ -548,9 +556,9 @@
 
       and it will insert \verb+\frac{a}{b}+  (hex encoded) in the pdf.
 
-  \item[\PrintKeyName{actualtext}] This key inserts an \texttt{/ActualText} value in the dictionary of structure object. See section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
+  \item[\PrintKeyName{actualtext}] This key inserts an \texttt{/ActualText} value in the dictionary of structure object,  see section~\ref{sec:alttext}. The value is handled as verbatim string, commands are not expanded.
 
-  \item[\PrintKeyName{actualtext-o}] This key inserts an \texttt{/ActualText} value in the dictionary of structure object. See section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName]{actualtext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you do something like this:
+  \item[\PrintKeyName{actualtext-o}] This key inserts an \texttt{/ActualText} value in the dictionary of structure object, see section~\ref{sec:alttext}. The value is handled as verbatim string like the key \PrintKeyName{actualtext} but expanded once (the \texttt{o} refers to the \texttt{o} type in expl3). That means that you can do something like this:
 
       \begin{lstlisting}
       \newcommand\myactualtext{X}
@@ -558,11 +566,20 @@
       \end{lstlisting}
 
       and it will insert \verb+X+ (hex encoded)  in the pdf.
+    \item[\PrintKeyName{attribute}] This key takes as argument a comma list of attribute names (use braces to protect the commas from the external key-val parser) and allows to add one or more attribute dictionary entries in the structure object. As an example
+      \begin{lstlisting}
+      \tagstructbegin{tag=TH,attribute= TH-row}
+      \end{lstlisting}. See also section~\ref{sec:attributes}.
+  \item[\PrintKeyName{attribute-class}] This key takes as argument a comma list of attribute names (use braces to protect the commas from the external key-val parser) and allows to add one or more attribute classes to the structure object. As an example
+      \begin{lstlisting}
+      \tagstructbegin{tag=TH,attribute-class= TH-row}
+      \end{lstlisting}. See also section~\ref{sec:attributes}.
+
   \item[\PrintKeyName{title}] This key allows to set the dictionary entry \texttt{/Title} in the structure object.
       The value is handled as verbatim string and hex encoded. Commands are not expanded.
   \item[\PrintKeyName{title-o}] This key allows to set the dictionary entry \texttt{/Title} in the structure object.
       The value is expanded once and then handled as verbatim string like the \PrintKeyName{title} key.
- \end{description}
+\end{description}
 
 
 \ExplSyntaxOn
@@ -570,7 +587,7 @@
 \DescribeMacro\uftag_struct_end:
 \ExplSyntaxOff
 
-This ends a structure. They don't end a group and it doesn't matter if they are in another group as the starting commands.
+These commands end a structure. They don't end a group and it doesn't matter if they are in another group as the starting commands.
 
 \ExplSyntaxOn
 \DescribeMacro\tagstructuse{<label>}
@@ -583,7 +600,41 @@
 
 A document should have at least one structure which contains the whole document. A suitable tag is \texttt{Document} or \texttt{Article}. I'm considering to automatically inserting it.
 
+\subsubsection{Attributes and attribute classes}\label{sec:attributes}
 
+Structure elements can have so-called attributes. A single attribute is a dictionary\footnote{or a stream but this is currently not supported by the package as I don't know a use-case} with at least the required key \verb+/Owner+ which describes the scope the attribute applies to.
+As an example, here is an attribute that can be attached to a tabular header (type TH) and adds the info that the header is a column header:
+\begin{lstlisting}
+<</Owner /Table /Scope /Column>>
+\end{lstlisting}
+
+One or more such attributes can be attached to a structure element. It is also possible to store such an attribute under a symbolic name in a so-called \enquote{ClassMap} and then to attach references to such classes to a structure.
+
+To use such attributes you must first declare them in \verb+\tagpdfsetup+ with the key \texttt{newattribute}. This key takes two arguments, a name and the content of the attribute.
+The name should be a sensible key name, the content a dictionary.
+\begin{lstlisting}
+\tagpdfsetup
+ {
+  newattribute =
+   {TH-col}{<</Owner /Table /Scope /Column>>},
+  newattribute =
+   {TH-row}{<</Owner /Table /Scope /Row>>},
+  }
+\end{lstlisting}
+
+Attributes are only written to the pdf when used, so it is not a problem to predeclare a number of standard attributes.
+
+It is your responsibility that the content of the dictionary is valid pdf and that the values are sensible!
+
+Attributes can then be used with the key \PrintKeyName{attribute} or \PrintKeyName{attribute-class} which both take a comma list of attribute names as argument\footnote{That's not really a sensible example}:
+\begin{lstlisting}
+\tagstructbegin{tag=TH,
+ attribute-class= {TH-row,TH-col},
+ attribute      = {TH-row,TH-col},
+ }
+\end{lstlisting}
+
+
 \subsection{Task 3: tree Management}
 When all the document content has been correctly marked and the data for the trees has been collected they must be flushed to the pdf. This is done automatically (if the package has been activated) with the following command in \verb+\AfterEndDocument+:
 
@@ -591,13 +642,13 @@
 \DescribeMacro\uftag_finish_structure:
 \ExplSyntaxOff
 
-This will hopefully write all the needed objects and values to the pdf. (Beside the already mentioned  \texttt{StructTreeRoot} and \texttt{StructElem} objects, additionally a so-called \texttt{ParentTree} is needed which records the parents of all the marked contents bits, a \texttt{Rolemap} and a few more values and dictionaries).
+This will hopefully write all the needed objects and values to the pdf. (Beside the already mentioned  \texttt{StructTreeRoot} and \texttt{StructElem} objects, additionally a so-called \texttt{ParentTree} is needed which records the parents of all the marked contents bits, a \texttt{Rolemap}, perhaps a \texttt{ClassMap} and object for the attributes, and a few more values and dictionaries).
 
 I'm not quite sure if this shouldn't be a really internal command.
 
 
 \subsection{A fully marked up document body}
-The following shows the marking need for a section, a sentence and a list with two items. It is obvious that one wouldn't want to do like this for real documents. If tagging should be usable, the commands must be hidden as much as possible inside suitable \LaTeX\ commands and enviroments.
+The following shows the marking needed for a section, a sentence and a list with two items. It is obvious that one wouldn't like to have to do this for real documents. If tagging should be usable, the commands must be hidden as much as possible inside suitable \LaTeX\ commands and environments.
 
 \begin{lstlisting}
 \begin{document}
@@ -649,6 +700,16 @@
 \end{document}
 \end{lstlisting}
 
+\subsection{Lazy and automatic tagging}\label{sec:lazy}
+
+A number of features of pdf readers need a fully tagged pdf. As an example screen readers tend to ignore alternative text (see section~\ref{sec:alttext}) if the pdf is not fully tagged. Also reflowing a pdf only works for me (even if real space chars are in the pdf) if the pdf is fully tagged.
+
+This means that even if you don't care about a proper structure you should try to add at least some minimal tagging. With pdflatex this is not easy due to the page break problem. But with lualatex you can use a \texttt{Document} structure and inside it rather large mc-chunks. This minimizes the needed work.
+
+One could ask if in lua mode the code couldn't try to mark up unmarked parts e.g. as P-type chunks, like it marks them up as artifacts currently. Sadly this is not so easy, as it is quite difficult to reliably identify the structure and the place in the kids array where such chunks belong to. I also don't think that it is really needed. It is not so difficult to define user macros which e.g. open a structure and start an mc-chunk or which close an open mc-chunk before issuing the next \verb+\tagmcbegin+.
+
+
+
 \section{Alternative text, ActualText and text-to-speech software}\label{sec:alttext}
 
 The pdf format allows to add alternative text through the  \PrintKeyName{/Alt} and the \PrintKeyName{/ActualText} key\footnote{There is also the \PrintKeyName{/E} key for acronyms but I will ignore it for now}. Both can be added either to the marked content in the page stream or to the object describing the structure.
@@ -685,10 +746,30 @@
 
 New tags can be defined in the setup command with the key \texttt{add-new-tag}. It takes a value consisting of two names separated by a slash. The first is the new name, the second a known (e.g. a standard) tag it should be mapped too. Example:
 
-\Macro\tagpdfsetup{add-new-type = section/H1}
+\Macro\tagpdfsetup{add-new-tag = section/H1}
 
 
 
+\section{\enquote{Real} space glyphs}\label{sec:spacechars}
+
+TeX uses only spaces (horizontal movements) to separate words. That means that a pdf reader has to use some heuristic when copying text or reflowing the text to decide if a space is meant as a word boundary or e.g. as a kerning. Accessible documents should use real space glyphs (U+0020) from a font in such places.
+
+With the key \PrintKeyName{interwordspace} you can activate such space glyphs.
+
+With pdftex this will simply call the primitive \verb+\pdfinterwordspaceon+. pdftex will then insert at various places a char from a font called dummy-space. Attention! This means that at every space there are additional font switches in the pdf: from the current font to the dummy-space font and back again. This will make the pdf larger. As \verb+\pdfinterwordspaceon+ is a primitive function it can't be fine tuned or adapted. You can only turn it on and off and insert manually such a space glyph with \verb+\pdffakespace+.
+
+With luatex (in luamode) interwordspace is implemented with a lua-function which is inserted in two callbacks and marks up the places where it seems sensible to insert a space glyph. Later in the process (when also the mc-markers are inserted) the space glyphs are injected -- the code will take the glyph from the current font if this has a space glyph or switch to the default latin modern font. The current code works reasonably well in normal text.
+
+The key \PrintKeyName{show-spaces} will show lines at the places where in lua mode spaces are inserted and so can help you to find problematic places. For listings -- which have a quite specific handling of spaces -- you can find a suggestion in the example \texttt{ex-spaceglyph-listings}.
+
+\emph{Attention:} Even with real spaces copy \& pasting of code does not necessarily give the correct results: you get spaces but not necessarily the right number of spaces. The pdf viewers I tried all copied four real space glyphs as one space. I only got the four spaces with the export to text or xml in the AdobePro.
+
+\DescribeMacro\pdffakespace
+
+This is in pdftex a primitive. It inserts the dummy space glyph. \pkg{tagpdf} defines this command also for luatex -- attention: it can perhaps insert break points.
+
+
+
 \section{Accessibility is not only tagging}
 
  A tagged pdf is needed for accessibility but this is not enough. As already mentioned there are more requirements:
@@ -702,13 +783,11 @@
       \end{verbatim}
  and perhaps some\verb+\pdfglyphtounicode+ commands.
  \item Hard and soft hyphen must be distinct.
- \item Spaces between words should be space glyphs and not only a horizontal movement.
+ \item Spaces between words should be space glyphs and not only a horizontal movement. See section~\ref{sec:spacechars}.
  \item Various small infos must be present in the catalog dictionary, info dictionary and the page dictionaries, e.g. metadata like title.
 \end{itemize}
  If suitable I will add code for this tasks to this packages. But some of them can also be done already with existing packages like hyperref, hyperxmp, pdfx.
 
-
-
 \section{To-do}
 \begin{itemize}
 \item Add commands and keys to enable/disable the checks.
@@ -720,7 +799,6 @@
 \item Examples
 \item Write more Tests
 \item Write more Tests
-\item \enquote{Fake spaces}
 \item Unicode
 \item Hyphenation char
 \item Think about included (tagged) pdf. Can one handle them?

Added: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-attr-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-attr-code.sty	                        (rev 0)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-attr-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -0,0 +1,110 @@
+\ProvidesExplPackage {tagpdf-attr-code} {2019/01/04} {0.50}
+ {part of tagpdf - code related to attributes and attribute classes}
+
+% the obj is written in tagpdf-tree-code.
+
+\seq_new:N  \g__uftag_attr_class_used_seq
+\prop_new:N \g__uftag_attr_objnum_prop %will contain obj num of used attributes
+
+\prop_new:N \g__uftag_attr_entries_prop
+\tl_new:N   \g__uftag_attr_class_content_tl
+\tl_new:N   \l__uftag_attr_objtmp_tl
+\tl_new:N   \l__uftag_attr_value_tl
+
+
+\cs_new_protected:Npn \__uftag_attr_new_entry:nn #1 #2 % #1: attribute name, #2: attribute content
+ {
+  % record the declaration globally, keyed by the attribute name
+  \prop_gput:Nnn \g__uftag_attr_entries_prop { #1 } { #2 }
+ }
+
+\keys_define:nn { uftag / setup } % key of the uftag/setup key family
+ {
+  newattribute .code:n = % declares a new attribute
+   {
+    \__uftag_attr_new_entry:nn #1 % the key value #1 must supply both arguments, i.e. {<name>}{<content>}
+   }
+ }
+
+
+% Structure key "attribute-class": takes a comma list of declared attribute names.
+\keys_define:nn { uftag / struct }
+{
+ attribute-class .code:n =
+ {
+   \clist_set:No \l_tmpa_clist { #1 } % the value is expanded once before it is split at commas
+   \seq_set_from_clist:NN \l_tmpa_seq \l_tmpa_clist
+   \seq_map_inline:Nn \l_tmpa_seq
+    {
+     \prop_if_in:NnF \g__uftag_attr_entries_prop {##1} % every name must have been declared
+      {
+       \msg_error:nnn { uftag } { attr-unknown } { ##1 }
+      }
+     \seq_gput_left:Nn\g__uftag_attr_class_used_seq { ##1} % remember each name that was used as a class
+    }
+   \seq_set_map:NNn \l_tmpb_seq \l_tmpa_seq % turn every name into a pdf name /<name>
+   {
+     /##1
+   }
+  \tl_set:Nx \l_tmpa_tl % the value: a single name, or [ ... ] around several names
+   {
+    \int_compare:nT { \seq_count:N \l_tmpa_seq > 1 }{[}
+    \seq_use:Nn \l_tmpb_seq  { \c_space_tl  }
+    \int_compare:nT { \seq_count:N \l_tmpa_seq > 1 }{]}
+   }
+   \int_compare:nT { \seq_count:N \l_tmpa_seq > 0 } % an empty list stores nothing
+    {
+      \__uftag_prop_gput:cnx % store under key C in the prop numbered by the counter g__uftag_struct_abs_int
+       { g__uftag_struct_\int_eval:n {\c@g__uftag_struct_abs_int}_prop }
+       { C }
+       { \l_tmpa_tl }
+      %\prop_show:c  { g__uftag_struct_\int_eval:n {\c@g__uftag_struct_abs_int}_prop }
+    }
+   }
+}
+
+\keys_define:nn { uftag / struct } % structure key "attribute": comma list of declared attribute names
+ {
+  attribute .code:n  = % A property (attribute, value currently a dictionary)
+   {
+    \clist_set:No          \l_tmpa_clist { #1 }
+    \seq_set_from_clist:NN \l_tmpa_seq \l_tmpa_clist
+    \tl_set:Nx \l__uftag_attr_value_tl % start the value; open an array if several names are given
+    {
+     \int_compare:nT { \seq_count:N \l_tmpa_seq > 1 }{[}
+    }
+    \seq_map_inline:Nn \l_tmpa_seq
+    {
+     \prop_if_in:NnF \g__uftag_attr_entries_prop {##1} % every name must have been declared
+      {
+       \msg_error:nnn { uftag } { attr-unknown } { ##1 }
+      }
+     \prop_if_in:NnF \g__uftag_attr_objnum_prop {##1} % first use of this name: no object number stored yet
+      {
+       \__uftag_pdfreserveobjnum:N \l_tmpa_tl % presumably reserves a pdf object number -- helper defined elsewhere
+       \__uftag_pdfuseobjnum:Nx    \l_tmpa_tl % presumably writes the declared content into that object
+        {
+          \prop_item:Nn\g__uftag_attr_entries_prop {##1}
+        }
+        \prop_gput:NnV \g__uftag_attr_objnum_prop {##1} {\l_tmpa_tl}
+      }
+     \tl_put_right:Nx \l__uftag_attr_value_tl % append " <stored number> 0 R" for this name
+      {
+       \c_space_tl
+       \prop_item:Nn \g__uftag_attr_objnum_prop {##1}
+       \c_space_tl 0 \c_space_tl R
+      }
+ %    \tl_show:N \l__uftag_attr_value_tl
+    }
+    \tl_put_right:Nx \l__uftag_attr_value_tl % close the array if one was opened
+     {
+      \int_compare:nT { \seq_count:N \l_tmpa_seq > 1 }{]}
+     }
+ %   \tl_show:N \l__uftag_attr_value_tl
+    \__uftag_prop_gput:cnx
+     { g__uftag_struct_\int_eval:n {\c@g__uftag_struct_abs_int}_prop }
+     { A }
+     { \l__uftag_attr_value_tl }
+   },
+ }
+\endinput


Property changes on: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-attr-code.sty
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-checks-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-checks-code.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-checks-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-checks-code} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-checks-code} {2019/01/04} {0.50}
  {part of tagpdf - code related to checks and messages}
 
 
@@ -19,7 +19,10 @@
 \msg_new:nnn {uftag} {struct-used-twice}     { structure~with~label~#1~has~already~been~used}
 \msg_new:nnn {uftag} {struct-label-unknown}  { structure~with~label~#1~is~unknown~rerun}
 
+% attributes
+\msg_new:nnn {uftag} {attr-unknown}  { attribute~#1~is~unknown}
 
+
 %Roles
 \msg_new:nnn {uftag} {role-missing}     { tag~#1~has~no~role~assigned  }
 \msg_new:nnn {uftag} {role-unknown}     { role~#1~is~not~known  }

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-luatex.def
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-luatex.def	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-luatex.def	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,8 +1,10 @@
-\ProvidesExplFile {tagpdf-luatex.def} {2018/08/06} {0.3}
+\ProvidesExplFile {tagpdf-luatex.def} {2019/01/04} {0.50}
   {tagpdf driver for luatex}
 
 \newattribute \g__uftag_mc_type_attr     %the value represent the type
 \newattribute \g__uftag_mc_cnt_attr      %will hold the \c at g__uftag_MCID_abs_int value
+\newattribute \g__uftag_interwordspace_attr
+\newattribute \g__uftag_interwordfont_attr
 
 % The lua code
 \directlua { tagpdf=require('tagpdf.lua') }
@@ -11,7 +13,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 % needed for \str_set_convert:Nnon
-\tl_new:N   \g__uftag_inputencoding_tl 
+\tl_new:N   \g__uftag_inputencoding_tl
 \tl_gset:Nn \g__uftag_inputencoding_tl {}
 
 

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-generic.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-generic.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-generic.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-mc-code-generic} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-mc-code-generic} {2019/01/04} {0.50}
  {part of tagpdf - code related to marking chunks - generic mode}
 
 % for the label system

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-lua.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-lua.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-lua.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-mc-code-lua} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-mc-code-lua} {2019/01/04} {0.50}
   {tagpdf - mc code only for the luamode }
 
 % the two attibutes are defined in the driver file.

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-shared.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-shared.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-mc-code-shared.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-mc-code-shared} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-mc-code-shared} {2019/01/04} {0.50}
  {part of tagpdf - code related to marking chunks - code shared by generic and luamode }
 
 % I use a latex counter for the absolute count, so that it is added to

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-pdftex.def
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-pdftex.def	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-pdftex.def	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplFile {tagpdf-pdftex.def} {2018/08/06} {0.3}
+\ProvidesExplFile {tagpdf-pdftex.def} {2019/01/04} {0.50}
   {tagpdf driver for pdftex}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-roles-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-roles-code.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-roles-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-roles-code} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-roles-code} {2019/01/04} {0.50}
  {part of tagpdf - code related to roles and structure names}
 
 \__uftag_seq_new:N     \g__uftag_role_tags_seq  %to get names from numbers

Added: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-space-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-space-code.sty	                        (rev 0)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-space-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -0,0 +1,45 @@
+\ProvidesExplPackage {tagpdf-space-code} {2019/01/04} {0.50}
+ {part of tagpdf - code related to real space chars}
+% luatex uses an attribute (declared in the driver file) and some luacode in tagpdf.lua
+% perhaps I will split the code by engine in the future
+
+\sys_if_engine_pdftex:T % pdftex: interword space glyphs are handled by engine primitives
+{
+ \pdfglyphtounicode{space}{0020} % map the glyph name "space" to U+0020
+ \keys_define:nn { uftag / setup }
+  {
+    interwordspace .choices:nn = { true, on }  { \pdfinterwordspaceon },
+    interwordspace .choices:nn = { false, off }{ \pdfinterwordspaceoff }, % the off values must switch the feature off again
+    show-spaces .bool_set:N = \l__uftag_showspaces_bool
+  }
+
+}
+
+
+\sys_if_engine_luatex:T % luatex: the keys call lua functions of the uftag table
+{
+ \keys_define:nn { uftag / setup }
+  {
+    interwordspace .choices:nn = { true, on }
+                                 { \directlua{uftag.func.markspaceon()} },
+
+    interwordspace .choices:nn = { false, off }
+                                 {\directlua{uftag.func.markspaceoff()} },
+    show-spaces      .choice:,
+    show-spaces  / true  .code:n = {\directlua{uftag.trace.showspaces=true}},
+    show-spaces  / false .code:n = {\directlua{uftag.trace.showspaces=nil}},
+    show-spaces .default:n = true
+  }
+ \cs_new_protected:Nn \__uftag_fakespace: % inserts a zero skip while the two interword attributes are set
+  {
+   \group_begin: % the group keeps the attribute settings local to the inserted skip
+    \setattribute\g__uftag_interwordspace_attr{1}
+    \setattribute\g__uftag_interwordfont_attr{\directlua{tex.print(\the\catcodetable@latex, font.current())}}
+    \skip_horizontal:n{\c_zero_skip}
+   \group_end:
+  }
+}
+
+
+
+\endinput


Property changes on: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-space-code.sty
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-struct-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-struct-code.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-struct-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-struct-code} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-struct-code} {2019/01/04} {0.50}
  {part of tagpdf - code related to storing structure}
 
 % I will use a latex counter for the structure count
@@ -41,8 +41,7 @@
 % when the entries are created (title,lange,alt,E,actualtext)
 
 
-\seq_new:N             \c__uftag_struct_StructTreeRoot_entries_seq
-\seq_set_from_clist:Nn \c__uftag_struct_StructTreeRoot_entries_seq
+\seq_const_from_clist:Nn \c__uftag_struct_StructTreeRoot_entries_seq
  {%p. 857/858
   Type,              % always /StructTreeRoot
   K,                 % kid, dictionary or array of dictionaries
@@ -53,8 +52,7 @@
   ClassMap
  }
 
-\seq_new:N             \c__uftag_struct_StructElem_entries_seq
-\seq_set_from_clist:Nn \c__uftag_struct_StructElem_entries_seq
+\seq_const_from_clist:Nn \c__uftag_struct_StructElem_entries_seq
  {%p 858 f
   Type,              %always /StructElem
   S,                 %tag/type

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-tree-code.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-tree-code.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-tree-code.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-tree-code} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-tree-code} {2019/01/04} {0.50}
  {part of tagpdf - code related to writing trees and dictionaries to the pdf}
 
 %this does the actual finishing:
@@ -123,11 +123,44 @@
    }
  }
 
+%classmap, should only be written, if values has been used
+
+\cs_new_protected:Nn \__uftag_tree_class_write_map: % builds the ClassMap from the attribute classes that were used
+ {
+  \tl_gclear:N \g__uftag_attr_class_content_tl
+  \seq_gremove_duplicates:N \g__uftag_attr_class_used_seq % a class used several times is listed only once
+  \seq_set_map:NNn \l_tmpa_seq \g__uftag_attr_class_used_seq % one entry "/<name> <declared content>" per class
+   {
+     /##1\c_space_tl
+     \prop_item:Nn \g__uftag_attr_entries_prop
+       {##1}
+   }
+  \tl_gset:Nx \g__uftag_attr_class_content_tl % join the entries, separated by newlines
+   {
+    \seq_use:Nn \l_tmpa_seq
+     { \iow_newline: }
+   }
+  \tl_if_empty:NF \g__uftag_attr_class_content_tl % nothing is written if no class has been used
+  {
+   \__uftag_pdfreserveobjnum:N \l_tmpa_tl % presumably reserves a pdf object number -- helper defined elsewhere
+   \tl_const:Nx \c__uftag_tree_obj_classmap_tl { \l_tmpa_tl }
+   \__uftag_pdfuseobjnum:Nx \c__uftag_tree_obj_classmap_tl % presumably writes the dictionary into that object
+   { <<\g__uftag_attr_class_content_tl>> }
+
+   \__uftag_prop_gput:cnx % store the reference "<num> 0 R" under the key ClassMap in g__uftag_struct_0_prop
+    { g__uftag_struct_0_prop }
+    { ClassMap }
+    { \c__uftag_tree_obj_classmap_tl\c_space_tl0\c_space_tl R  }
+  }
+ }
+
+
 \cs_new:Nn \uftag_finish_structure:
  {
   \__uftag_pdfcatalog:n {^^J/MarkInfo\c_space_tl<</Marked\c_space_tl true>> }
   \__uftag_tree_write_parenttree:
   \__uftag_tree_write_rolemap:
+  \__uftag_tree_class_write_map: % ClassMap; nothing is written if no attribute class was used
   \__uftag_tree_write_structelements:
   \__uftag_tree_write_structtreeroot:
  }

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-user.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-user.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf-user.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {tagpdf-user} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf-user} {2019/01/04} {0.50}
   {tagpdf - user commands}
 
 \NewDocumentCommand \tagpdfsetup { m }
@@ -84,6 +84,13 @@
    }
  }
 
+\sys_if_engine_luatex:T
+ {
+  % Under luatex, provide \pdffakespace as an argument-less document
+  % command; all work is delegated to the internal function
+  % \__uftag_fakespace:.
+  \NewDocumentCommand \pdffakespace { } { \__uftag_fakespace: }
+ }
 
 
 \endinput

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.lua
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.lua	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.lua	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,5 +1,5 @@
--- Packageversion: 0.3 
--- Packagedate: 2018/08/06
+-- Packageversion: 0.50 
+-- Packagedate: 2019/01/04
 -- tagpdf.lua
 -- Ulrike Fischer
 
@@ -42,6 +42,7 @@
  uftag.func.mark_shipout (): a wrapper around the core function which inserts the last EMC
  uftag.func.fill_parent_tree_line (page): outputs the entries of the parenttree for this page
  uftag.func.output_parenttree(): outputs the content of the parenttree
+ uftag.func.markspaceon(), uftag.func.markspaceoff(): (de)activates the marking of positions for space chars
  uftag.trace.show_mc_data (num): shows uftag.mc[num] 
  uftag.trace.show_all_mc_data (max): shows a maximum about mc's
  uftag.trace.show_seq: shows a sequence (array)
@@ -48,11 +49,15 @@
  uftag.trace.show_struct_data (num): shows data of structure num
  uftag.trace.show_prop: shows a prop 
  uftag.trace.log
+ uftag.trace.showspaces : boolean
 --]]
 
-local mctypeattributeid   = luatexbase.registernumber ("g__uftag_mc_type_attr")
-local mccntattributeid    = luatexbase.registernumber ("g__uftag_mc_cnt_attr")
+local mctypeattributeid       = luatexbase.registernumber ("g__uftag_mc_type_attr")
+local mccntattributeid        = luatexbase.registernumber ("g__uftag_mc_cnt_attr")
+local iwspaceattributeid = luatexbase.registernumber ("g__uftag_interwordspace_attr")
+local iwfontattributeid = luatexbase.registernumber ("g__uftag_interwordfont_attr")
 
+
 local catlatex       = luatexbase.registernumber("catcodetable at latex")
 local tagunmarkedbool= token.create("g__uftag_tagunmarked_bool")
 local truebool       = token.create("c_true_bool") 
@@ -63,11 +68,13 @@
 local nodeid           = node.id
 local nodecopy         = node.copy
 local nodegetattribute = node.get_attribute
+local nodesetattribute = node.set_attribute
 local nodenew          = node.new
 local nodetail         = node.tail
 local nodeslide        = node.slide
 local noderemove       = node.remove
 local nodetraverseid   = node.traverse_id
+local nodetraverse     = node.traverse
 local nodeinsertafter  = node.insert_after
 local nodeinsertbefore = node.insert_before
 local pdfpageref       = pdf.pageref 
@@ -77,6 +84,7 @@
 local RULE           = node.id("rule")
 local DISC           = node.id("disc")
 local GLUE           = node.id("glue")
+local GLYPH          = node.id("glyph")
 local KERN           = node.id("kern")
 local PENALTY        = node.id("penalty")
 local LOCAL_PAR      = node.id("local_par")
@@ -146,7 +154,89 @@
  return head
 end
 
+-- this is for debugging the space chars
+local function __uftag_show_spacemark (head,current,color,height)
+ local markcolor = color or "1 0 0"
+ local markheight = height or 10 
+ local pdfstring = node.new("whatsit","pdf_literal")
+       pdfstring.data =
+       string.format("q "..markcolor.." RG "..markcolor.." rg 0.4 w 0 %g m 0 %g l S Q",-3,markheight)
+       head = node.insert_after(head,current,pdfstring)
+ return head
+end
 
+--[[ a function to mark up places where real space chars should be inserted
+     it only sets an attribute.
+--]]    
+
+local function __uftag_mark_spaces (head)
+  local inside_math = false
+  for n in nodetraverse(head) do
+    local id = n.id
+    if id == GLYPH then
+      local glyph = n
+      if glyph.next and (glyph.next.id == GLUE)
+        and not inside_math  and (glyph.next.width >0)
+      then
+        nodesetattribute(glyph.next,iwspaceattributeid,1)
+        nodesetattribute(glyph.next,iwfontattributeid,glyph.font)
+      -- for debugging  
+       if uftag.trace.showspaces then 
+        __uftag_show_spacemark (head,glyph)
+       end 
+      end
+    elseif id == PENALTY then
+      local glyph = n
+      -- uftag.trace.log ("PENALTY ".. n.subtype.."VALUE"..n.penalty,3) 
+      if glyph.next and (glyph.next.id == GLUE)
+        and not inside_math  and (glyph.next.width >0) and n.subtype==0
+      then
+        nodesetattribute(glyph.next,iwspaceattributeid,1)
+      --  nodesetattribute(glyph.next,iwfontattributeid,glyph.font)
+      -- for debugging  
+       if uftag.trace.showspaces then 
+        __uftag_show_spacemark (head,glyph)
+       end 
+      end 
+    elseif id == MATH then
+      inside_math = (n.subtype == 0)
+    end
+  end
+  return head
+end
+
+local function __uftag_activate_mark_space ()
+ if not luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
+  luatexbase.add_to_callback("pre_linebreak_filter",__uftag_mark_spaces,"markspaces")
+  luatexbase.add_to_callback("hpack_filter",__uftag_mark_spaces,"markspaces")
+ end 
+end
+
+uftag.func.markspaceon=__uftag_activate_mark_space
+
+local function __uftag_deactivate_mark_space ()
+ if luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
+ luatexbase.remove_from_callback("pre_linebreak_filter","markspaces")
+ luatexbase.remove_from_callback("hpack_filter","markspaces")
+ end
+end
+--
+uftag.func.markspaceoff=__uftag_deactivate_mark_space
+
+local default_space_char = node.new(GLYPH)
+local default_fontid     = font.id("TU/lmr/m/n/10")
+default_space_char.char  = 32
+default_space_char.font  = default_fontid
+
+local function __uftag_insert_space_char (head,n,fontid)
+ if luaotfload.aux.slot_of_name(fontid,"space") then
+  local space
+  -- head, space = node.insert_before(head, n, ) -- Set the right font
+  -- n.width = n.width - space.width
+  -- space.attr = n.attr
+ end
+end
+
 --[[
     Now follows the core function
     It wades through the shipout box and checks the attributes
@@ -179,6 +269,7 @@
   end  
   for n in node.traverse(head) do
     local mccnt, mctype, tag = __uftag_get_mc_cnt_type_tag (n)
+    local spaceattr = nodegetattribute(n,iwspaceattributeid)  or -1
     uftag.trace.log ("NODE ".. node.type(node.getid(n)).." MC"..tostring(mccnt).." => TAG "..tostring(mctype).." => " .. tostring(tag),3)
     if n.id == HLIST
     then -- enter the hlist
@@ -187,10 +278,26 @@
     elseif n.id == VLIST then -- enter the vlist     
      mcopen,mcpagecnt,mccntprev,mctypeprev= 
       uftag.func.mark_page_elements (n,mcpagecnt,mccntprev,mcopen,"INTERNAL VLIST",mctypeprev)
-    elseif n.id == GLUE then       -- glue is ignored
+    elseif n.id == GLUE then       -- at glue real space chars are inserted, for the rest it is ignored 
+     -- for debugging       
+     if uftag.trace.showspaces and spaceattr==1  then 
+        __uftag_show_spacemark (head,n,"0 1 0")
+     end
+     if spaceattr==1  then 
+        local space
+        local space_char = node.copy(default_space_char)
+        local curfont    = nodegetattribute(n,iwfontattributeid)  
+        uftag.trace.log ("FONT ".. tostring(curfont),3)
+        if curfont and luaotfload.aux.slot_of_name(curfont,"space") then
+          space_char.font=curfont
+        end
+        head, space = node.insert_before(head, n, space_char) -- 
+        n.width     = n.width - space.width
+        space.attr  = n.attr
+     end
     elseif n.id == LOCAL_PAR then  -- local_par is ignored 
     elseif n.id == PENALTY then    -- penalty is ignored
-    elseif n.id == KERN then       -- kern is ignored
+    elseif n.id == KERN then       -- kern is ignored     
     else
      -- math is currently only logged. 
      -- we could mark the whole as math

Modified: trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.sty	2019-01-05 22:41:18 UTC (rev 49608)
+++ trunk/Master/texmf-dist/tex/latex/tagpdf/tagpdf.sty	2019-01-05 22:41:39 UTC (rev 49609)
@@ -1,8 +1,16 @@
 \RequirePackage{expl3}[2018/06/14]
+%\RequirePackage[enable-debug]{expl3}[2018/06/14]
 
-\ProvidesExplPackage {tagpdf} {2018/08/06} {0.3}
+\ProvidesExplPackage {tagpdf} {2019/01/04} {0.50}
  {A package to experiment with pdf tagging}
+%\debug_on:n{check-declarations,check-expressions,deprecation}
 
+%map internal tag to package name
+\prop_if_exist:NT \g_msg_module_name_prop
+ {
+  \prop_gput:Nnn \g_msg_module_name_prop { uftag }{ tagpdf }
+ }
+
 % storing internal names to my name space:
 \cs_set_eq:NN \__uftag_tex_pdffeedback:D         \tex_pdffeedback:D
 \cs_set_eq:NN \__uftag_tex_pdfextension:D        \tex_pdfextension:D
@@ -33,8 +41,8 @@
 \RequirePackage{atbegshi}
 \RequirePackage{zref-base,zref-lastpage}
 \RequirePackage{etoolbox}
-\RequirePackage{pdfescape}
-\RequirePackage{pdftexcmds}
+%\RequirePackage{pdfescape} %check if needed
+%\RequirePackage{pdftexcmds}%check if needed (expandable commands!)
 
 \RequirePackage{l3str-convert}
 \cs_generate_variant:Nn \str_set_convert:Nnnn {Nonn, Noon, Nnon }
@@ -59,7 +67,7 @@
  activate-tree   .bool_gset:N = \g_uftag_active_tree_bool,
  activate-struct .bool_gset:N = \g_uftag_active_struct_bool,
  activate-all    .meta:n ={activate-mc,activate-tree,activate-struct},
- check-tags      .bool_set:N = \g__uftag_check_tags_bool,
+ check-tags      .bool_gset:N = \g__uftag_check_tags_bool,
  check-tags      .initial:n  = true,
  log             .choice:,
  log / none      .code:n = {\int_set:Nn \l__uftag_loglevel_int { 0 }},
@@ -155,7 +163,7 @@
 \cs_generate_variant:Nn \__uftag_prop_show:N  { c }
 \cs_generate_variant:Nn \prop_gput:Nnn {Nxx}
 \cs_generate_variant:Nn \prop_put:Nnn  {Nxx}
-\cs_generate_variant:Nn \__uftag_pdfuseobjnum:Nn {Nx}
+\cs_generate_variant:Nn \__uftag_pdfuseobjnum:Nn {Nx,No}
 \cs_generate_variant:Nn \__uftag_pdfuseobjnum:nn {nx,xx}
 \cs_generate_variant:Nn \__uftag_gset_pdfpageattr:n {x}
 
@@ -176,6 +184,7 @@
 \RequirePackage { tagpdf-user }
 \RequirePackage { tagpdf-tree-code }
 \RequirePackage { tagpdf-roles-code }
+\RequirePackage { tagpdf-attr-code }
 % mc-code is split:
 \RequirePackage { tagpdf-mc-code-shared }
 \bool_if:NTF \g__uftag_mode_lua_bool
@@ -187,5 +196,5 @@
  }
 
 \RequirePackage { tagpdf-struct-code }
-
+\RequirePackage { tagpdf-space-code }
 \endinput



More information about the tex-live-commits mailing list